/****************************************************
go语言实现爬虫_联想词 咨询:qq:1465376564
黄哥python培训班所写
python北京周末培训班
https://github.com/pythonpeixun/article/blob/master/beijing_weekend.md
python
上海周末培训班
https://github.com/pythonpeixun/article/blob/master/shanghai_weekend.md
*****************************************************/
package main
import (
"fmt"
"io/ioutil"
"net/http"
"regexp"
)
// Spider describes a single HTTP request: the address to fetch and the
// request headers to send along with it.
type Spider struct {
// url is the address passed to http.NewRequest by get_html_header.
url string
// header maps request header names to the values supplied for the request.
header map[string]string
}
// get_html_header issues a GET request to the Spider's url, sending the
// entries of its header map, and returns the response body as a string.
//
// The signature has no error result, so any failure (malformed url,
// transport error, or an error while reading the body) is printed to
// standard output and the empty string is returned instead of a partial
// body. The body is returned regardless of the HTTP status code.
//
// NOTE(review): the client has no timeout configured, so a server that never
// answers can block this call indefinitely.
func (s Spider) get_html_header() string {
	body, err := s.fetch()
	if err != nil {
		fmt.Println("get_html_header:", err)
		return ""
	}
	return body
}

// fetch performs the request on behalf of get_html_header and returns the
// response body, or an error naming the step that failed.
func (s Spider) fetch() (string, error) {
	req, err := http.NewRequest("GET", s.url, nil)
	if err != nil {
		return "", fmt.Errorf("building request: %v", err)
	}
	for key, value := range s.header {
		// net/http ignores a "Host" entry in Request.Header on outgoing
		// requests; the override must be set through Request.Host.
		if http.CanonicalHeaderKey(key) == "Host" {
			req.Host = value
			continue
		}
		req.Header.Add(key, value)
	}

	// http.DefaultClient is a zero-value http.Client, the same configuration
	// as a freshly allocated &http.Client{}.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("sending request: %v", err)
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %q: %v", s.url, err)
	}
	return string(body), nil
}
// main asks the 360 search suggestion endpoint for completions of a fixed
// keyword and prints every brace-delimited fragment of the response.
func main() {
	const word = "科技"
	// NOTE(review): word is appended to the query string without URL escaping.
	endpoint := "http://sug.so.360.cn/suggest?callback=suggest_so&encodein=utf-8&encodeout=utf-8&format=json&fields=word,obdata&word=" + word

	// Headers sent with the request.
	requestHeaders := map[string]string{
		"Host":       "sug.so.360.cn",
		"Referer":    "http://www.so.com/",
		"DNT":        "1",
		"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36",
		"Cookie":     "__huid=104rl%2B0HjG2YltBarbPPIz2w7HTbLrv43gETLeVtBdIEI%3D",
	}

	crawler := &Spider{url: endpoint, header: requestHeaders}
	page := crawler.get_html_header()

	// Non-greedy match: each hit runs from a "{" to the nearest following "}".
	bracePattern := regexp.MustCompile("{(.*?)}")
	fmt.Println(bracePattern.FindAllString(page, -1))
}
// Original article: http://my.oschina.net/pythonpeixun/blog/380908