首页 > 编程语言 > 详细

python3.7 爬取QQ空间好友

时间:2018-01-29 23:43:30      阅读:426      评论:0      收藏:0      [点我收藏+]

 使用selenium库自动登录,记录登录的Cookie。以下URL分别代表不同的动作,虽然没有全用。

留言:
https://user.qzone.qq.com/proxy/domain/m.qzone.qq.com/cgi-bin/new/get_msgb?
uin=1612893772&hostUin=1148639090&start=0&s=0.8883444517176473&format=jsonp&num=10
&inCharset=utf-8&outCharset=utf-8&g_tk=2208268
&qzonetoken=bede67d5ca4dc0944791e45f795beeb346e50a23b20df9b4152a142232a7f7cd40e26b929798e3b74bab&g_tk=2208268
好友:
https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?
uin=1612893772&do=1&rd=0.19169828437926406&fupdate=1&clean=1&g_tk=108064521
&qzonetoken=77bdd3f44636c7b403a6462f493a2e6e02e6b8cd1772fe928bf511442e491315df84454ad4455093f2&g_tk=108064521

说说:
https://h5.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds_html_module?i_uin=1148639090&i_login_uin=1612893772
&mode=4&previewV8=1&style=25&version=8&needDelOpr=true&transparence=true&hideExtend=false
&showcount=5&MORE_FEEDS_CGI=http://ic2.qzone.qq.com/cgi-bin/feeds/feeds_html_act_all&refer=2&paramstring=os-winxp|100

兴趣爱好:
https://h5.qzone.qq.com/proxy/domain/page.qq.com/cgi-bin/profile/interest_get?
uin=851676467&vuin=1612893772&flag=1&rd=0.7835457101159748&fupdate=1&
g_tk=896484925&qzonetoken=38bcb8fb59e772a31ff4ca2358781258d1c7f4e2c8f640e537d6bf52ccc4ab48c7614fa3a57a5cabf0

以下是具体代码:

  1 from urllib import parse
  2 from selenium import webdriver
  3 import requests
  4 import json
  5 from json import loads
  6 import time
  7 import pymssql
  8 import datetime
  9 
 10 def get_key_values(body,key,end =;):
 11     """提取body中不包括的key,分片操作
 12 
 13     :param body: 父字符串
 14     :param key: 子字符串
 15     :param end: 结束字符串,默认为;
 16     :return: 不包括子字符串的字符串
 17     """
 18     return body[body.find(key) + len(key): body.find(;, body.find(key))]
 19 
 20 def get_key(cookies):
 21     """获取cookie中的相关键的值
 22     解密
 23 
 24     :param cookies: 缓存
 25     :return: 相关键的值
 26     """
 27     key = get_key_values(cookies,p_skey=)
 28     h = 5381
 29     for i in key:
 30         h += (h << 5) + ord(i)
 31     return h & 2147483647
 32 
 33 def web_login_cookie():
 34     """url = ‘https://user.qzone.qq.com/QQ号相关的缓存
 35     实现自动化登录
 36 
 37     :return: 浏览器的缓存
 38     """
 39     driver = webdriver.Chrome()
 40     qq_account = 1612893772
 41     qq_password = 13974162858x
 42     login(driver,qq_account,qq_password)
 43     time.sleep(10)
 44     driver.get(https://user.qzone.qq.com/{}.format(qq_account))
 45     cookie = ‘‘
 46     for elem in driver.get_cookies(): # 记录登录的Cookie
 47         # elem 为 dict类型
 48         cookie += elem["name"] + "=" + elem["value"] + ";"
 49     # cookies = cookie
 50     return cookie
 51 
 52 def login(driver,qq_account,qq_password):
 53     """登录
 54 
 55     :param driver: 浏览器对象
 56     :param qq_account: QQ账号
 57     :param qq_password: QQ密码
 58     :return:
 59     """
 60     driver.maximize_window()
 61     driver.get(http://user.qzone.qq.com)
 62     driver.switch_to.frame(login_frame)
 63     time.sleep(1)
 64     driver.find_element_by_id("switcher_plogin").click()
 65     driver.find_element_by_id("u").send_keys(qq_account)
 66     time.sleep(2)
 67     driver.find_element_by_id("p").send_keys(qq_password)
 68     time.sleep(2)
 69     driver.find_element_by_id("login_button").click()
 70 
 71 def send_requests(req,headers,url,params=None):
 72     """url_friend = ‘https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?‘
 73     url_friend携带以下参数:uin(QQ号)、do(没有它,返回空,默认值为:1)
 74     rd,g_t,qzonetoken(每次登录都发生变化,从Cookiezz中获取)
 75     fupdate,clean(默认值为:1)
 76 
 77     :param req: 请求(Request),该请求为会话
 78     :param headers: 请求头
 79     :param params: 请求参数
 80     :return: JSONP数据
 81     """
 82     if None != params:
 83         url = url + parse.urlencode(params)
 84     # url = url+‘&offset=‘+str(0)
 85     page = req.get(url=url, headers=headers)
 86     return page.text
 87 
 88 def get_each_str(req,uin,headers):
 89     each_url = https://user.qzone.qq.com/{}.format(uin)
 90     page = req.get(url=each_url, headers=headers)
 91 
 92 def friend_db(dicts,name=‘‘):
 93     """操作DB
 94 
 95     :param dicts: 数据字典信息
 96     :param name: 备注名
 97     :return: void
 98     """
 99     if len(str(dicts[birthyear])) < 4:
100         dicts[birthyear] = 1900
101     if dicts[birthday][1:2] == 0:
102         dicts[birthday] = 01-01
103     if len(dicts[signature]) > 70:
104         dicts[signature] = ‘‘
105     friend_db_dict = {
106         friendInfo: [
107         dicts[uin], name, dicts[age],  if dicts[sex] == 1 else 
108         , datetime.datetime.strptime(str(dicts[birthyear]) + - + str(dicts[birthday]), %Y-%m-%d)],
109         friendPlace: [
110         dicts[uin], dicts[company],dicts[career], dicts[hco] + dicts[hp] + dicts[hc],
111         dicts[country] + dicts[province] + dicts[city],dicts[cco] + dicts[cp] + dicts[cc], dicts[cb]],
112         friendNet: [
113         dicts[uin], dicts[nickname], dicts[spacename], dicts[desc], dicts[signature]]
114     }
115     conn = pymssql.connect(host=localhost, user=sa, password=123456, database=friendDB,
116                             charset=utf8)
117     cur = conn.cursor()
118     sql = "begin tran insertData insert into friendInfo values({},‘{}‘,{},‘{}‘,‘{}‘);" 119           "insert into friendPlace values({},‘{}‘,‘{}‘,‘{}‘,‘{}‘,‘{}‘,‘{}‘);" 120           "insert into friendNet values({},‘{}‘,‘{}‘,‘{}‘,‘{}‘);" 121           "commit tran insertData".122         format(friend_db_dict[friendInfo][0],friend_db_dict[friendInfo][1],friend_db_dict[friendInfo][2]
123                 ,friend_db_dict[friendInfo][3],friend_db_dict[friendInfo][4],friend_db_dict[friendPlace][0],
124                 friend_db_dict[friendPlace][1],friend_db_dict[friendPlace][2],friend_db_dict[friendPlace][3],
125                 friend_db_dict[friendPlace][4],friend_db_dict[friendPlace][5],friend_db_dict[friendPlace][6],
126                 friend_db_dict[friendNet][0],friend_db_dict[friendNet][1],friend_db_dict[friendNet][2],
127                 friend_db_dict[friendNet][3],friend_db_dict[friendNet][4])
128     print(sql: ,sql)
129     cur.execute(sql)
130     conn.commit()
131     cur.close()
132     conn.close()
133 
def _parse_jsonp(text):
    """Decode the JSON wrapped in a ``_Callback(...);`` reply."""
    return loads(text[len('_Callback('):text.find(');')])


def _record_failure(line):
    """Append one line to leak.txt, the log of friends that were skipped."""
    with open('leak.txt', 'a', encoding='utf8') as file:
        file.write(line)


def main():
    """Log in, download the friend list and store every friend's profile.

    Steps:
      1. Log in through the browser and build the request token and
         parameters from the returned cookies.
      2. Request the friend list; while the reply's ``code`` is not 0, wait
         10 seconds and start again from the login. This is a loop rather
         than a recursive call, so repeated failures no longer deepen the
         call stack.
      3. For each friend, request the full profile one second apart, then
         store it with ``friend_db``; replies that are not valid JSON or
         whose ``code`` is not 0 are appended to ``leak.txt``.

    :return: None
    """
    while True:
        req = requests.session()
        headers = {'host': 'h5.qzone.qq.com',
                   'accept-encoding': 'gzip, deflate, br',
                   'accept-language': 'zh-CN,zh;q=0.8',
                   'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                   'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                                 '59.0.3071.115 Safari/537.36',
                   'connection': 'keep-alive'}
        cookie = web_login_cookie()
        print('cookie', cookie)
        g_tk = get_key(cookie)
        qzonetoken_friend = get_key_values(cookie, 'ptcz=')
        uin_friend = get_key_values(cookie, 'ptui_loginuin=')
        rd_friend = get_key_values(cookie, '_qpsvr_localtk=')
        print('friend_data,qzontoken:%s;uin:%s;rd:%s' % (qzonetoken_friend, uin_friend, rd_friend))
        headers['Cookie'] = cookie
        params_friend = {"uin": uin_friend, "fupdate": 1, "action": 1, "do": 1, "g_tk": g_tk, "rd": rd_friend,
                         'qzonetoken': qzonetoken_friend}
        url_friend = 'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?'
        data_friend_str = send_requests(req, headers, url_friend, params=params_friend)
        data_friend_dict = _parse_jsonp(data_friend_str)
        print('data_friend_dict: ', data_friend_dict)
        if data_friend_dict['code'] == 0:
            break
        # A non-zero code means the list was refused (the original notes
        # mention code -3000, "please log in first"); retry after a pause.
        time.sleep(10)

    each_url = 'https://h5.qzone.qq.com/proxy/domain/base.qzone.qq.com/cgi-bin/user/cgi_userinfo_get_all?'
    for friend in data_friend_dict['data']['items_list']:
        each_uin = friend['uin']
        params_each = {"uin": each_uin, "fupdate": 1, "vuin": uin_friend, "g_tk": g_tk, "rd": rd_friend,
                       'qzonetoken': qzonetoken_friend}
        time.sleep(1)
        data_each_str = send_requests(req, headers, each_url, params_each)
        try:
            data_each_dict = _parse_jsonp(data_each_str)
        except json.decoder.JSONDecodeError as e:
            # Persist the failure so skipped friends can be reviewed later.
            _record_failure('except: ' + str(each_uin) + " " + friend['name'] + " " + e.msg + "\n")
            continue
        print('data_each_dict: ', data_each_dict)
        if data_each_dict['code'] == 0:
            friend_db(data_each_dict['data'], name=friend['name'])
        else:
            # The original notes mention code -4009, "no access permission".
            _record_failure('没有访问权限: ' + str(each_uin) + " " + friend['name'] + "\n")
# Run the crawl only when the file is executed as a script, so importing the
# module does not open a browser or write to the database.
if __name__ == '__main__':
    main()

python3.7 爬取QQ空间好友

原文:https://www.cnblogs.com/YuWeiXiF/p/8379803.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!