首页 > 其他 > 详细

第十三节 电影天堂项目实战

时间:2020-03-16 21:59:58      阅读:66      评论:0      收藏:0      [点我收藏+]
from urllib.parse import urljoin

import requests
from lxml import etree


 5 baseurl = https://www.dytt8.net
 6 headers = {
 7     User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36,
 8     Referer: https://www.dytt8.net/html/gndy/dyzz/index.html
 9 }
def agent(ur):
    """Download the page at ``ur`` and return it parsed as an lxml HTML tree.

    Args:
        ur: URL of the page to fetch.

    Returns:
        The result of ``etree.HTML`` applied to the response text.
    """
    resp = requests.get(ur, headers=headers)
    return etree.HTML(resp.text)

def movie_url_list(html):
    """Return the ``href`` of every link inside the ``tbspan`` tables.

    Args:
        html: Parsed list page; must provide an ``xpath`` method.

    Returns:
        The list of ``href`` attribute values matched by the XPath query.
    """
    return html.xpath("//table[@class='tbspan']//a/@href")

def parse_info(info, rule):
    """Strip the label ``rule`` out of ``info`` and trim the result.

    Args:
        info: Raw text line, e.g. a label followed by its value.
        rule: Label text to remove; every occurrence is removed.

    Returns:
        ``info`` without ``rule`` and without leading/trailing whitespace.
    """
    return info.replace(rule, "").strip()

def xiangqingye(url):
    """Download one movie detail page and collect its labelled fields.

    Args:
        url: Absolute URL of the detail page.

    Returns:
        A dict mapping field keys (``pianming``, ``niandai``, ...) to the
        text that follows the matching label inside the ``Zoom`` div. Fields
        whose label does not appear on the page are absent from the dict.
    """
    # Label as it appears at the start of a text node -> key in the result.
    # NOTE(review): labels are copied verbatim from the listing; the live
    # page may pad them with full-width spaces (U+3000) - confirm.
    fields = (
        ("◎片  名", "pianming"),
        ("◎年  代", "niandai"),
        ("◎产  地", "chandi"),
        ("◎类  别", "leixing"),
        ("◎上映日期", "shangyingshijian"),
        ("◎豆瓣评分", "doubanpingfen"),
        ("◎片  长", "pianchang"),
        ("◎标  签", "biaoqian"),
    )

    resp = requests.get(url, headers=headers)
    # Decode the raw bytes as GBK; replace undecodable bytes instead of
    # raising so one malformed page does not abort the whole crawl.
    text = resp.content.decode("gbk", errors="replace")
    html = etree.HTML(text)

    movie = {}
    for info in html.xpath('//div[@id="Zoom"]//text()'):
        for label, key in fields:
            if info.startswith(label):
                movie[key] = parse_info(info, label)
                # A text node carries at most one label; stop at the first hit
                # so the stripped value is never tested against later labels.
                break
    return movie

def alldata():
    """Crawl the movie list pages and return the details of each movie.

    Returns:
        A list with one dict per movie link found, each produced by
        ``xiangqingye``.
    """
    page_prefix = "https://www.dytt8.net/html/gndy/dyzz/list_23_"
    page_suffix = ".html"
    movies = []
    # range(1, 2) visits only list page 1; widen the range to crawl more pages.
    for page in range(1, 2):
        listing = agent(page_prefix + str(page) + page_suffix)
        for href in movie_url_list(listing):
            # urljoin copes with root-relative, relative and absolute hrefs,
            # where plain concatenation only works for root-relative ones.
            movies.append(xiangqingye(urljoin(baseurl, href)))
    return movies
# Run the crawl and print the collected movies when executed as a script.
if __name__ == "__main__":
    print(alldata())

 

第十三节 电影天堂项目实战

原文:https://www.cnblogs.com/kogmaw/p/12506974.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!