# -*- coding=utf-8 -*-
# software: scrapy
# datetime: 2020/4/8 2:48 PM
"""Time the same page fetch run four ways.

The fetch is executed ``n`` times sequentially, on a thread pool, on a
process pool and on gevent greenlets; each runner is wrapped in ``timer`` so
the elapsed wall-clock time of every strategy is printed.
"""
import gevent
from gevent import monkey

# Patch before ``requests`` is imported so the modules it pulls in are the
# cooperative versions; the statement order here is deliberate.
monkey.patch_all()

import functools
import time
from concurrent.futures.process import ProcessPoolExecutor
from concurrent.futures.thread import ThreadPoolExecutor

import requests
from lxml import etree

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10

# Worker count shared by the thread pool and the process pool.
MAX_WORKERS = 6


def timer(func):
    """Decorate ``func`` so each call prints how long it took.

    :param func: callable to time
    :return: wrapper that forwards all arguments and returns ``func``'s result
    """

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        ret = func(*args, **kwargs)
        print(f"耗时:{func}", time.perf_counter() - start_time)
        return ret

    return wrapper


class OrderSpider(object):
    """Fetch one fixed page and extract text nodes from it."""

    def __init__(self):
        self.url = "http://www.bewindoweb.com/dwg.php"

    def request(self):
        """Download ``self.url`` and parse it.

        :return: the list produced by ``parse`` on HTTP 200, otherwise ``None``
        """
        # Without a timeout a single stalled connection blocks forever.
        res = requests.get(self.url, timeout=REQUEST_TIMEOUT)
        if res.status_code == 200:
            return self.parse(res.text)
        return None

    def parse(self, html):
        """Extract the text of the second inner ``div`` of each card link.

        :param html: page source as text
        :return: list of matching text nodes
        """
        node = etree.HTML(html)
        # The predicate must use plain ASCII quotes; typographic quotes are
        # not valid XPath string delimiters.
        return node.xpath("//div[@class='card-dwg-hrefc']/a/div/div[2]/text()")


@timer
def run(function, n):
    """Call ``function`` ``n`` times one after another.

    Prints the result of the last call (``None`` when ``n`` is 0).

    :param function: zero-argument callable to execute
    :param n: number of calls
    :return: None
    """
    last = None
    for _ in range(n):
        last = function()
    print(last)


def callback(future):
    """Done-callback attached to every pool future.

    :param future: the completed future
    :return: the future's result; ``add_done_callback`` discards this value
    """
    return future.result()


def _pool_run(executor_cls, function, n):
    """Submit ``function`` ``n`` times to a pool and print the last result.

    Shared by ``thread_run`` and ``process_run``.
    """
    # The ``with`` block calls shutdown(wait=True) on exit, also on error.
    with executor_cls(MAX_WORKERS) as pool:
        futures = []
        for _ in range(n):
            future = pool.submit(function)
            future.add_done_callback(callback)
            futures.append(future)
        # Like ``run`` and ``gevent_run``: report only the last task and print
        # None when nothing was submitted instead of failing on an unbound
        # name.
        print(futures[-1].result() if futures else None)


@timer
def thread_run(function, n):
    """Run ``function`` ``n`` times on a thread pool.

    :param function: zero-argument callable to execute
    :param n: number of submissions
    :return: None
    """
    _pool_run(ThreadPoolExecutor, function, n)


@timer
def process_run(function, n):
    """Run ``function`` ``n`` times on a process pool.

    ``function`` and its result have to be picklable to cross the process
    boundary.

    :param function: zero-argument callable to execute
    :param n: number of submissions
    :return: None
    """
    # NOTE(review): this pool is created after monkey.patch_all(); confirm
    # that process pools behave correctly under gevent patching on the target
    # platform.
    _pool_run(ProcessPoolExecutor, function, n)


@timer
def gevent_run(function, n):
    """Run ``function`` ``n`` times on gevent greenlets.

    Prints the value of the last greenlet (``None`` when ``n`` is 0).

    :param function: zero-argument callable to execute
    :param n: number of greenlets to spawn
    :return: None
    """
    tasks = [gevent.spawn(function) for _ in range(n)]
    gevent.joinall(tasks)
    print(tasks[-1].value if tasks else None)


if __name__ == '__main__':
    n = 100
    order_spider = OrderSpider()
    run(order_spider.request, n)
    thread_run(order_spider.request, n)
    process_run(order_spider.request, n)
    gevent_run(order_spider.request, n)
# Original article: https://www.cnblogs.com/dreamall/p/12660370.html