class scrapy.http.Request(url[, callback, method='GET', headers, body, cookies, meta, encoding='utf-8', priority=0, dont_filter=False, errback])
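As a minimal sketch of how these constructor arguments are commonly passed (the URL, callback names, and meta key below are placeholders, not taken from the original post):

import scrapy

class RequestDemoSpider(scrapy.Spider):
    name = 'request_demo'
    start_urls = ['https://www.baidu.com']

    def parse(self, response):
        # Pass state to the callback via meta, bypass the duplicate filter
        # with dont_filter=True, and attach an error handler via errback.
        yield scrapy.Request(
            url='https://www.baidu.com/some/page',   # placeholder URL
            callback=self.parse_page,
            method='GET',
            headers={'Referer': response.url},
            meta={'depth_hint': 1},
            priority=10,
            dont_filter=True,
            errback=self.handle_error,
        )

    def parse_page(self, response):
        self.logger.info('Got %s', response.url)

    def handle_error(self, failure):
        self.logger.error('Request failed: %r', failure)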
import scrapy
from scrapy.linkextractors import LinkExtractor

class DeepInSpider(scrapy.Spider):
    name = 'example.com'
    start_urls = ['https://www.baidu.com']

    def parse(self, response):
        link_extractor = LinkExtractor()
        seen = set()
        # Extract every link from the response and skip any already handled.
        links = [link for link in link_extractor.extract_links(response)
                 if link not in seen]
        for link in links:
            print(link.url)
            seen.add(link)
            # Route detail pages to a dedicated callback.
            callback = None
            if 'detail' in link.url:
                callback = self.parse_detail
            yield scrapy.Request(url=link.url, callback=callback)

    def parse_detail(self, response):
        pass
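As an aside not in the original post, LinkExtractor also accepts filtering arguments such as allow and deny, which can replace the manual 'detail' substring check above; the regex here is a placeholder assumption:

from scrapy.linkextractors import LinkExtractor

# Only extract links whose URL matches the (placeholder) 'detail' pattern,
# so the callback choice no longer needs a manual substring check.
detail_extractor = LinkExtractor(allow=r'detail')

def extract_detail_links(response):
    # Returns scrapy.link.Link objects; use link.url when building Requests.
    return detail_extractor.extract_links(response)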
Source: https://www.cnblogs.com/zhangjian0092/p/11693669.html