def parse(self, response):
    """Parse a listing page and schedule one detail request per post.

    For every ``div.post_item`` on the page an item is created, its
    ``author`` field is filled from the listing, and a request for the
    post's detail page is yielded with the item attached via ``meta`` so
    that ``parse_detail`` can finish populating it. Afterwards the pager
    link is followed, if one is present.

    Args:
        response: The downloaded listing page.

    Yields:
        ``Request`` objects for each post's detail page, followed by a
        ``Request`` for the next listing page when a pager link exists.
    """
    for div in response.css('div.post_item'):
        # NOTE(review): the original snippet used ``url`` without defining
        # it. This assumes the post link is the ``h3 a`` inside each item —
        # TODO confirm against the actual page markup.
        url = div.css('h3 a::attr(href)').extract_first()
        if not url:
            # Without a link there is no detail page to request.
            continue

        # NOTE(review): the original also read the summary text via
        # ``p.post_item_summary::text`` but never stored it; add it here
        # if ScrItem declares a field for it.
        item = ScrItem()
        item['author'] = div.css('.post_item_foot a::text').extract_first()

        # Store the item in meta; the callback hands it to parse_detail.
        yield Request(
            response.urljoin(url),
            callback=self.parse_detail,
            meta={'item': item},
        )

    next_page = response.css(
        'div.pager a:last-child::attr(href)'
    ).extract_first()
    # The pager link can be missing (no further page) or relative, so only
    # follow it when present and resolve it against the current URL.
    if next_page:
        yield Request(response.urljoin(next_page), callback=self.parse)


def parse_detail(self, response):
    """Complete the item started in ``parse`` with the post body.

    Args:
        response: The downloaded detail page; ``response.meta['item']`` is
            expected to hold the item created in ``parse``.

    Yields:
        The item with its ``content`` field set to the markup matched by
        ``#cnblogs_post_body`` (``None`` when nothing matches).
    """
    # Retrieve the item that parse() attached to the request.
    item = response.meta.get('item')
    item['content'] = response.css('#cnblogs_post_body').extract_first()
    yield item
# Original article (原文): https://www.cnblogs.com/baohanblog/p/12689087.html