# XPath basics with lxml: locating tags in a parsed document and extracting
# their text and attribute values. The example queries are kept commented out
# so that each one can be enabled individually.
import requests
from lxml import etree  # etree exposes the XPath API

# Browser-like request headers. Not used by the code visible here; kept for
# HTTP requests made with `requests`.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36',
}

# Parse the local sample page into an element tree.
# NOTE(review): etree.parse() uses lxml's XML parser unless a parser is given;
# if test.html is not well-formed markup, pass etree.HTMLParser() as the
# second argument -- confirm against the actual file.
tree = etree.parse('./相关/test.html')

# Tag location: select every tag with the given name.
# print(tree.xpath('//div'))

# Attribute location: select tags by attribute value.
# print(tree.xpath('//div[@class="tang"]'))

# Index location: XPath indexes start at 1, not 0.
# print(tree.xpath('//div[1]'))

# Hierarchy location: "/" steps down one level, "//" spans multiple levels.
# print(tree.xpath('//div[@class="tang"]/ul/li[4]/a'))
# print(tree.xpath('//div[@class="tang"]//li[4]/a'))

# Content extraction: /text() and //text()
# print(tree.xpath('//div[@class="tang"]/ul/li[4]/a/text()')[0])

# Attribute value extraction: /@attrName
# print(tree.xpath('//div[@class="tang"]/ul/li[4]/a/@href')[0])
# Original article: https://www.cnblogs.com/zhangyh-blog/p/14741941.html