首页 > 其他 > 详细

爬取某东商品信息

时间:2019-05-29 19:46:01      阅读:113      评论:0      收藏:0      [点我收藏+]
技术分享图片
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
import time


# Chrome launch options; handed to webdriver.Chrome() where the driver is
# created further down in this script.
option = ChromeOptions()
# NOTE(review): "disable-infobars" presumably hides Chrome's automation
# info bar -- confirm it is still honoured by the Chrome version in use.
option.add_argument("disable-infobars")

def _scroll_page(driver, pause, steps=20, start=400, stride=500):
    """Scroll the window down in fixed increments, sleeping between steps.

    Presumably this gives lazily loaded items time to render before they are
    read -- the page behaviour itself is not visible from this file.
    """
    for step in range(steps):
        driver.execute_script("window.scrollTo(0,%s)" % (start + step * stride))
        time.sleep(pause)


def _describe_good(good):
    """Build the text record (name, price, link, image, comments) for one item.

    ``good`` is the element of a single product entry; every lookup below is
    relative to it.
    """
    good_name = good.find_element_by_css_selector(".p-name em").text.replace("\n", "")
    good_price = good.find_element_by_css_selector(".p-price").text.replace("\n", "")
    good_link = good.find_element_by_css_selector(".p-img a").get_attribute("href")
    good_img = good.find_element_by_css_selector(".p-img img").get_attribute("src")
    good_commit = good.find_element_by_css_selector(".p-commit").text.replace("\n", "")
    # Written with explicit "\n" so the emitted layout does not depend on how
    # this source file happens to be indented.
    return (
        "\n"
        "             商品信息:\n"
        "                名称 %s\n"
        "                价格 %s\n"
        "                链接 %s\n"
        "                图片 %s\n"
        "                评论人数 %s\n"
        "        "
    ) % (good_name, good_price, good_link, good_img, good_commit)


def get_goods(driver, output_path="jd.txt", scroll_pause=0.1, page_pause=2):
    """Scrape every result page reachable from the current search page.

    For each page: scroll down step by step, collect all ``gl-item`` entries
    inside the ``J_goodsList`` container, print each entry's record and append
    it to ``output_path``, then click the ``pn-next`` control and repeat.

    Pagination is a loop rather than self-recursion, so a long result set
    cannot exhaust the interpreter's recursion limit, and the function returns
    normally when there is no further page instead of failing on the lookup.

    Args:
        driver: A Selenium WebDriver already showing a search result page.
        output_path: File that records are appended to (UTF-8).
        scroll_pause: Seconds to sleep after each scroll step.
        page_pause: Seconds to sleep after clicking "next page".

    Returns:
        None.
    """
    while True:
        _scroll_page(driver, scroll_pause)

        # 1. Locate the container that holds all product entries.
        good_div = driver.find_element_by_id("J_goodsList")

        # 2. Collect every product entry inside it.
        good_list = good_div.find_elements_by_class_name("gl-item")
        print(good_list)

        # Open the output file once per page instead of once per item.
        with open(output_path, "a", encoding="utf-8") as f:
            for good in good_list:
                goods = _describe_good(good)
                print(goods)
                f.write(goods + "\n")

        # find_elements_* returns an empty list when nothing matches, which
        # gives a clean stop condition without catching an exception.
        next_tags = driver.find_elements_by_class_name("pn-next")
        if not next_tags:
            return
        next_tag = next_tags[0]
        # NOTE(review): assumes the site marks the last page's button with an
        # extra "disabled" class -- confirm against the live markup.
        if "disabled" in (next_tag.get_attribute("class") or "").split():
            return
        next_tag.click()
        time.sleep(page_pause)



# Script body: open the shop's home page, search for a keyword, then scrape
# the result pages with get_goods().
driver = webdriver.Chrome(chrome_options=option)
try:
    driver.get("https://www.jd.com/")
    # Element lookups below wait up to 10 seconds for a match to appear.
    driver.implicitly_wait(10)
    input_tag = driver.find_element_by_id("key")
    input_tag.send_keys("坦克")
    # Alternative: submit the search by pressing Enter in the input box.
    # input_tag.send_keys(Keys.ENTER)
    search_button = driver.find_element_by_class_name("button")
    search_button.click()
    get_goods(driver)

    # Keep the browser open for a while after scraping finishes.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window and can leave the browser driver process running.
    driver.quit()
View Code

 

爬取某东商品信息

原文:https://www.cnblogs.com/tangda/p/10945487.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!