E-commerce work code

from selenium import webdriver
from scrapy.selector import Selector
import time
import random
import pymysql
from urllib import parse
import re
import os
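# Outline of this script (summary of the code below):
#   1. Load known item ids, and which items are due for 1/7/30-day sales updates, from MySQL.
#   2. Start an IE webdriver and pause for a manual Taobao login.
#   3. For every shop listed in 1.csv, walk its search-result pages:
#        - insert new items into the `商品id` table (brand, id, image link, price, title, item url),
#        - record category, sizes, colours and sku count from each item page,
#        - save a screenshot of each new item page.
#   4. Write the 1/7/30-day sales figures for items whose 上架时间 matches.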

        # a = Selector(text=webdriver.page_source)
        # if a.xpath('//*[@id="J_submit"]'):
        #     time.sleep(15)
        #     for i in Selector(text=webdriver.page_source).xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        #         bd_pig = i.xpath("./dt/a/img/@src").re('.*(img.*?jpg)')
        #         bd_name = i.xpath('./dd[1]/a/text()').extract_first('')
        #         bd_id = i.xpath('./dd[1]/a/@href').extract_first('')
        #         bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        #         bd_liang = i.xpath('./dd[1]/div/div[last()]/span/text()').extract_first('')
        #
        #         sql = "INSERT INTO " + i.split(",")[0] + "( `id`,图片链接,价格,标题,销量) VALUES (%s,%s,%s,%s,%s)"
        #         cursor.execute(sql,
        #                             (bd_id, bd_pig, bd_much, bd_name, bd_liang))
        #         self.connection.commit()
        # else:
        #     for i in Selector(text=webdriver.page_source).xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        #         bd_pig = i.xpath("./dt/a/img/@src").re('.*(img.*?jpg)')
        #         bd_name = ''.join(re.findall('[\u4e00-\u9fa5]', i.xpath('./dd[1]/a/text()').extract_first('')))
        #         bd_id = ''.join(re.findall(r'\d', i.xpath('./dd[1]/a/@href').extract_first('')))
        #         bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        #         bd_liang = i.xpath('./dd[1]/div/div[last()]/span/text()').extract_first('')
        #
        #         sql = "INSERT INTO " + shop.split(",")[0] + "( `id`,图片链接,价格,标题,销量) VALUES (%s,%s,%s,%s,%s)"
        #         cursor.execute(sql,
        #                        (bd_id, bd_pig, bd_much, bd_name, bd_liang))
        #         conection.commit()
class spider(object):

    def chul3(self, dates):
        # Extract the next-page link from a shop search-result page and return it as an absolute URL.
        a = Selector(text=dates)
        next_url = a.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[10]/a[11]/@href').extract_first("")
        return 'https:' + next_url


chuli = spider()
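# NOTE: chuli / spider.chul3 is defined but never used below; the next-page link
# is extracted inline at the end of lll() instead.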

conection = pymysql.connect(host='localhost', user='root', password='123', db='7.24测试',
                            charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
with conection.cursor() as cursor:
    sql1 = "select * from 商品id"
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    # DictCursor returns each row as a dict, so columns are accessed by name
    shop_oldid = [i['id'] for i in shop_id]
    sql1 = '''
    SELECT `商品id`.id, `上架时间`, '1天销量' as 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 1
    UNION
    SELECT `商品id`.id, `上架时间`, '7天销量' as 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 7
    UNION
    SELECT `商品id`.id, `上架时间`, '30天销量' as 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 30'''
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    shop_olxx = list(shop_id)
conection.commit()
cursor = conection.cursor()  # this cursor stays open for the rest of the script
webdriver = webdriver.Ie()  # the module name is rebound to the IE driver instance from here on
url = 'https://login.taobao.com/member/login.jhtml?spm=a21bo.50862.754894437.1.5dcec6f76Oq9Wh&f=top&redirectURL=https%3A%2F%2Fwww.taobao.com%2F%3Fspm%3Da1z10.1-c-s.1581860521.1.559a715a3EnsHq'
webdriver.get(url)
time.sleep(20)
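# The 20-second sleep above is the window for completing the Taobao login by hand
# in the IE window before crawling starts; lengthen it if logging in takes longer.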
def lll(url):
    webdriver.implicitly_wait(50)
    webdriver.get(url)
    myDynamicElement = webdriver.find_element_by_class_name('pagination')  # wait until the pager is present
    a = webdriver.page_source
    time.sleep(random.randrange(2, 6))
    selects = Selector(text=a)
    # shop is the current CSV row (brand,...,url), set in the loop at the bottom of the file
    for i in selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        # take the first regex match so a plain string (not a list) is stored in MySQL
        bd_pig = (i.xpath("./dt/a/img/@src").re(r'(.*)_') or [''])[0]
        bd_name = ''.join(re.findall('[\u4e00-\u9fa5]', i.xpath('./dd[1]/a/text()').extract_first('')))
        bd_id = ''.join(re.findall(r'\d', i.xpath('./dd[1]/a/@href').extract_first('')))
        bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        bd_idlian = 'http://item.taobao.com/item.htm?id=' + bd_id
        bd_liang = i.xpath('./dd[1]/div/div[last()]/span[last()]/text()').extract_first('')
        if bd_id not in shop_oldid:
            sql = "INSERT INTO 商品id (`品牌`, `id`, 图片链接, 价格, 标题, 商品地址) VALUES (%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql,
                           (shop.split(",")[0], bd_id, bd_pig, bd_much, bd_name, bd_idlian))
            conection.commit()
            webdriver.implicitly_wait(50)
            webdriver.get('http://item.taobao.com/item.htm?id=' + bd_id)
            myDynamicElement = webdriver.find_element_by_class_name('tb-price-spec')
            time.sleep(random.randrange(2, 6))
            date = webdriver.page_source
            select_xixi = Selector(text=date)
            liem = select_xixi.xpath('//*[@id="J_TMySize"]/@data-value').extract_first("")
            sql = "update `商品id` set `商品id`.`类目` = %s where id = %s"
            cursor.execute(sql,
                           (liem, bd_id))
            conection.commit()
            c = 1   # colour count (defaults to 1 when the item has no colour options)
            ee = 1  # size count (defaults to 1 when the item has no size options)
            for i in select_xixi.xpath('//*[@id="J_isku"]/div/dl'):
                b = i.xpath('./dt/text()').extract_first("")
                if '尺码' in b:
                    aa = i.xpath('./dd/ul/li/a/span/text()').extract()
                    ee = len(aa)
                    dd = ' '.join(aa)
                    sql = "update `商品id` set `商品id`.`尺码` = %s where id = %s"
                    cursor.execute(sql,
                                   (dd, bd_id))
                    conection.commit()
                if '颜色' in b:
                    a = i.xpath('./dd/ul/li/a/span/text()').extract()
                    c = len(a)
                    d = ' '.join(a)
                    sql = "update `商品id` set `商品id`.`颜色` = %s where id = %s"
                    cursor.execute(sql,
                                   (d, bd_id))
                    conection.commit()
            w = c * ee  # sku count = colours * sizes
            sql = "update `商品id` set `商品id`.`sku量` = %s where id = %s"
            cursor.execute(sql,
                           (w, bd_id))
            conection.commit()



            # screenshot of the new item, saved as <cwd>\<brand>\<item id><sanitised page title>.jpg;
            # the per-brand sub-folder is assumed to already exist
            title = path + '\\' + shop.split(",")[0] + '\\' + bd_id + re.sub(r"\W", "", webdriver.title)
            capture(webdriver, title + '.jpg')
        for i in shop_olxx:
            if i['id'] == bd_id:
                sql = "UPDATE 商品id set " + i['日期'] + " = (%s) where id = %s"
                cursor.execute(sql,
                               (bd_liang, i['id']))
                conection.commit()
    next_href = selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href').extract_first("")
    if next_href:
        # follow the next results page recursively
        lll('https:' + next_href)


path = os.getcwd()


def capture(webder, save_fn="capture.png"):
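    """Scroll the page to the bottom in small steps so lazily loaded images render,
    wait for the injected "scroll-done" marker to appear in the page title,
    then save a screenshot of the page to save_fn."""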
    # browser = webdriver.Ie()  # Get local session of firefox

    # browser.get(url)  # Load page
    webder.execute_script("""
               (function () {
                 var y = 0;
                 var step = 100;
                 window.scroll(0, 0);

                 function f() {
                   if (y < document.body.scrollHeight) {
                     y += step;
                     window.scroll(0, y);
                     setTimeout(f, 50);
                   } else {
                     window.scroll(0, 0);
                     document.title += "scroll-done";
                   }
                 }

                 setTimeout(f, 1000);
               })();
             """)

    for i in range(30):
        if "scroll-done" in webder.title:
            break
        time.sleep(1)

    webder.save_screenshot(save_fn)
with open(os.getcwd() + r'\1.csv', 'r') as c:
    for shop in c.readlines():
        url = shop.split(",")[2].strip()  # strip any trailing newline from the CSV field
        lll(url)
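A possible refinement: lll() follows the next-page link by calling itself, so a shop with a very large number of result pages could in principle exhaust Python's recursion limit. Below is a minimal iterative sketch under the same assumptions as the script above (same webdriver instance, Selector, and XPaths); the name crawl_shop is hypothetical and the per-item handling is elided:

def crawl_shop(start_url):
    # Iterative version of lll(): follow "next page" links in a loop instead of recursing.
    url = start_url
    while url:
        webdriver.implicitly_wait(50)
        webdriver.get(url)
        webdriver.find_element_by_class_name('pagination')  # wait for the pager
        time.sleep(random.randrange(2, 6))
        selects = Selector(text=webdriver.page_source)
        for i in selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
            pass  # ... same per-item handling as inside lll() ...
        next_href = selects.xpath(
            '//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href').extract_first("")
        url = 'https:' + next_href if next_href else None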

 

Original: http://www.cnblogs.com/gao-xiang/p/7228194.html
