
E-commerce work code


from selenium import webdriver
from scrapy.selector import Selector
import time
import random
import pymysql
from urllib import parse
import re
import os

        # a = Selector(text=webdriver.page_source)
        # if a.xpath('//*[@id="J_submit"]'):
        #     time.sleep(15)
        #     for i in Selector(text=webdriver.page_source).xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        #         bd_pig = i.xpath("./dt/a/img/@src").re('.*(img.*?jpg)')
        #         bd_name = i.xpath('./dd[1]/a/text()').extract_first('')
        #         bd_id = i.xpath('./dd[1]/a/@href').extract_first('')
        #         bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        #         bd_liang = i.xpath('./dd[1]/div/div[last()]/span/text()').extract_first('')
        #
        #         sql = "INSERT INTO " + i.split(",")[0] + "( `id`,图片链接,价格,标题,销量) VALUES (%s,%s,%s,%s,%s)"
        #         cursor.execute(sql,
        #                             (bd_id, bd_pig, bd_much, bd_name, bd_liang))
        #         self.connection.commit()
        # else:
        #     for i in Selector(text=webdriver.page_source).xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        #         bd_pig = i.xpath("./dt/a/img/@src").re('.*(img.*?jpg)')
        #         bd_name = ''.join(re.findall('[\u4e00-\u9fa5]', i.xpath('./dd[1]/a/text()').extract_first('')))
        #         bd_id = ''.join(re.findall('\d', i.xpath('./dd[1]/a/@href').extract_first('')))
        #         bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        #         bd_liang = i.xpath('./dd[1]/div/div[last()]/span/text()').extract_first('')
        #
        #         sql = "INSERT INTO " + shop.split(",")[0] + "( `id`,图片链接,价格,标题,销量) VALUES (%s,%s,%s,%s,%s)"
        #         cursor.execute(sql,
        #                        (bd_id, bd_pig, bd_much, bd_name, bd_liang))
        #         conection.commit()
class spider(object):

    def chul3(self, dates):
        # Parse the page HTML and return the absolute URL of the next result page.
        a = Selector(text=dates)
        next_url = a.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[10]/a[11]/@href').extract_first("")
        return 'https:' + next_url

chuli = spider()
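# chuli.chul3() is a standalone helper: given the raw HTML of a shop search
# result page it returns the absolute URL of the "next page" link. A minimal
# usage sketch (variable names below are illustrative only):
#
#     next_page_url = chuli.chul3(webdriver.page_source)
#
# The crawl below never calls it; lll() extracts the next-page link inline
# with a similar XPath.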

conection = pymysql.connect(host='localhost', user='root', password='123', db='7.24测试',
                            charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
with conection.cursor() as cursor:
    sql1 = "select * from 商品id"
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    shop_oldid = [i['id'] for i in shop_id]
    sql1 = '''
    SELECT `商品id`.id, `上架时间`, '1天销量' AS 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 1
    UNION
    SELECT `商品id`.id, `上架时间`, '7天销量' AS 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 7
    UNION
    SELECT `商品id`.id, `上架时间`, '30天销量' AS 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 30'''
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    shop_olxx = [i for i in shop_id]
conection.commit()
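# Two lookups are now cached for the crawl:
#   shop_oldid - ids already stored in 商品id (used below to skip re-inserting items)
#   shop_olxx  - rows whose 上架时间 is exactly 1, 7 or 30 days ago; each row's 日期
#                field names the sales column (1天销量 / 7天销量 / 30天销量) to fill in today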
cursor = conection.cursor()
webdriver = webdriver.Ie()  # rebind the module name to an IE driver instance
url = 'https://login.taobao.com/member/login.jhtml?spm=a21bo.50862.754894437.1.5dcec6f76Oq9Wh&f=top&redirectURL=https%3A%2F%2Fwww.taobao.com%2F%3Fspm%3Da1z10.1-c-s.1581860521.1.559a715a3EnsHq'
webdriver.get(url)
time.sleep(20)  # leave time for the Taobao login to be completed in the browser
def lll(url):
    webdriver.implicitly_wait(50)
    webdriver.get(url)
    # Wait until the pagination bar exists, i.e. the search result list has rendered.
    myDynamicElement = webdriver.find_element_by_class_name('pagination')
    a = webdriver.page_source
    time.sleep(random.randrange(2, 6))
    selects = Selector(text=a)
    for i in selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        bd_pig = i.xpath("./dt/a/img/@src").re(r'(.*)_')
        bd_name = ''.join(re.findall('[\u4e00-\u9fa5]', i.xpath('./dd[1]/a/text()').extract_first('')))
        bd_id = ''.join(re.findall(r'\d', i.xpath('./dd[1]/a/@href').extract_first('')))
        bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        bd_idlian = 'http://item.taobao.com/item.htm?id=' + bd_id
        bd_liang = i.xpath('./dd[1]/div/div[last()]/span[last()]/text()').extract_first('')
        if bd_id not in shop_oldid:
            # New item: insert the basic record.
            sql = "INSERT INTO 商品id (`品牌`, `id`,图片链接,价格,标题,商品地址) VALUES (%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql,
                           (shop.split(",")[0], bd_id, bd_pig, bd_much, bd_name, bd_idlian))
            conection.commit()
            # Open the item detail page to read category, sizes and colours.
            webdriver.implicitly_wait(50)
            webdriver.get('http://item.taobao.com/item.htm?id=' + bd_id)
            myDynamicElement = webdriver.find_element_by_class_name('tb-price-spec')
            time.sleep(random.randrange(2, 6))
            date = webdriver.page_source
            select_xixi = Selector(text=date)
            liem = select_xixi.xpath('//*[@id="J_TMySize"]/@data-value').extract_first("")
            sql = "update `商品id` set `商品id`.`类目` = %s where id = %s"
            cursor.execute(sql,
                           (liem, bd_id))
            conection.commit()
            c = 1
            ee = 1
            for sku in select_xixi.xpath('//*[@id="J_isku"]/div/dl'):
                b = sku.xpath('./dt/text()').extract_first("")
                if '尺码' in b:
                    aa = sku.xpath('./dd/ul/li/a/span/text()').extract()
                    ee = len(aa)
                    dd = ' '.join(aa)
                    sql = "update `商品id` set `商品id`.`尺码` = %s where id = %s"
                    cursor.execute(sql,
                                   (dd, bd_id))
                    conection.commit()
                if '颜色' in b:
                    a = sku.xpath('./dd/ul/li/a/span/text()').extract()
                    c = len(a)
                    d = ' '.join(a)
                    sql = "update `商品id` set `商品id`.`颜色` = %s where id = %s"
                    cursor.execute(sql,
                                   (d, bd_id))
                    conection.commit()
            # SKU count = number of colours * number of sizes.
            w = c * ee
            sql = "update `商品id` set `商品id`.`sku量` = %s where id = %s"
            cursor.execute(sql,
                           (w, bd_id))
            conection.commit()

            # Save a screenshot of the item page as <path>\<brand>\<id><cleaned page title>.jpg
            title = path + '\\' + shop.split(",")[0] + '\\' + bd_id + re.sub(r"\W", "", webdriver.title)
            capture(webdriver, title + '.jpg')
        # Fill in today's 1/7/30-day sales column for items whose listing age matches.
        for row in shop_olxx:
            if row['id'] == bd_id:
                sql = "UPDATE 商品id set " + row['日期'] + " = (%s) where id = %s"
                cursor.execute(sql,
                               (bd_liang, row['id']))
                conection.commit()
    # Follow the "next page" link, if any, and crawl it recursively.
    next_href = selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href').extract_first("")
    if next_href:
        lll('https:' + next_href)
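# Note: lll() recurses once per result page, so a shop with a very large number
# of pages could in principle hit Python's default recursion limit (about 1000
# frames). A minimal iterative sketch under that assumption (process_page is a
# hypothetical helper that handles one page and returns the next-page URL or ""):
#
#     def crawl_shop(start_url):
#         next_url = start_url
#         while next_url:
#             next_url = process_page(next_url)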


path = os.getcwd()


def capture(webder, save_fn="capture.png"):
    # browser = webdriver.Ie()  # Get local session of firefox

    # browser.get(url)  # Load page
    webder.execute_script("""
               (function () {
                 var y = 0;
                 var step = 100;
                 window.scroll(0, 0);

                 function f() {
                   if (y < document.body.scrollHeight) {
                     y += step;
                     window.scroll(0, y);
                     setTimeout(f, 50);
                   } else {
                     window.scroll(0, 0);
                     document.title += "scroll-done";
                   }
                 }

                 setTimeout(f, 1000);
               })();
             """)

    for i in range(30):
        if "scroll-done" in webder.title:
            break
        time.sleep(1)

    webder.save_screenshot(save_fn)
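# capture() usage sketch: the target page must already be loaded in the driver.
# The injected script scrolls down in 100px steps (presumably so lazy-loaded
# images render), tags document.title with "scroll-done", and the function then
# saves a screenshot. For example:
#
#     webdriver.get(item_url)          # item_url: any page to shoot (illustrative)
#     capture(webdriver, 'item.jpg')   # 'item.jpg' is an example file name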
# 1.csv: one shop per line; column 1 is the brand name, column 3 is the shop search URL.
with open(os.getcwd() + r'\1.csv', 'r') as c:
    for shop in c.readlines():
        url = shop.split(",")[2].strip()
        lll(url)

 


Original post: http://www.cnblogs.com/gao-xiang/p/7228194.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!