
E-commerce work code


from selenium import webdriver
from scrapy.selector import Selector
import time
import random
import pymysql
from urllib import parse
import re
import os

        # a = Selector(text=webdriver.page_source)
        # if a.xpath('//*[@id="J_submit"]'):
        #     time.sleep(15)
        #     for i in Selector(text=webdriver.page_source).xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        #         bd_pig = i.xpath("./dt/a/img/@src").re('.*(img.*?jpg)')
        #         bd_name = i.xpath('./dd[1]/a/text()').extract_first('')
        #         bd_id = i.xpath('./dd[1]/a/@href').extract_first('')
        #         bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        #         bd_liang = i.xpath('./dd[1]/div/div[last()]/span/text()').extract_first('')
        #
        #         sql = "INSERT INTO " + i.split(",")[0] + "( `id`,图片链接,价格,标题,销量) VALUES (%s,%s,%s,%s,%s)"
        #         cursor.execute(sql,
        #                             (bd_id, bd_pig, bd_much, bd_name, bd_liang))
        #         self.connection.commit()
        # else:
        #     for i in Selector(text=webdriver.page_source).xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        #         bd_pig = i.xpath("./dt/a/img/@src").re('.*(img.*?jpg)')
        #         bd_name = ''.join(re.findall('[\u4e00-\u9fa5]', i.xpath('./dd[1]/a/text()').extract_first('')))
        #         bd_id = ''.join(re.findall('\d', i.xpath('./dd[1]/a/@href').extract_first('')))
        #         bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        #         bd_liang = i.xpath('./dd[1]/div/div[last()]/span/text()').extract_first('')
        #
        #         sql = "INSERT INTO " + shop.split(",")[0] + "( `id`,图片链接,价格,标题,销量) VALUES (%s,%s,%s,%s,%s)"
        #         cursor.execute(sql,
        #                        (bd_id, bd_pig, bd_much, bd_name, bd_liang))
        #         conection.commit()
class spider(object):

    def chul3(self, dates):
        # Parse the page HTML and return the absolute URL of the next result page.
        a = Selector(text=dates)
        next_url = a.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[10]/a[11]/@href').extract_first("")
        return 'https:' + next_url

chuli = spider()
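# chuli.chul3() is a standalone helper: given the raw HTML of a shop search
# result page it returns the absolute URL of the "next page" link. A minimal
# usage sketch (variable names below are illustrative only):
#
#     next_page_url = chuli.chul3(webdriver.page_source)
#
# The crawl below never calls it; lll() extracts the next-page link inline
# with a similar XPath.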

conection = pymysql.connect(host='localhost', user='root', password='123', db='7.24测试',
                            charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
with conection.cursor() as cursor:
    sql1 = "select * from 商品id"
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    shop_oldid = [i['id'] for i in shop_id]
    sql1 = '''
    SELECT `商品id`.id, `上架时间`, '1天销量' AS 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 1
    UNION
    SELECT `商品id`.id, `上架时间`, '7天销量' AS 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 7
    UNION
    SELECT `商品id`.id, `上架时间`, '30天销量' AS 日期
    FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 30'''
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    shop_olxx = [i for i in shop_id]
conection.commit()
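# Two lookups are now cached for the crawl:
#   shop_oldid - ids already stored in 商品id (used below to skip re-inserting items)
#   shop_olxx  - rows whose 上架时间 is exactly 1, 7 or 30 days ago; each row's 日期
#                field names the sales column (1天销量 / 7天销量 / 30天销量) to fill in today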
cursor = conection.cursor()
webdriver = webdriver.Ie()  # rebind the module name to an IE driver instance
url = 'https://login.taobao.com/member/login.jhtml?spm=a21bo.50862.754894437.1.5dcec6f76Oq9Wh&f=top&redirectURL=https%3A%2F%2Fwww.taobao.com%2F%3Fspm%3Da1z10.1-c-s.1581860521.1.559a715a3EnsHq'
webdriver.get(url)
time.sleep(20)  # leave time for the Taobao login to be completed in the browser
def lll(url):
    webdriver.implicitly_wait(50)
    webdriver.get(url)
    # Wait until the pagination bar exists, i.e. the search result list has rendered.
    myDynamicElement = webdriver.find_element_by_class_name('pagination')
    a = webdriver.page_source
    time.sleep(random.randrange(2, 6))
    selects = Selector(text=a)
    for i in selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
        bd_pig = i.xpath("./dt/a/img/@src").re(r'(.*)_')
        bd_name = ''.join(re.findall('[\u4e00-\u9fa5]', i.xpath('./dd[1]/a/text()').extract_first('')))
        bd_id = ''.join(re.findall(r'\d', i.xpath('./dd[1]/a/@href').extract_first('')))
        bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
        bd_idlian = 'http://item.taobao.com/item.htm?id=' + bd_id
        bd_liang = i.xpath('./dd[1]/div/div[last()]/span[last()]/text()').extract_first('')
        if bd_id not in shop_oldid:
            # New item: insert the basic record.
            sql = "INSERT INTO 商品id (`品牌`, `id`,图片链接,价格,标题,商品地址) VALUES (%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql,
                           (shop.split(",")[0], bd_id, bd_pig, bd_much, bd_name, bd_idlian))
            conection.commit()
            # Open the item detail page to read category, sizes and colours.
            webdriver.implicitly_wait(50)
            webdriver.get('http://item.taobao.com/item.htm?id=' + bd_id)
            myDynamicElement = webdriver.find_element_by_class_name('tb-price-spec')
            time.sleep(random.randrange(2, 6))
            date = webdriver.page_source
            select_xixi = Selector(text=date)
            liem = select_xixi.xpath('//*[@id="J_TMySize"]/@data-value').extract_first("")
            sql = "update `商品id` set `商品id`.`类目` = %s where id = %s"
            cursor.execute(sql,
                           (liem, bd_id))
            conection.commit()
            c = 1
            ee = 1
            for sku in select_xixi.xpath('//*[@id="J_isku"]/div/dl'):
                b = sku.xpath('./dt/text()').extract_first("")
                if '尺码' in b:
                    aa = sku.xpath('./dd/ul/li/a/span/text()').extract()
                    ee = len(aa)
                    dd = ' '.join(aa)
                    sql = "update `商品id` set `商品id`.`尺码` = %s where id = %s"
                    cursor.execute(sql,
                                   (dd, bd_id))
                    conection.commit()
                if '颜色' in b:
                    a = sku.xpath('./dd/ul/li/a/span/text()').extract()
                    c = len(a)
                    d = ' '.join(a)
                    sql = "update `商品id` set `商品id`.`颜色` = %s where id = %s"
                    cursor.execute(sql,
                                   (d, bd_id))
                    conection.commit()
            # SKU count = number of colours * number of sizes.
            w = c * ee
            sql = "update `商品id` set `商品id`.`sku量` = %s where id = %s"
            cursor.execute(sql,
                           (w, bd_id))
            conection.commit()

            # Save a screenshot of the item page as <path>\<brand>\<id><cleaned page title>.jpg
            title = path + '\\' + shop.split(",")[0] + '\\' + bd_id + re.sub(r"\W", "", webdriver.title)
            capture(webdriver, title + '.jpg')
        # Fill in today's 1/7/30-day sales column for items whose listing age matches.
        for row in shop_olxx:
            if row['id'] == bd_id:
                sql = "UPDATE 商品id set " + row['日期'] + " = (%s) where id = %s"
                cursor.execute(sql,
                               (bd_liang, row['id']))
                conection.commit()
    # Follow the "next page" link, if any, and crawl it recursively.
    next_href = selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href').extract_first("")
    if next_href:
        lll('https:' + next_href)
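# Note: lll() recurses once per result page, so a shop with a very large number
# of pages could in principle hit Python's default recursion limit (about 1000
# frames). A minimal iterative sketch under that assumption (process_page is a
# hypothetical helper that handles one page and returns the next-page URL or ""):
#
#     def crawl_shop(start_url):
#         next_url = start_url
#         while next_url:
#             next_url = process_page(next_url)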


path = os.getcwd()


def capture(webder, save_fn="capture.png"):
    # browser = webdriver.Ie()  # Get local session of firefox

    # browser.get(url)  # Load page
    webder.execute_script("""
               (function () {
                 var y = 0;
                 var step = 100;
                 window.scroll(0, 0);

                 function f() {
                   if (y < document.body.scrollHeight) {
                     y += step;
                     window.scroll(0, y);
                     setTimeout(f, 50);
                   } else {
                     window.scroll(0, 0);
                     document.title += "scroll-done";
                   }
                 }

                 setTimeout(f, 1000);
               })();
             """)

    for i in range(30):
        if "scroll-done" in webder.title:
            break
        time.sleep(1)

    webder.save_screenshot(save_fn)
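# capture() usage sketch: the target page must already be loaded in the driver.
# The injected script scrolls down in 100px steps (presumably so lazy-loaded
# images render), tags document.title with "scroll-done", and the function then
# saves a screenshot. For example:
#
#     webdriver.get(item_url)          # item_url: any page to shoot (illustrative)
#     capture(webdriver, 'item.jpg')   # 'item.jpg' is an example file name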
# 1.csv: one shop per line; column 1 is the brand name, column 3 is the shop search URL.
with open(os.getcwd() + r'\1.csv', 'r') as c:
    for shop in c.readlines():
        url = shop.split(",")[2].strip()
        lll(url)

 


Original post: http://www.cnblogs.com/gao-xiang/p/7228194.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!