3、gucci_pyqt
<pre><code>from bs4 import BeautifulSoup
import urllib.request
# import pandas as pd
import ssl
import time
import random
import xlsxwriter
import re
import json
import os
import pickle
import socket
import sys
from PyQt5.QtWidgets import QApplication, QWidget ,QPushButton,QHBoxLayout,QTextEdit
# Set the process-wide default socket timeout to 30 s.
socket.setdefaulttimeout(30)
# NOTE(security): swaps the default HTTPS context factory for the unverified
# one, which turns off TLS certificate verification for urllib HTTPS requests
# made after this line.
ssl._create_default_https_context = ssl._create_unverified_context
class GucciDownload:
    """Holder for the status text shown by the downloader.

    Both fields are class attributes, so every instance sees the same
    value until an instance assigns its own.
    """

    # error_message: error text, initially empty.
    # download_message: progress text, initially empty.
    error_message = download_message = ''
def mkdir(path):
    """Create directory ``path`` (including parents) if it does not exist.

    Surrounding whitespace and trailing backslashes are stripped from
    ``path`` first.

    Returns:
        True when the directory was created by this call, False when the
        path already existed.
    """
    path = path.strip().rstrip("\\")
    # Attempt the creation and react to the failure rather than checking
    # os.path.exists() first: that removes the window in which another
    # process could create the directory between the check and makedirs().
    try:
        os.makedirs(path)
    except FileExistsError:
        # Already present: do not create it, just report.
        print(path + ' 目录已存在')
        return False
    print(path + ' 创建成功')
    return True
def openUrl(url):
    """Fetch ``url`` and return its body parsed into a BeautifulSoup tree.

    The request is sent with a desktop Safari User-Agent header, the body
    is decoded as UTF-8 and parsed with the ``lxml`` parser.  Errors raised
    by ``urlopen`` or by decoding propagate to the caller.
    """
    it_header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15'}
    req = urllib.request.Request(url, headers=it_header)
    # Use the response as a context manager so the connection is closed
    # even when read()/decode() raises.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode("utf-8")
    return BeautifulSoup(html, 'lxml')
def return_json(url):
    """Fetch ``url`` and return the response body as UTF-8 decoded text.

    Despite the name, nothing is parsed here; the caller is expected to
    run ``json.loads`` on the result.  The request is sent with a desktop
    Safari User-Agent header.  Errors raised by ``urlopen`` or by decoding
    propagate to the caller.
    """
    it_header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15'}
    req = urllib.request.Request(url, headers=it_header)
    # Use the response as a context manager so the connection is closed
    # even when read()/decode() raises.
    with urllib.request.urlopen(req) as response:
        return response.read().decode("utf-8")
def re_str(str):
    """Return ``str`` with all spaces, CRs, LFs and tabs removed."""
    unwanted = ' \r\n\t'
    return ''.join(ch for ch in str if ch not in unwanted)
def downUrl(url, path_name):
    """Download ``url`` to the local file ``path_name`` and print a success line.

    Always returns None; errors from ``urlretrieve`` propagate.
    """
    urllib.request.urlretrieve(url, filename=path_name)
    success_line = path_name + ": success"
    print(success_line)
def function(date):
    """Sort key: return the ``'product_prices'`` entry of a product record."""
    price = date['product_prices']
    return price
def download_gucci_zh(download_type):
    """Scrape one gucci.cn category and pickle the resulting product list.

    Requests listing pages 1-11 of the category selected by
    ``download_type``, stopping at the first page without product info.
    For each product the image is saved under
    ``./gucci/<download_type>/gucci_zh_/`` (only when the module-level
    ``download`` flag is true) and a record is collected.  The records are
    sorted by price and pickled to ``./gucci/<download_type>/zh_goods_list.txt``.

    Progress text is appended to ``mGucciDownload.download_message`` and
    pushed to the ``qTextMessage`` widget; both are module-level globals
    created by the script entry point.

    Args:
        download_type: category key such as ``'men_all_shoes'``.  An unknown
            key leaves the ``ni`` query parameter empty.
    """
    # Value of the ``ni`` query parameter for every supported category.
    category_ids = {
        'men_all_clothes': 97,
        'men_all_shoes': 17,
        'men_all_bags': 16,
        'men_all_belts': 64,
        'men_all_wallets': 63,
        'women_all_clothes': 90,
        'women_all_shoes': 13,
        'women_all_bags': 12,
        'women_all_belts': 41,
        'women_all_wallets': 40,
    }

    start_line = '开始下载' + download_type + '...' + '\n'
    mGucciDownload.download_message = mGucciDownload.download_message + start_line
    qTextMessage.setText(mGucciDownload.download_message)

    # Millisecond timestamp, sent as the ``_`` query parameter.
    new_now_time = int(round(time.time() * 1000))
    print(new_now_time)

    img_save_path = './gucci/' + download_type + '/gucci_zh_/'
    mkdir(img_save_path)

    zh_url_i = 'https://www.gucci.cn/zh/itemList?ni='
    zh_url_o = '&direction=down&listName=ProductGrid&_=' + str(new_now_time)
    zh_url_ni = category_ids.get(download_type, '')

    zh_goods_list = []
    zh_goods_i_id = 0
    for page_number in range(1, 12):
        url = zh_url_i + str(zh_url_ni) + '&pn=' + str(page_number) + zh_url_o
        try:
            Soup = openUrl(url)
        except Exception as e:
            # Best effort: a page that fails to load is skipped.
            print('error: ' + url)
            print(e)
            continue

        # Name, code and price elements appear twice per product ...
        swipers = Soup.find_all(class_='spice-item-grid-info')
        goods_number = Soup.find_all(class_='spice-item-grid-img-box e-abtest-code-click')
        goods_prices = Soup.find_all(class_='spice-item-grid-price')
        # ... while the image element appears once.
        imgs = Soup.find_all(class_='visual-img')

        if len(swipers) == 0:
            # No product info on this page: end of the listing.
            print(page_number)
            break

        for i in range(len(imgs)):
            # The doubled elements are indexed with 2 * i.
            goods_item = goods_number[2 * i]['e-abtest-code']
            goods_name = swipers[2 * i].h2.text
            goods_price = re_str(goods_prices[2 * i].text).replace(',', '').replace('.', '')
            # First run of digits is taken as the integer price.
            product_price = re.findall(r"\d+", goods_price)[0]
            imgurls = json.loads(imgs[i]['spice-data-image-src'])
            goods_img_url = imgurls['medium']
            # NOTE(review): nid and position are hard-coded here instead of
            # using the category id / the product's own position - confirm.
            goods_url = 'https://www.gucci.cn/zh/pr/' + goods_item + '?nid=63&listName=ProductGrid&position=37&categoryPath='
            local_path = img_save_path + str(zh_goods_i_id) + '.jpg'

            if download:
                try:
                    downUrl(goods_img_url, local_path)
                except Exception as e:
                    # Best effort: a product whose image fails is skipped.
                    print('downerror: ' + goods_img_url)
                    print(e)
                    continue

            zh_goods_list.append({
                'product_title': goods_name,
                'product_url': goods_url,
                'product_prices': int(product_price),
                'product_all_prices': '',
                'product_img_url': goods_img_url,
                'url_path': local_path,
                'productCode': goods_item,
            })
            zh_goods_i_id = zh_goods_i_id + 1

            # Append only this product's lines; appending a growing buffer
            # would repeat every earlier line in the status text.
            item_line = goods_name + '\n' + goods_price + '\n'
            mGucciDownload.download_message = mGucciDownload.download_message + item_line
            qTextMessage.setText(mGucciDownload.download_message)

            print(goods_item)
            print(goods_name)
            print(goods_price)
            print(goods_url)
            print(page_number)
            print(zh_goods_i_id)

    zh_goods_list.sort(key=function)
    with open('./gucci/' + download_type + '/zh_goods_list.txt', 'wb') as out_file:
        pickle.dump(zh_goods_list, out_file)
    return None
def download_gucci_it(download_type):
    """Scrape one gucci.com (Italy) category and pickle the product list.

    Requests product-grid pages 1-9 of the category selected by
    ``download_type``, stopping at the first page whose item list is empty.
    For each item the image is saved under
    ``./gucci/<download_type>/gucci_it_/`` (only when the module-level
    ``download`` flag is true) and a record is collected.  The records are
    sorted by price and pickled to ``./gucci/<download_type>/it_goods_list.txt``.

    Args:
        download_type: category key such as ``'men_all_shoes'``.  An unknown
            key leaves the ``categoryCode`` query parameter empty.
    """
    # ``categoryCode`` query value for every supported category.
    category_codes = {
        'men_all_bags': 'men-bags',
        'men_all_clothes': 'men-readytowear',
        'men_all_shoes': 'men-shoes',
        'men_all_belts': 'men-accessories-belts',
        'men_all_wallets': 'men-accessories-wallets',
        'women_all_bags': 'women-handbags',
        'women_all_clothes': 'women-readytowear',
        'women_all_shoes': 'women-shoes',
        'women_all_belts': 'women-accessories-belts',
        'women_all_wallets': 'women-accessories-wallets',
    }

    img_save_path = './gucci/' + download_type + '/gucci_it_/'
    mkdir(img_save_path)

    # Listing URL; the page number is appended per request.
    it_url = ('https://www.gucci.com/it/it/c/productgrid?categoryCode='
              + category_codes.get(download_type, '')
              + '&show=Page&page=')
    # Product link prefix; the product code is appended per item.
    it_goods_url_in = 'https://www.gucci.com/it/it/search?search-cat=header-search&text='

    it_goods_list = []
    it_goods_i_id = 0
    print('loading.......')
    for page_number in range(1, 10):
        url = it_url + str(page_number)
        print(url)
        try:
            json_data = return_json(url)
        except Exception as e:
            # Best effort: a page that fails to load is skipped.
            print('error: ' + url)
            print(e)
            continue

        hjson = json.loads(json_data)
        items = hjson['products']['items']
        if not items:
            # No items: end of the listing.
            break

        for item in items:
            productCode = item['productCode']
            price = item['price'].replace(',', '').replace('.', '')
            # First run of digits is taken as the integer price.
            product_price = re.findall(r"\d+", price)[0]
            productName = item['productName']
            img_url = 'http:' + item['primaryImage']['datasrcmedium']
            local_path = img_save_path + str(it_goods_i_id) + '.jpg'

            # ``download`` gates only the image fetch, so the record is kept
            # even when images are not re-downloaded (same as the other
            # scrapers in this file).
            if download:
                try:
                    print('img_url: ' + img_url)
                    urllib.request.urlretrieve(img_url, local_path)
                except Exception as e:
                    # Best effort: an item whose image fails is skipped.
                    print('error: ')
                    print(e)
                    continue

            it_goods_list.append({
                'product_title': productName,
                'product_url': it_goods_url_in + productCode,
                'product_prices': int(product_price),
                'product_all_prices': '',
                'product_img_url': img_url,
                'url_path': local_path,
                'productCode': productCode,
            })
            it_goods_i_id = it_goods_i_id + 1

            print(productCode)
            print(price)
            print(productName)
            print(img_url)

    it_goods_list.sort(key=function)
    with open('./gucci/' + download_type + '/it_goods_list.txt', 'wb') as out_file:
        pickle.dump(it_goods_list, out_file)
    print('**********************************')
    print('**********************************')
    print('**********************************')
    print(it_goods_list)
    return None
def _kaola_listing_urls(host, category, pro_ids, head_category, change_content):
    """Return the (prefix, suffix) pair that surrounds the page number in a kaola listing URL."""
    url_in = 'https://' + host + '/brand/1226-' + category + '.html?pageSize=60&pageNo='
    url_out = ('&sortfield=0&isStock=true&isSelfProduct=false&isPromote=false'
               '&isTaxFree=false&isDesc=true&proIds=' + pro_ids
               + '&lowerPrice=-1&upperPrice=-1&isBrand=0&headCategoryId=' + head_category
               + '&backCategory=' + category
               + '&key=&changeContent=' + change_content
               + '&#search_crumbs')
    return url_in, url_out


def download_kaola(download_type):
    """Scrape one kaola.com brand category and pickle the product list.

    Requests listing pages 1-9 of the category selected by
    ``download_type``, stopping at the first page without any ``selfflag``
    element.  For each product the image is saved under
    ``./gucci/<download_type>/kaola_zh_/`` (only when the module-level
    ``download`` flag is true) and a record is collected.  The records are
    sorted by price and pickled to ``./gucci/<download_type>/kaola_list.txt``.

    Args:
        download_type: category key such as ``'men_all_shoes'``.  An unknown
            key yields empty URL parts, so every page request fails and an
            empty list is written.
    """
    search_host = 'search.kaola.com'
    # (host, category id, proIds, headCategoryId, changeContent) per category.
    listing_params = {
        'men_all_shoes': (search_host, '1078', '', '-1', 'c'),
        'men_all_bags': (search_host, '1027', '', '-1', 'c'),
        'men_all_clothes': (search_host, '1047', '', '-1', 'c'),
        'men_all_belts': (search_host, '1073', '100224_4111380', '1058', '0'),
        'men_all_wallets': ('www.kaola.com', '6259', '100224_4111380', '-1', 'isStock'),
        'women_all_shoes': (search_host, '1077', '100224_4111421', '-1', 'c'),
        'women_all_bags': (search_host, '1028', '100224_4111421', '-1', 'c'),
        'women_all_clothes': (search_host, '1048', '', '-1', 'crumbs_0'),
        'women_all_belts': (search_host, '1073', '100224_4111421', '1058', 'c'),
        'women_all_wallets': (search_host, '6259', '100224_4111421', '-1', '0'),
    }

    img_save_path = './gucci/' + download_type + '/kaola_zh_/'
    mkdir(img_save_path)

    if download_type in listing_params:
        url_in, url_out = _kaola_listing_urls(*listing_params[download_type])
    else:
        url_in, url_out = '', ''

    kaola_list = []
    product_number = 0
    for page_number in range(1, 10):
        print(page_number)
        url = url_in + str(page_number) + url_out
        try:
            Soup = openUrl(url)
        except Exception as e:
            # Best effort: a page that fails to load is skipped.
            print(url)
            print('openUrl: ')
            print(e)
            continue

        product_titles = Soup.find_all(class_="title")
        goods_prices_curs = Soup.find_all(class_='cur')
        goods_imgs = Soup.find_all(class_='img')
        goods_all_prices = Soup.find_all(class_='price')
        goods_colors = Soup.find_all(class_='skuwrap')
        sale_names = Soup.find_all(class_='selfflag')
        if len(sale_names) == 0:
            break

        # The first matches of these classes are skipped via fixed offsets
        # (11 for titles, 10 for prices and images); presumably they belong
        # to page chrome rather than products - confirm against the page.
        for list_i in range(len(goods_all_prices) - 10):
            print('***************************************')
            title_tag = product_titles[list_i + 11]
            product_title = title_tag['title']
            product_url_b = title_tag['href']
            if 'goods.kaola.com' in product_url_b:
                product_url = 'https:' + product_url_b
            else:
                product_url = 'https://goods.kaola.com' + product_url_b
            product_prices = goods_prices_curs[list_i + 10].text
            product_sales_name = re_str(sale_names[list_i].text)
            # First run of digits is taken as the integer price.
            product_price = re.findall(r"\d+", product_prices)[0]
            product_all_prices = goods_all_prices[list_i + 10].text
            print(product_title)
            print(product_url)
            print(product_prices)
            print(product_all_prices)
            img_url = goods_imgs[list_i + 10].img['data-src']
            # Drop the first two characters of the source value and prepend
            # an explicit http scheme.
            product_img_url = 'http://' + img_url[2:]

            product_other_color = '无'
            try:
                if goods_colors:
                    color_tags = goods_colors[list_i].find_all(class_='skutag')
                    product_other_color = ''
                    # Each colour is prepended, so the result lists colours
                    # in reverse order, each followed by a comma.
                    for color_tag in color_tags:
                        product_other_color = color_tag['title'] + ',' + product_other_color
            except Exception as e:
                print(e)
                continue

            local_path = img_save_path + str(product_number) + '.jpg'
            if download:
                try:
                    downUrl(product_img_url, local_path)
                except Exception as e:
                    # Best effort: a product whose image fails is skipped.
                    print("down error: ")
                    print(e)
                    continue
            print(product_img_url)

            kaola_list.append({
                'product_title': product_title,
                'product_url': product_url,
                'product_prices': int(product_price),
                'product_all_prices': product_all_prices,
                'product_img_url': product_img_url,
                'product_sales_name': product_sales_name,
                'product_other_color': product_other_color,
                'url_path': local_path,
            })
            print(product_sales_name)
            print(product_other_color)
            print(product_number)
            product_number = product_number + 1
            print('---------------------------------------')

    kaola_list.sort(key=function)
    with open('./gucci/' + download_type + '/kaola_list.txt', 'wb') as out_file:
        pickle.dump(kaola_list, out_file)
    print(kaola_list)
    print('**********************************')
    print('**********************************')
    print('**********************************')
    return None
def write_excel(download_type, book):
    """Add one worksheet comparing the three scraped lists for a category.

    Loads ``kaola_list.txt``, ``it_goods_list.txt`` and ``zh_goods_list.txt``
    from ``./gucci/<download_type>/`` and writes them side by side into a new
    worksheet named ``download_type``: the Italian list in columns B-F, the
    Chinese list in columns H-L and the kaola list in columns N-T.

    Args:
        download_type: category key; also used as the worksheet name.
        book: the workbook to add the worksheet to.  It is not closed here.

    Returns:
        The same ``book`` object.
    """
    data_dir = './gucci/' + download_type + '/'
    # NOTE(security): pickle.load executes whatever the file encodes; only
    # load files produced by this program's own download step.
    with open(data_dir + 'kaola_list.txt', 'rb') as in_file:
        kaola_list = pickle.load(in_file)
    with open(data_dir + 'it_goods_list.txt', 'rb') as in_file:
        it_goods_list = pickle.load(in_file)
    with open(data_dir + 'zh_goods_list.txt', 'rb') as in_file:
        zh_goods_list = pickle.load(in_file)

    sheet = book.add_worksheet(download_type)

    # Column widths, in sheet order A..T.
    column_widths = (
        ("A:A", 5), ("B:B", 80.88), ("C:C", 10.5), ("D:D", 19.38),
        ("E:E", 12), ("F:F", 82),
        ("G:G", 5), ("H:H", 55), ("I:I", 10.5), ("J:J", 19.38),
        ("K:K", 8.25), ("L:L", 106),
        ("M:M", 5), ("N:N", 89), ("O:O", 11.5), ("P:P", 50.38),
        ("Q:Q", 14.25), ("R:R", 25.75), ("S:S", 25.75), ("T:T", 28.75),
    )
    for column_range, width in column_widths:
        sheet.set_column(column_range, width)

    # One cell format shared by every written cell.
    cell_format = book.add_format({
        'font_size': 11,
        'bold': False,
        'align': 'center',
        'valign': 'vcenter',
        'font_name': u'微软雅黑',
        'text_wrap': False,
    })

    # Header row.
    sheet.set_row(0, 22)
    headers = (
        '欧洲', '商品名称', '正面图', '货号', '欧洲零售价', '商品链接',
        '中国', '商品名称', '正面图', '货号', '中国零售价', '商品链接',
        '考拉', '商品名称', '正面图', '商品链接', '到手价', '所有售价',
        '备选颜色', '销售商家',
    )
    for column, header in enumerate(headers):
        sheet.write(0, column, header, cell_format)

    # Image placement options: horizontal offset plus scale factors.
    gucci_zh_img_format = {'x_offset': 4, 'y_offset': 0, 'x_scale': 0.3, 'y_scale': 0.29}
    img_format = {'x_offset': 4, 'y_offset': 0, 'x_scale': 0.2, 'y_scale': 0.19}

    # Chinese site: columns H-L.
    for row_number, product in enumerate(zh_goods_list, start=1):
        sheet.set_row(row_number, 52)
        sheet.write(row_number, 7, product['product_title'], cell_format)
        sheet.insert_image(row_number, 8, product['url_path'], gucci_zh_img_format)
        sheet.write(row_number, 9, product['productCode'], cell_format)
        sheet.write(row_number, 10, '¥' + str(product['product_prices']), cell_format)
        sheet.write(row_number, 11, product['product_url'], cell_format)

    # Italian site: columns B-F.
    for row_number, product in enumerate(it_goods_list, start=1):
        sheet.set_row(row_number, 52)
        sheet.write(row_number, 1, product['product_title'], cell_format)
        sheet.insert_image(row_number, 2, product['url_path'], img_format)
        sheet.write(row_number, 3, product['productCode'], cell_format)
        sheet.write(row_number, 4, '€' + str(product['product_prices']), cell_format)
        sheet.write(row_number, 5, product['product_url'], cell_format)

    # kaola: columns N-T.
    for row_number, product in enumerate(kaola_list, start=1):
        sheet.set_row(row_number, 52)
        sheet.write(row_number, 13, product['product_title'], cell_format)
        sheet.insert_image(row_number, 14, product['url_path'], img_format)
        sheet.write(row_number, 15, product['product_url'], cell_format)
        sheet.write(row_number, 16, '¥' + str(product['product_prices']), cell_format)
        sheet.write(row_number, 17, product['product_all_prices'], cell_format)
        sheet.write(row_number, 18, product['product_other_color'], cell_format)
        sheet.write(row_number, 19, product['product_sales_name'], cell_format)

    print(download_type + ': 写入EXCEL成功')
    return book
def get_all_list():
    """Return ``(men_all, women_all)``: the category keys for each group."""
    categories = ('shoes', 'bags', 'belts', 'clothes', 'wallets')
    men_all = ['men_all_' + category for category in categories]
    women_all = ['women_all_' + category for category in categories]
    return men_all, women_all
def write_excel_to_path(men_all, excel_name):
    """Write one workbook at ``excel_name`` with a sheet per category key.

    ``men_all`` is the list of category keys to include; each one is
    printed and handed to ``write_excel`` together with the workbook.
    """
    book = xlsxwriter.Workbook(excel_name)
    for download_type in men_all:
        print(download_type)
        book = write_excel(download_type, book)
    book.close()
    print(excel_name + ': 写入EXCEL成功')
# Valid download types:
#   'men_all_shoes'   'men_all_bags'   'men_all_belts'   'men_all_clothes'   'men_all_wallets'
#   'women_all_shoes' 'women_all_bags' 'women_all_belts' 'women_all_clothes' 'women_all_wallets'
# The module-level ``download`` switch controls whether images are fetched.
def strat_download():
    """Run all three scrapers for every category, then write both workbooks.

    Men's categories are processed first, then women's; for each category
    the Chinese site, the Italian site and kaola are scraped in that order.
    Afterwards ``men_all.xlsx`` and ``women_all.xlsx`` are written.
    """
    men_all, women_all = get_all_list()
    for group in (men_all, women_all):
        for download_type in group:
            download_gucci_zh(download_type)
            download_gucci_it(download_type)
            download_kaola(download_type)
    write_excel_to_path(men_all, 'men_all.xlsx')
    write_excel_to_path(women_all, 'women_all.xlsx')
if __name__ == "__main__":
    # Module-level state read by the download functions.
    error_message = ''
    mGucciDownload = GucciDownload()
    mGucciDownload.download_message = '准备下载。。。请点击开始进行下载。。。'
    # Switch: when true, product images are downloaded.
    download = True
    str_text = 'hello word'

    app = QApplication(sys.argv)

    # Main window: size, position and title.
    qWidget = QWidget()
    qWidget.resize(350, 450)
    qWidget.move(400, 300)
    qWidget.setWindowTitle('Gucci_product')

    # Text box showing the progress message.
    qTextMessage = QTextEdit()
    qTextMessage.setText(mGucciDownload.download_message)

    # Start button; a click runs the whole download.
    updateBtn = QPushButton('开始下载')
    button_style = (
        ' text-align : center;\n'
        'background-color : NavajoWhite;\n'
        'height : 30px;\n'
        'border-style: outset;\n'
        'font : 13px '
    )
    updateBtn.setStyleSheet(button_style)
    updateBtn.clicked.connect(strat_download)

    # Horizontal layout: button first, then the text box.
    hLayout = QHBoxLayout()
    for widget in (updateBtn, qTextMessage):
        hLayout.addWidget(widget)
    qWidget.setLayout(hLayout)

    qWidget.show()
    sys.exit(app.exec_())
</code></pre>