python


3、gucci_pyqt

"""Compare Gucci prices across gucci.cn, gucci.com (Italy) and kaola.com.

For every product category the script scrapes the three sites, stores the
product images under ``./gucci/<category>/`` together with a pickled list of
product records, and finally writes one Excel workbook for the men's
categories and one for the women's.  A small PyQt5 window offers a start
button and a text box that shows progress messages.
"""
import json
import os
import pickle
import random
import re
import socket
import ssl
import sys
import time
import urllib.request

import xlsxwriter
from bs4 import BeautifulSoup
from PyQt5.QtWidgets import (
    QApplication,
    QHBoxLayout,
    QPushButton,
    QTextEdit,
    QWidget,
)

# Give up on any single socket operation after 30 seconds.
socket.setdefaulttimeout(30)
# NOTE: this turns off HTTPS certificate verification for the whole process
# (kept from the original script).
ssl._create_default_https_context = ssl._create_unverified_context

# Browser-like headers sent with every page/JSON request.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15'
}

# download_type -> value of the ``ni`` query parameter on gucci.cn.
_ZH_CATEGORY_IDS = {
    'men_all_shoes': 17,
    'men_all_bags': 16,
    'men_all_clothes': 97,
    'men_all_belts': 64,
    'men_all_wallets': 63,
    'women_all_shoes': 13,
    'women_all_bags': 12,
    'women_all_clothes': 90,
    'women_all_belts': 41,
    'women_all_wallets': 40,
}

# download_type -> ``categoryCode`` query parameter on gucci.com/it.
_IT_CATEGORY_CODES = {
    'men_all_shoes': 'men-shoes',
    'men_all_bags': 'men-bags',
    'men_all_clothes': 'men-readytowear',
    'men_all_belts': 'men-accessories-belts',
    'men_all_wallets': 'men-accessories-wallets',
    'women_all_shoes': 'women-shoes',
    'women_all_bags': 'women-handbags',
    'women_all_clothes': 'women-readytowear',
    'women_all_belts': 'women-accessories-belts',
    'women_all_wallets': 'women-accessories-wallets',
}

# Every kaola listing URL has the same shape; only a few fields differ.
_KAOLA_URL_PREFIX = 'https://{host}/brand/1226-{category}.html?pageSize=60&pageNo='
_KAOLA_URL_SUFFIX = (
    '&sortfield=0&isStock=true&isSelfProduct=false&isPromote=false'
    '&isTaxFree=false&isDesc=true&proIds={pro_ids}&lowerPrice=-1'
    '&upperPrice=-1&isBrand=0&headCategoryId={head}&backCategory={category}'
    '&key=&changeContent={change}&#search_crumbs'
)
# download_type -> (host, category id, proIds, headCategoryId, changeContent)
_KAOLA_QUERIES = {
    'men_all_shoes': ('search.kaola.com', '1078', '', '-1', 'c'),
    'men_all_bags': ('search.kaola.com', '1027', '', '-1', 'c'),
    'men_all_clothes': ('search.kaola.com', '1047', '', '-1', 'c'),
    'men_all_belts': ('search.kaola.com', '1073', '100224_4111380', '1058', '0'),
    'men_all_wallets': ('www.kaola.com', '6259', '100224_4111380', '-1', 'isStock'),
    'women_all_shoes': ('search.kaola.com', '1077', '100224_4111421', '-1', 'c'),
    'women_all_bags': ('search.kaola.com', '1028', '100224_4111421', '-1', 'c'),
    'women_all_clothes': ('search.kaola.com', '1048', '', '-1', 'crumbs_0'),
    'women_all_belts': ('search.kaola.com', '1073', '100224_4111421', '1058', 'c'),
    'women_all_wallets': ('search.kaola.com', '6259', '100224_4111421', '-1', '0'),
}

# (header text, column width) for worksheet columns A..T, in order:
# six Europe columns, six China columns, eight kaola columns.
_SHEET_COLUMNS = (
    ('欧洲', 5),
    ('商品名称', 80.88),
    ('正面图', 10.5),
    ('货号', 19.38),
    ('欧洲零售价', 12),
    ('商品链接', 82),
    ('中国', 5),
    ('商品名称', 55),
    ('正面图', 10.5),
    ('货号', 19.38),
    ('中国零售价', 8.25),
    ('商品链接', 106),
    ('考拉', 5),
    ('商品名称', 89),
    ('正面图', 11.5),
    ('商品链接', 50.38),
    ('到手价', 14.25),
    ('所有售价', 25.75),
    ('备选颜色', 25.75),
    ('销售商家', 28.75),
)


class GucciDownload:
    """Holder for the message text shared between the scrapers and the GUI."""

    error_message = ''  # class-level default, shared until an instance overrides it
    download_message = ''


# Shared state.  These used to be created only under ``__main__``, which made
# every download function raise NameError when the module was imported.
mGucciDownload = GucciDownload()
qTextMessage = None  # the QTextEdit showing progress; assigned in main()
download = True  # when False the zh/kaola scrapers skip the image download


def _show_message(text):
    """Append *text* to the shared progress log and refresh the GUI text box."""
    mGucciDownload.download_message += text
    if qTextMessage is not None:
        qTextMessage.setText(mGucciDownload.download_message)


def _lookup_category(table, download_type):
    """Return ``table[download_type]`` or raise ValueError for unknown types."""
    try:
        return table[download_type]
    except KeyError:
        raise ValueError('unknown download_type: ' + repr(download_type))


def _fetch_text(url):
    """Request *url* with the browser headers and return the UTF-8 decoded body."""
    req = urllib.request.Request(url, headers=_REQUEST_HEADERS)
    response = urllib.request.urlopen(req)
    return response.read().decode("utf-8")


def _goods_file(download_type, file_name):
    """Return the path of a pickled product list for *download_type*."""
    return './gucci/' + download_type + '/' + file_name


def _save_goods(goods, download_type, file_name):
    """Sort *goods* by price in place and pickle the list to disk."""
    goods.sort(key=function)
    with open(_goods_file(download_type, file_name), 'wb') as fh:
        pickle.dump(goods, fh)


def _load_goods(download_type, file_name):
    """Load a product list previously written by ``_save_goods``."""
    # NOTE: pickle must only be used on files this tool wrote itself;
    # unpickling an untrusted file can execute arbitrary code.
    with open(_goods_file(download_type, file_name), 'rb') as fh:
        return pickle.load(fh)


def mkdir(path):
    """Create directory *path* if needed.

    Returns True when the directory was created and False when it already
    existed.  Surrounding whitespace and trailing backslashes are stripped
    from *path* first.
    """
    path = path.strip().rstrip("\\")
    if os.path.exists(path):
        print(path + ' 目录已存在')
        return False
    os.makedirs(path)
    print(path + ' 创建成功')
    return True


def openUrl(url):
    """Fetch *url* and return its HTML parsed into a BeautifulSoup (lxml) tree."""
    return BeautifulSoup(_fetch_text(url), 'lxml')


def return_json(url):
    """Fetch *url* and return the raw response text (not parsed)."""
    return _fetch_text(url)


def re_str(str):
    """Return *str* with all spaces, CR, LF and tab characters removed."""
    # The parameter name shadows the builtin but is kept for keyword callers.
    return str.replace(' ', '').replace('\r', '').replace('\n', '').replace('\t', '')


def downUrl(url, path_name):
    """Download *url* into the local file *path_name*."""
    urllib.request.urlretrieve(url, path_name)
    print(path_name + ": success")
    return None


def function(date):
    """Sort key: the integer price stored in a product record."""
    return date['product_prices']


def download_gucci_zh(download_type):
    """Scrape gucci.cn for *download_type* and save images plus records.

    Walks listing pages 1..11, stopping at the first page without products.
    Images go to ``./gucci/<download_type>/gucci_zh_/`` and the price-sorted
    records are pickled to ``zh_goods_list.txt`` beside that folder.

    Raises:
        ValueError: if *download_type* is not a known category.
    """
    category_id = _lookup_category(_ZH_CATEGORY_IDS, download_type)
    _show_message('开始下载' + download_type + '...' + '\n')

    # Millisecond timestamp sent as the ``_`` query parameter.
    new_now_time = int(round(time.time() * 1000))
    print(new_now_time)

    img_save_path = './gucci/' + download_type + '/gucci_zh_/'
    mkdir(img_save_path)

    url_head = 'https://www.gucci.cn/zh/itemList?ni=' + str(category_id) + '&pn='
    url_tail = '&direction=down&listName=ProductGrid&_=' + str(new_now_time)

    zh_goods_list = []
    zh_goods_i_id = 0
    for page_number in range(1, 12):
        url = url_head + str(page_number) + url_tail
        try:
            soup = openUrl(url)
        except Exception as e:
            print('error: ' + url)
            print(e)
            continue

        # Name, code and price nodes are indexed with 2*i below (the original
        # author notes each appears twice per product); image nodes appear once.
        infos = soup.find_all(class_='spice-item-grid-info')
        code_boxes = soup.find_all(class_='spice-item-grid-img-box e-abtest-code-click')
        price_nodes = soup.find_all(class_='spice-item-grid-price')
        imgs = soup.find_all(class_='visual-img')

        if not infos:
            # Empty page: we are past the last listing page.
            print(page_number)
            break

        for i, img in enumerate(imgs):
            goods_item = code_boxes[2 * i]['e-abtest-code']
            goods_name = infos[2 * i].h2.text
            goods_price = re_str(price_nodes[2 * i].text).replace(',', '').replace('.', '')
            # First run of digits once separators are removed.
            product_price = re.findall(r"\d+", goods_price)[0]
            goods_img_url = json.loads(img['spice-data-image-src'])['medium']
            goods_url = ('https://www.gucci.cn/zh/pr/' + goods_item
                         + '?nid=63&listName=ProductGrid&position=37&categoryPath=')
            local_path = img_save_path + str(zh_goods_i_id) + '.jpg'
            try:
                if download:
                    downUrl(goods_img_url, local_path)
                zh_goods_list.append({
                    'product_title': goods_name,
                    'product_url': goods_url,
                    'product_prices': int(product_price),
                    'product_all_prices': '',
                    'product_img_url': goods_img_url,
                    'url_path': local_path,
                    'productCode': goods_item,
                })
                zh_goods_i_id = zh_goods_i_id + 1
            except Exception as e:
                print('downerror: ' + goods_img_url)
                print(e)
                continue

            # Append only this product's lines; the old code re-appended the
            # whole accumulated message for every product.
            _show_message(goods_name + '\n' + goods_price + '\n')
            print(goods_item)
            print(goods_name)
            print(goods_price)
            print(goods_url)
            print(page_number)
            print(zh_goods_i_id)

    _save_goods(zh_goods_list, download_type, 'zh_goods_list.txt')
    return None


def download_gucci_it(download_type):
    """Scrape the gucci.com/it product-grid JSON for *download_type*.

    Walks pages 1..9, stopping at the first page whose item list is empty.
    Images go to ``./gucci/<download_type>/gucci_it_/`` and the price-sorted
    records are pickled to ``it_goods_list.txt`` beside that folder.

    Raises:
        ValueError: if *download_type* is not a known category.
    """
    category_code = _lookup_category(_IT_CATEGORY_CODES, download_type)

    img_save_path = './gucci/' + download_type + '/gucci_it_/'
    mkdir(img_save_path)

    it_url = ('https://www.gucci.com/it/it/c/productgrid?categoryCode='
              + category_code + '&show=Page&page=')
    # Product links are search URLs keyed by the product code.
    it_goods_url_in = 'https://www.gucci.com/it/it/search?search-cat=header-search&text='

    it_goods_list = []
    it_goods_i_id = 0
    print('loading.......')
    for page_number in range(1, 10):
        url = it_url + str(page_number)
        print(url)
        try:
            json_data = return_json(url)
        except Exception as e:
            print('error: ' + url)
            print(e)
            continue

        items = json.loads(json_data)['products']['items']
        if not items:
            # Empty page: we are past the last listing page.
            break

        for item in items:
            productCode = item['productCode']
            price = item['price'].replace(',', '').replace('.', '')
            # First run of digits once separators are removed.
            product_price = re.findall(r"\d+", price)[0]
            productName = item['productName']
            img_url = 'http:' + item['primaryImage']['datasrcmedium']

            # NOTE(review): unlike the zh/kaola scrapers, a product is only
            # recorded when ``download`` is true - confirm this is intended.
            if download:
                local_path = img_save_path + str(it_goods_i_id) + '.jpg'
                try:
                    print('img_url: ' + img_url)
                    urllib.request.urlretrieve(img_url, local_path)
                    it_goods_list.append({
                        'product_title': productName,
                        'product_url': it_goods_url_in + productCode,
                        'product_prices': int(product_price),
                        'product_all_prices': '',
                        'product_img_url': img_url,
                        'url_path': local_path,
                        'productCode': productCode,
                    })
                    it_goods_i_id = it_goods_i_id + 1
                except Exception as e:
                    print('error: ')
                    print(e)
                    continue

            print(productCode)
            print(price)
            print(productName)
            print(img_url)

    _save_goods(it_goods_list, download_type, 'it_goods_list.txt')
    print('**********************************')
    print('**********************************')
    print('**********************************')
    print(it_goods_list)
    return None


def download_kaola(download_type):
    """Scrape the kaola.com Gucci brand listing for *download_type*.

    Walks pages 1..9, stopping at the first page without seller tags.
    Images go to ``./gucci/<download_type>/kaola_zh_/`` and the price-sorted
    records are pickled to ``kaola_list.txt`` beside that folder.

    Raises:
        ValueError: if *download_type* is not a known category.
    """
    host, category, pro_ids, head, change = _lookup_category(_KAOLA_QUERIES, download_type)
    url_in = _KAOLA_URL_PREFIX.format(host=host, category=category)
    url_out = _KAOLA_URL_SUFFIX.format(
        pro_ids=pro_ids, head=head, category=category, change=change)

    img_save_path = './gucci/' + download_type + '/kaola_zh_/'
    mkdir(img_save_path)

    kaola_list = []
    product_number = 0
    for page_number in range(1, 10):
        print(page_number)
        url = url_in + str(page_number) + url_out
        try:
            soup = openUrl(url)
        except Exception as e:
            print(url)
            print('openUrl: ')
            print(e)
            continue

        # Titles are read from index 11 on; current prices, images and full
        # prices from index 10 on; seller and colour nodes from index 0.
        product_titles = soup.find_all(class_="title")
        goods_prices_curs = soup.find_all(class_='cur')
        goods_imgs = soup.find_all(class_='img')
        goods_all_prices = soup.find_all(class_='price')
        goods_colors = soup.find_all(class_='skuwrap')
        sale_names = soup.find_all(class_='selfflag')

        if not sale_names:
            break

        for list_i in range(0, len(goods_all_prices) - 10):
            print('***************************************')
            title_node = product_titles[list_i + 11]
            product_title = title_node['title']
            href = title_node['href']
            # hrefs already containing the host only get a scheme prepended.
            if 'goods.kaola.com' in href:
                product_url = 'https:' + href
            else:
                product_url = 'https://goods.kaola.com' + href

            product_prices = goods_prices_curs[list_i + 10].text
            product_sales_name = re_str(sale_names[list_i].text)
            product_price = re.findall(r"\d+", product_prices)[0]
            product_all_prices = goods_all_prices[list_i + 10].text
            print(product_title)
            print(product_url)
            print(product_prices)
            print(product_all_prices)

            img_url = goods_imgs[list_i + 10].img['data-src']
            # Drop the first two characters and prepend an http scheme.
            product_img_url = 'http://' + img_url[2:]

            product_other_color = '无'
            try:
                if goods_colors:
                    color_tags = goods_colors[list_i].find_all(class_='skutag')
                    # Same text as before: titles in reverse document order,
                    # each one followed by a comma.
                    product_other_color = ''.join(
                        tag['title'] + ',' for tag in reversed(color_tags))
            except Exception as e:
                print(e)
                continue

            local_path = img_save_path + str(product_number) + '.jpg'
            try:
                if download:
                    downUrl(product_img_url, local_path)
                    print(product_img_url)
                kaola_list.append({
                    'product_title': product_title,
                    'product_url': product_url,
                    'product_prices': int(product_price),
                    'product_all_prices': product_all_prices,
                    'product_img_url': product_img_url,
                    'product_sales_name': product_sales_name,
                    'product_other_color': product_other_color,
                    'url_path': local_path,
                })
                print(product_sales_name)
                print(product_other_color)
                print(product_number)
                product_number = product_number + 1
            except Exception as e:
                print("down error: ")
                print(e)
                continue
            print('---------------------------------------')

    _save_goods(kaola_list, download_type, 'kaola_list.txt')
    print(kaola_list)
    print('**********************************')
    print('**********************************')
    print('**********************************')
    return None


def write_excel(download_type, book):
    """Add a worksheet named *download_type* to *book* and return *book*.

    Reads the three pickled product lists written by the download functions
    and lays them out side by side: Europe in columns B-F, China in H-L and
    kaola in N-T.  Each list starts on the second row.
    """
    kaola_list = _load_goods(download_type, 'kaola_list.txt')
    it_goods_list = _load_goods(download_type, 'it_goods_list.txt')
    zh_goods_list = _load_goods(download_type, 'zh_goods_list.txt')

    sheet = book.add_worksheet(download_type)
    cell_format = book.add_format({
        'font_size': 11,
        'bold': False,
        'align': 'center',
        'valign': 'vcenter',
        'font_name': u'微软雅黑',
        'text_wrap': False,
    })

    # Header row: height 22, one title and one width per column.
    sheet.set_row(0, 22)
    for col, (title, width) in enumerate(_SHEET_COLUMNS):
        sheet.set_column(col, col, width)
        sheet.write(0, col, title, cell_format)

    # Offset and scale applied to the inserted thumbnails.
    gucci_zh_img_format = {'x_offset': 4, 'y_offset': 0, 'x_scale': 0.3, 'y_scale': 0.29}
    img_format = {'x_offset': 4, 'y_offset': 0, 'x_scale': 0.2, 'y_scale': 0.19}

    # China block (columns H-L).
    for row_number, product in enumerate(zh_goods_list, start=1):
        sheet.set_row(row_number, 52)
        sheet.write(row_number, 7, product['product_title'], cell_format)
        sheet.insert_image(row_number, 8, product['url_path'], gucci_zh_img_format)
        sheet.write(row_number, 9, product['productCode'], cell_format)
        sheet.write(row_number, 10, '¥' + str(product['product_prices']), cell_format)
        sheet.write(row_number, 11, product['product_url'], cell_format)

    # Europe block (columns B-F).
    for row_number, product in enumerate(it_goods_list, start=1):
        sheet.set_row(row_number, 52)
        sheet.write(row_number, 1, product['product_title'], cell_format)
        sheet.insert_image(row_number, 2, product['url_path'], img_format)
        sheet.write(row_number, 3, product['productCode'], cell_format)
        sheet.write(row_number, 4, '€' + str(product['product_prices']), cell_format)
        sheet.write(row_number, 5, product['product_url'], cell_format)

    # Kaola block (columns N-T).
    for row_number, product in enumerate(kaola_list, start=1):
        sheet.set_row(row_number, 52)
        sheet.write(row_number, 13, product['product_title'], cell_format)
        sheet.insert_image(row_number, 14, product['url_path'], img_format)
        sheet.write(row_number, 15, product['product_url'], cell_format)
        sheet.write(row_number, 16, '¥' + str(product['product_prices']), cell_format)
        sheet.write(row_number, 17, product['product_all_prices'], cell_format)
        sheet.write(row_number, 18, product['product_other_color'], cell_format)
        sheet.write(row_number, 19, product['product_sales_name'], cell_format)

    print(download_type + ': 写入EXCEL成功')
    return book


def get_all_list():
    """Return ``(men_categories, women_categories)`` as two lists of names."""
    men_all = ['men_all_shoes', 'men_all_bags', 'men_all_belts',
               'men_all_clothes', 'men_all_wallets']
    women_all = ['women_all_shoes', 'women_all_bags', 'women_all_belts',
                 'women_all_clothes', 'women_all_wallets']
    return men_all, women_all


def write_excel_to_path(men_all, excel_name):
    """Write workbook *excel_name* with one sheet per category in *men_all*."""
    book = xlsxwriter.Workbook(excel_name)
    for download_type in men_all:
        print(download_type)
        book = write_excel(download_type, book)
    book.close()
    print(excel_name + ': 写入EXCEL成功')
    return None


def strat_download():
    """Scrape every category from all three sites, then write both workbooks.

    Connected to the start button, so it runs on the GUI thread and the
    window does not repaint until it returns.
    """
    men_all, women_all = get_all_list()
    for download_type in men_all + women_all:
        download_gucci_zh(download_type)
        download_gucci_it(download_type)
        download_kaola(download_type)
    write_excel_to_path(men_all, 'men_all.xlsx')
    write_excel_to_path(women_all, 'women_all.xlsx')


def main():
    """Build the window (start button plus message box) and run the Qt loop."""
    global qTextMessage

    mGucciDownload.download_message = '准备下载。。。请点击开始进行下载。。。'

    app = QApplication(sys.argv)
    qWidget = QWidget()
    qWidget.resize(350, 450)
    qWidget.move(400, 300)
    qWidget.setWindowTitle('Gucci_product')

    # Text box that mirrors the shared progress log.
    qTextMessage = QTextEdit()
    qTextMessage.setText(mGucciDownload.download_message)

    updateBtn = QPushButton('开始下载')
    updateBtn.setStyleSheet('''
        text-align : center;
        background-color : NavajoWhite;
        height : 30px;
        border-style: outset;
        font : 13px
    ''')
    updateBtn.clicked.connect(strat_download)

    hLayout = QHBoxLayout()
    hLayout.addWidget(updateBtn)
    hLayout.addWidget(qTextMessage)
    qWidget.setLayout(hLayout)

    qWidget.show()
    sys.exit(app.exec_())


if __name__ == "__main__":
    main()

页面列表

ITEM_HTML