python


7、京东write2excel

<pre><code class="language-python">from bs4 import BeautifulSoup import urllib.request # import pandas as pd import ssl import time import random import xlsxwriter import re import json import os import pickle import socket import sys import pandas as pd</code></pre> <pre><code class="language-python">download_type_number =\ { 'men_all_shoes':0, 'men_all_bags':0, 'men_all_belts':0, 'men_all_clothes':0, 'men_all_wallets':0, 'men_all_shoes':0, 'men_all_bags':0, 'men_all_belts':0, 'men_all_clothes':0, 'men_all_wallets':0 }</code></pre> <pre><code class="language-python">def jd_write_excel(download_type, book): # ''' # 读取变量 df = pd.DataFrame(pd.read_csv('../PycharmProjects/excel/' + download_type + '.csv',header=0)) df = df.fillna('') product_vip_prices = df['字段3'] product_prices = df['字段2'] product_names = df['字段4'] product_comments = df['字段8'] product_urls = df['字段9_链接'] product_company = df['字段11_文本'] product_sales_activity = df['字段12']+ ';'+df['字段13']+';'+df['字段14'] #jd_goods_list = pickle.load(open('./PycharmProjects/excel/' + download_type + '/.csv', 'rb')) sheet = book.add_worksheet(download_type) # 设置sheet表单元格列宽 sheet.set_column("A:A", 5) # 京东 sheet.set_column("B:B", 102) # 商品名称 sheet.set_column("C:C", 10.5) # 正面图 sheet.set_column("D:D", 19.38) # 评论数 sheet.set_column("E:E", 8.25) # 京东售价 sheet.set_column("F:F", 8.25) # vip售价 sheet.set_column("G:G", 32) # 优惠信息 sheet.set_column("H:H", 106) # 商品链接 # 设定整个sheet表的单元格的格式 property = { 'font_size': 11, # 字体大小 'bold': False, # 是否加粗 'align': 'center', # 水平对齐方式 left 'valign': 'vcenter', # 垂直对齐方式 'font_name': u'微软雅黑', 'text_wrap': False, # 是否自动换行 } cell_format = book.add_format(property) # 设置sheet表单元格行高 sheet.set_row(0, 22) # 设置第一行的高度为22 # 在向单元格中写入内容时,加上单元格样式 # 插入第一行 sheet.write(0, 0, '京东', cell_format) sheet.write(0, 1, '商品名称', cell_format) sheet.write(0, 2, '正面图', cell_format) sheet.write(0, 3, '评论数', cell_format) sheet.write(0, 4, '京东零售价', cell_format) sheet.write(0, 5, 'vip售价', cell_format) sheet.write(0, 6, '优惠信息', cell_format) sheet.write(0, 7, '商品链接', cell_format) gucci_zh_img_format = {'x_offset': 4, # 左右移动 'y_offset': 0, 'x_scale': 0.3, # 缩放比例 'y_scale': 0.29} img_format = {'x_offset': 4, # 左右移动 'y_offset': 0, 'x_scale': 0.2, # 缩放比例 'y_scale': 0.19} # 插入爬取zh_goods_list信息 row_number = 1 for i in range(0,len(product_prices)): sheet.set_row(row_number, 52) # 设置第row_number行的高度为52 sheet.write(row_number, 1, product_names[i], cell_format) # sheet.insert_image(row_number, 2, product['url_path'], gucci_zh_img_format) sheet.write(row_number, 3, product_comments[i], cell_format) # product['productCode'] sheet.write(row_number, 4, '¥' + str(product_prices[i]), cell_format) sheet.write(row_number, 5, product_vip_prices[i], cell_format) sheet.write(row_number, 6, product_sales_activity[i], cell_format) if '#comment' in product_urls[i]: a = product_urls[i] sheet.write(row_number, 7, a[:len(a)-8], cell_format) else: continue row_number = row_number + 1 download_type_number[download_type] = row_number # book.close() print(download_type + ': 写入EXCEL成功') return book</code></pre> <pre><code class="language-python">def write_home_page(book): keys = [] for k in download_type_number.keys(): keys.append(k) sheet = book.add_worksheet('Home') # 设置sheet表单元格列宽 sheet.set_column("A:A", 20) # 种类 sheet.set_column("B:B", 8.5) # 商品名称 # 设定整个sheet表的单元格的格式 property = { 'font_size': 11, # 字体大小 'bold': False, # 是否加粗 'align': 'center', # 水平对齐方式 left 'valign': 'vcenter', # 垂直对齐方式 'font_name': u'微软雅黑', 'text_wrap': False, # 是否自动换行 } cell_format = book.add_format(property) sheet.set_row(0, 22) # 设置第一行的高度为22 # 插入第一行 sheet.write(0, 0, '种类', cell_format) sheet.write(0, 1, '个数', cell_format) row_number = 1 all_number = 0 for i in range(0,len(download_type_number)): sheet.set_row(row_number, 22) # 设置第row_number行的高度为22 sheet.write(row_number, 0, keys[i], cell_format) all_number = all_number + download_type_number[keys[i]] - 1 sheet.write(row_number, 1, download_type_number[keys[i]] - 1, cell_format) # product['productCode'] row_number = row_number + 1 sheet.write(row_number, 0, '总计', cell_format) sheet.write(row_number, 1, all_number, cell_format) # book.close() print('HomePage' + ': 写入EXCEL成功') return book</code></pre> <pre><code class="language-python">def jd_write_excel_to_path(men_all, women_all, excel_name): book = xlsxwriter.Workbook(excel_name) for i in range(0, len(men_all)): print(men_all[i]) book = jd_write_excel(men_all[i], book) for i in range(0, len(women_all)): print(men_all[i]) book = jd_write_excel(women_all[i], book) write_home_page(book) book.close() print(excel_name + ': 写入EXCEL成功') return None</code></pre> <pre><code class="language-python">def get_all_list(): men_all = ['men_all_shoes', 'men_all_bags', 'men_all_belts', 'men_all_clothes', 'men_all_wallets', 'men_all_scarves'] women_all = ['women_all_shoes', 'women_all_bags', 'women_all_belts', 'women_all_clothes', 'women_all_wallets', 'women_all_scarves'] return men_all, women_all</code></pre> <pre><code class="language-python">men_all, women_all = get_all_list()</code></pre> <pre><code class="language-python">jd_write_excel_to_path(men_all, women_all, 'jd_excel-20.xlsx')</code></pre> <pre><code>men_all_shoes men_all_shoes: 写入EXCEL成功 men_all_bags men_all_bags: 写入EXCEL成功 men_all_belts men_all_belts: 写入EXCEL成功 men_all_clothes men_all_clothes: 写入EXCEL成功 men_all_wallets men_all_wallets: 写入EXCEL成功 men_all_scarves men_all_scarves: 写入EXCEL成功 men_all_shoes women_all_shoes: 写入EXCEL成功 men_all_bags women_all_bags: 写入EXCEL成功 men_all_belts women_all_belts: 写入EXCEL成功 men_all_clothes women_all_clothes: 写入EXCEL成功 men_all_wallets women_all_wallets: 写入EXCEL成功 men_all_scarves women_all_scarves: 写入EXCEL成功 HomePage: 写入EXCEL成功 jd_excel-20.xlsx: 写入EXCEL成功</code></pre> <pre><code class="language-python">df = pd.DataFrame(pd.read_csv('../PycharmProjects/excel/' + 'men_all_shoes' + '.csv',header=0)) df = df.fillna('')</code></pre> <pre><code class="language-python">df = df.fillna('') product_vip_prices = df['字段3'] product_prices = df['字段2'] product_names = df['字段4'] product_comments = df['字段8'] product_urls = df['字段9_链接'] product_company = df['字段11_文本'] product_sales_activity = df['字段12']+ ';'+df['字段13']+';'+df['字段14']</code></pre> <pre><code class="language-python">download_type_number</code></pre> <pre><code>{'men_all_bags': 1164, 'men_all_belts': 709, 'men_all_clothes': 1851, 'men_all_shoes': 2096, 'men_all_wallets': 1046, 'women_all_bags': 3349, 'women_all_belts': 366, 'women_all_clothes': 1307, 'women_all_shoes': 2262, 'women_all_wallets': 1421}</code></pre> <pre><code class="language-python">keys = [] for k in download_type_number.keys(): keys.append(k)</code></pre> <pre><code class="language-python">keys</code></pre> <pre><code>['men_all_shoes', 'men_all_bags', 'men_all_belts', 'men_all_clothes', 'men_all_wallets', 'women_all_shoes', 'women_all_bags', 'women_all_belts', 'women_all_clothes', 'women_all_wallets']</code></pre> <pre><code class="language-python">download_type_number[keys[0]]-1</code></pre> <pre><code>2095</code></pre>

页面列表

ITEM_HTML