4、京东(2)
<pre><code class="language-python">from bs4 import BeautifulSoup
import urllib.request
# import pandas as pd
import ssl
import time
import random
import xlsxwriter
import re
import json
import os
import pickle
import socket
import sys
import pandas as pd</code></pre>
<pre><code class="language-python">download_type_number =\
{
'men_all_shoes':0,
'men_all_bags':0,
'men_all_belts':0,
'men_all_clothes':0,
'men_all_wallets':0,
'men_all_shoes':0,
'men_all_bags':0,
'men_all_belts':0,
'men_all_clothes':0,
'men_all_wallets':0
}</code></pre>
<pre><code class="language-python">def jd_write_excel(download_type, book):
# '''
# 读取变量
df = pd.DataFrame(pd.read_csv('../PycharmProjects/excel/' + download_type + '.csv',header=0))
df = df.fillna('')
product_vip_prices = df['字段3']
product_prices = df['字段2']
product_names = df['字段4']
product_comments = df['字段8']
product_urls = df['字段9_链接']
product_company = df['字段11_文本']
product_sales_activity = df['字段12']+ ';'+df['字段13']+';'+df['字段14']
#jd_goods_list = pickle.load(open('./PycharmProjects/excel/' + download_type + '/.csv', 'rb'))
sheet = book.add_worksheet(download_type)
# 设置sheet表单元格列宽
sheet.set_column("A:A", 5) # 京东
sheet.set_column("B:B", 102) # 商品名称
sheet.set_column("C:C", 10.5) # 正面图
sheet.set_column("D:D", 19.38) # 评论数
sheet.set_column("E:E", 8.25) # 京东售价
sheet.set_column("F:F", 8.25) # vip售价
sheet.set_column("G:G", 32) # 优惠信息
sheet.set_column("H:H", 106) # 商品链接
# 设定整个sheet表的单元格的格式
property = {
'font_size': 11, # 字体大小
'bold': False, # 是否加粗
'align': 'center', # 水平对齐方式 left
'valign': 'vcenter', # 垂直对齐方式
'font_name': u'微软雅黑',
'text_wrap': False, # 是否自动换行
}
cell_format = book.add_format(property)
# 设置sheet表单元格行高
sheet.set_row(0, 22) # 设置第一行的高度为22
# 在向单元格中写入内容时,加上单元格样式
# 插入第一行
sheet.write(0, 0, '京东', cell_format)
sheet.write(0, 1, '商品名称', cell_format)
sheet.write(0, 2, '正面图', cell_format)
sheet.write(0, 3, '评论数', cell_format)
sheet.write(0, 4, '京东零售价', cell_format)
sheet.write(0, 5, 'vip售价', cell_format)
sheet.write(0, 6, '优惠信息', cell_format)
sheet.write(0, 7, '商品链接', cell_format)
gucci_zh_img_format = {'x_offset': 4, # 左右移动
'y_offset': 0,
'x_scale': 0.3, # 缩放比例
'y_scale': 0.29}
img_format = {'x_offset': 4, # 左右移动
'y_offset': 0,
'x_scale': 0.2, # 缩放比例
'y_scale': 0.19}
# 插入爬取zh_goods_list信息
row_number = 1
for i in range(0,len(product_prices)):
sheet.set_row(row_number, 52) # 设置第row_number行的高度为52
sheet.write(row_number, 1, product_names[i], cell_format)
# sheet.insert_image(row_number, 2, product['url_path'], gucci_zh_img_format)
sheet.write(row_number, 3, product_comments[i], cell_format) # product['productCode']
sheet.write(row_number, 4, '¥' + str(product_prices[i]), cell_format)
sheet.write(row_number, 5, product_vip_prices[i], cell_format)
sheet.write(row_number, 6, product_sales_activity[i], cell_format)
if '#comment' in product_urls[i]:
a = product_urls[i]
sheet.write(row_number, 7, a[:len(a)-8], cell_format)
else:
continue
row_number = row_number + 1
download_type_number[download_type] = row_number
# book.close()
print(download_type + ': 写入EXCEL成功')
return book</code></pre>
<pre><code class="language-python">def write_home_page(book):
keys = []
for k in download_type_number.keys():
keys.append(k)
sheet = book.add_worksheet('Home')
# 设置sheet表单元格列宽
sheet.set_column("A:A", 20) # 种类
sheet.set_column("B:B", 8.5) # 商品名称
# 设定整个sheet表的单元格的格式
property = {
'font_size': 11, # 字体大小
'bold': False, # 是否加粗
'align': 'center', # 水平对齐方式 left
'valign': 'vcenter', # 垂直对齐方式
'font_name': u'微软雅黑',
'text_wrap': False, # 是否自动换行
}
cell_format = book.add_format(property)
sheet.set_row(0, 22) # 设置第一行的高度为22
# 插入第一行
sheet.write(0, 0, '种类', cell_format)
sheet.write(0, 1, '个数', cell_format)
row_number = 1
for i in range(0,len(download_type_number)):
sheet.set_row(row_number, 22) # 设置第row_number行的高度为22
sheet.write(row_number, 0, keys[i], cell_format)
sheet.write(row_number, 1, download_type_number[keys[i]] - 1, cell_format) # product['productCode']
row_number = row_number + 1
# book.close()
print('HomePage' + ': 写入EXCEL成功')
return book</code></pre>
<pre><code class="language-python">def jd_write_excel_to_path(men_all, women_all, excel_name):
book = xlsxwriter.Workbook(excel_name)
for i in range(0, len(men_all)):
print(men_all[i])
book = jd_write_excel(men_all[i], book)
for i in range(0, len(women_all)):
print(men_all[i])
book = jd_write_excel(women_all[i], book)
write_home_page(book)
book.close()
print(excel_name + ': 写入EXCEL成功')
return None</code></pre>
<pre><code class="language-python">def get_all_list():
men_all = ['men_all_shoes', 'men_all_bags', 'men_all_belts', 'men_all_clothes', 'men_all_wallets']
women_all = ['women_all_shoes', 'women_all_bags', 'women_all_belts', 'women_all_clothes', 'women_all_wallets']
return men_all, women_all</code></pre>
<pre><code class="language-python">men_all, women_all = get_all_list()</code></pre>
<pre><code class="language-python">jd_write_excel_to_path(men_all, women_all, 'jd_excel5.xlsx')</code></pre>
<pre><code>men_all_shoes
men_all_shoes: 写入EXCEL成功
men_all_bags
men_all_bags: 写入EXCEL成功
men_all_belts
men_all_belts: 写入EXCEL成功
men_all_clothes
men_all_clothes: 写入EXCEL成功
men_all_wallets
men_all_wallets: 写入EXCEL成功
men_all_shoes
women_all_shoes: 写入EXCEL成功
men_all_bags
women_all_bags: 写入EXCEL成功
men_all_belts
women_all_belts: 写入EXCEL成功
men_all_clothes
women_all_clothes: 写入EXCEL成功
men_all_wallets
women_all_wallets: 写入EXCEL成功
HomePage: 写入EXCEL成功
jd_excel5.xlsx: 写入EXCEL成功</code></pre>
<pre><code class="language-python">df = pd.DataFrame(pd.read_csv('../PycharmProjects/excel/' + 'men_all_shoes' + '.csv',header=0))
df = df.fillna('')</code></pre>
<pre><code class="language-python">df = df.fillna('')
product_vip_prices = df['字段3']
product_prices = df['字段2']
product_names = df['字段4']
product_comments = df['字段8']
product_urls = df['字段9_链接']
product_company = df['字段11_文本']
product_sales_activity = df['字段12']+ ';'+df['字段13']+';'+df['字段14']</code></pre>
<pre><code class="language-python">download_type_number</code></pre>
<pre><code>{'men_all_bags': 1164,
'men_all_belts': 709,
'men_all_clothes': 1851,
'men_all_shoes': 2096,
'men_all_wallets': 1046,
'women_all_bags': 3349,
'women_all_belts': 366,
'women_all_clothes': 1307,
'women_all_shoes': 2262,
'women_all_wallets': 1421}</code></pre>
<pre><code class="language-python">keys = []
for k in download_type_number.keys():
keys.append(k)</code></pre>
<pre><code class="language-python">keys</code></pre>
<pre><code>['men_all_shoes',
'men_all_bags',
'men_all_belts',
'men_all_clothes',
'men_all_wallets',
'women_all_shoes',
'women_all_bags',
'women_all_belts',
'women_all_clothes',
'women_all_wallets']</code></pre>
<pre><code class="language-python">download_type_number[keys[0]]-1</code></pre>
<pre><code>2095</code></pre>