python


10、red小红书

<pre><code class="language-python">from selenium import webdriver

# Start a Selenium-driven Chrome session.
browser = webdriver.Chrome()</code></pre>
<pre><code class="language-python"># Load the brand page whose goods list is scraped below.
browser.get('https://www.xiaohongshu.com/page/brands/5a43848e8000862471d15040?openPage=yes&amp;xhs_g_s=0066&amp;banner_id=5aa74c7e6d0bd31b991b48c3&amp;xhs_channel=0090_0090_0090_0066&amp;naviHidden=yes&amp;tab=goods&amp;goods_id=5965c1a170e75226a4192989&amp;_at=52f170df4d108064a431fccee7464b20fa454')</code></pre>
<pre><code class="language-python"># Scroll the document to 300px from the top.
js = "var q=document.documentElement.scrollTop=300"
browser.execute_script(js)</code></pre>
<pre><code class="language-python"># Current scroll offset in pixels; the cells below advance it.
i = 300</code></pre>
<pre><code class="language-python"># Scroll to offset i, then move the offset 800px further down.
js = "var q=document.documentElement.scrollTop=" + str(i)
browser.execute_script(js)
i = i + 800</code></pre>
<pre><code class="language-python">import time</code></pre>
<pre><code class="language-python"># Scroll down in 800px steps for the first 79 iterations, pausing 3 seconds
# after each step (presumably so lazily loaded goods cards can render --
# confirm against the page); the last two iterations capture the page HTML.
for j in range(1, 82):
    if j &lt; 80:
        js = "var q=document.documentElement.scrollTop=" + str(i)
        browser.execute_script(js)
        i = i + 800
        time.sleep(3)
    else:
        page_source = browser.page_source</code></pre>
<pre><code class="language-python">j</code></pre>
<pre><code>41</code></pre>
<pre><code class="language-python"># Capture the rendered HTML of the page as it currently stands.
page_source = browser.page_source</code></pre>
<pre><code class="language-python">from bs4 import BeautifulSoup</code></pre>
<pre><code class="language-python">Soup = BeautifulSoup(page_source, 'lxml')</code></pre>
<pre><code class="language-python"></code></pre>
<pre><code class="language-python"># Text wrapper of every goods card (title, subtitle and prices).
product_titles = Soup.find_all(class_="cube-goods-card__txtwrapper")</code></pre>
<pre><code class="language-python">len(product_titles)</code></pre>
<pre><code>1697</code></pre>
<pre><code class="language-python">product_titles[0]</code></pre>
<pre><code>&lt;div class="cube-goods-card__txtwrapper" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__top" data-v-6b7dda6d=""&gt;&lt;h4 data-v-6b7dda6d=""&gt;意大利·工艺精湛奢华独特&lt;/h4&gt; &lt;p data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;span 
data-v-6b7dda6d=""&gt;GUCCI古驰 GUCCI 古驰 女士 GG Marmont 系列绗缝迷你链条单肩包 黑色&lt;/span&gt;&lt;/p&gt;&lt;/div&gt; &lt;!-- --&gt; &lt;div class="cube-goods-card__center" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__center-right" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;div class="vendor-icon cube-image bg-image" data-v-6b7dda6d="" data-v-77f65c36="" show="true" style='background-image: url("http://img.xiaohongshu.com/seller/bea8891c1f4bc887b6587963e165c5ec");'&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;/div&gt;&lt;/div&gt; &lt;!-- --&gt; &lt;div class="cube-goods-card__center-left" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;8759&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-xs --size-xs --color-grey --weight-medium --decoration-line-through" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;9500&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;</code></pre>
<pre><code class="language-python"># Re-capture the rendered HTML from the live browser session.
page_source = browser.page_source</code></pre>
<pre><code class="language-python">Soup = BeautifulSoup(page_source, 'lxml')
product_titles = Soup.find_all(class_="cube-goods-card__txtwrapper")</code></pre>
<pre><code class="language-python">import pickle</code></pre>
<pre><code class="language-python"># Cache the page HTML on disk so parsing can be repeated without the browser.
# The with-block guarantees the file handle is flushed and closed.
with open('red_page_source.txt', 'wb') as source_file:
    pickle.dump(page_source, source_file)</code></pre>
<pre><code class="language-python"># NOTE: pickle.load executes arbitrary code from the file; only load files
# written by the cell above.
with open('red_page_source.txt', 'rb') as source_file:
    red_page_source = pickle.load(source_file)</code></pre>
<pre><code class="language-python"># Parse the cached HTML and try each field extraction on the first card.
red_soup = BeautifulSoup(red_page_source, 'lxml')
product_titles = red_soup.find_all(class_='cube-goods-card__top')
product_ids = red_soup.find_all(class_='good cube-goods-card')
product_img_url = product_ids[0](class_='cube-item-image-container cube-goods-card__img cube-image normal-image')[0].img['src']
product_id = product_ids[0]['data-id']  # len(product_ids) is 1691 in the recorded outputs
product_title_1 = product_titles[0].h4.text  # len(product_titles) is 1697 in the recorded outputs
product_title_2 = product_titles[0].span.text
product_price = product_ids[0](class_='cube-goods-card__center-left')[0].text</code></pre>
<pre><code class="language-python">product_ids = red_soup.find_all(class_='good cube-goods-card')
product_id = product_ids[0]['data-id']
len(product_ids)</code></pre>
<pre><code>1691</code></pre>
<pre><code class="language-python">product_ids[0]</code></pre>
<pre><code>&lt;div class="good cube-goods-card" data-eaglet-imp="true" data-id="5c122ffdc9d75972c3930355" data-owl-imp="true" data-v-6b7dda6d="" data-v-aba99e64="" data-v-ddc045f8="" eaglet='{"event":{"type":"Event","value":{"targetType":{"type":"RichTargetType","value":"mall_goods"}}},"tagTarget":{"type":"TagTarget","value":{"tagType":{"type":"TagType","value":"tag_brand"}}},"index":{"type":"Index","value":{"objectPosition":1,"channelTabId":"all","channelTabName":"全部"}},"mallGoodsTarget":{"type":"MallGoodsTarget","value":{"goodsId":"5c122ffdc9d75972c3930355","trackId":"45d37fa55b7da8836eb39c69f4adc8e6"}}}' owl="goods/5c122ffdc9d75972c3930355"&gt;&lt;div class="cube-item-image-container cube-goods-card__img cube-image normal-image" data-v-24dbd85a="" data-v-6b7dda6d="" data-v-77f65c36="" enableautowebptransform="true" options="[object Object]" show="true"&gt;&lt;!-- --&gt; &lt;img alt="" class="" data-v-77f65c36="" src="https://img.xiaohongshu.com/fdeaf560-2986-430c-a4e7-24c78b3aa927@_320w_320h_1e_1c_0i_90Q_1x_2o.jpg"/&gt; &lt;div data-v-24dbd85a="" data-v-77f65c36="" style="display: none;"&gt;&lt;!-- --&gt;&lt;/div&gt; &lt;div class="promotion-text" data-v-24dbd85a="" data-v-77f65c36="" 
style="display: none;"&gt; &lt;/div&gt; &lt;div class="footer-text" data-v-24dbd85a="" data-v-77f65c36="" style="display: none;"&gt; &lt;/div&gt; &lt;!-- --&gt; &lt;!-- --&gt;&lt;/div&gt; &lt;div class="cube-goods-card__txtwrapper" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__top" data-v-6b7dda6d=""&gt;&lt;h4 data-v-6b7dda6d=""&gt;意大利·工艺精湛奢华独特&lt;/h4&gt; &lt;p data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;span data-v-6b7dda6d=""&gt;GUCCI古驰 GUCCI 古驰 女士 GG Marmont 系列绗缝迷你链条单肩包 黑色&lt;/span&gt;&lt;/p&gt;&lt;/div&gt; &lt;!-- --&gt; &lt;div class="cube-goods-card__center" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__center-right" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;div class="vendor-icon cube-image bg-image" data-v-6b7dda6d="" data-v-77f65c36="" show="true" style='background-image: url("http://img.xiaohongshu.com/seller/bea8891c1f4bc887b6587963e165c5ec");'&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;/div&gt;&lt;/div&gt; &lt;!-- --&gt; &lt;div class="cube-goods-card__center-left" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;8759&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-xs --size-xs --color-grey --weight-medium --decoration-line-through" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;9500&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt; &lt;!-- --&gt;&lt;/div&gt;</code></pre> <pre><code class="language-python">product_ids[0](class_='cube-goods-card__top')[0].span.text</code></pre> <pre><code>'GUCCI古驰 GUCCI 古驰 女士 GG 
Marmont 系列绗缝迷你链条单肩包 黑色'</code></pre> <pre><code class="language-python">product_ids[0](class_='cube-goods-card__center-left')[0].text</code></pre> <pre><code>' ¥ 8759 ¥ 9500'</code></pre> <pre><code class="language-python">product_titles = red_soup.find_all(class_='cube-goods-card__top')</code></pre> <pre><code class="language-python">product_titles[0]</code></pre> <pre><code>&lt;div class="cube-goods-card__top" data-v-6b7dda6d=""&gt;&lt;h4 data-v-6b7dda6d=""&gt;意大利·工艺精湛奢华独特&lt;/h4&gt; &lt;p data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;span data-v-6b7dda6d=""&gt;GUCCI古驰 GUCCI 古驰 女士 GG Marmont 系列绗缝迷你链条单肩包 黑色&lt;/span&gt;&lt;/p&gt;&lt;/div&gt;</code></pre> <pre><code class="language-python">product_title_1 = product_titles[0].h4.text product_title_2 = product_titles[0].span.text</code></pre> <pre><code>'意大利·工艺精湛奢华独特'</code></pre> <pre><code class="language-python"></code></pre> <pre><code>'GUCCI古驰 GUCCI 古驰 女士 GG Marmont 系列绗缝迷你链条单肩包 黑色'</code></pre> <pre><code class="language-python">len(product_titles)</code></pre> <pre><code>1697</code></pre> <pre><code class="language-python">prodict_img_url = red_soup.find_all(class_='cube-item-image-container cube-goods-card__img cube-image normal-image')</code></pre> <pre><code class="language-python">len(prodict_img_url)</code></pre> <pre><code>1697</code></pre> <pre><code class="language-python">prodict_now_prices = red_soup.find_all(class_='cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-')</code></pre> <pre><code class="language-python">len(prodict_now_prices)</code></pre> <pre><code>1697</code></pre> <pre><code class="language-python">prodict_now_prices[0]</code></pre> <pre><code>&lt;div class="cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;8759&lt;/span&gt;&lt;/div&gt;</code></pre> <pre><code 
class="language-python">prodict_now_prices[0].text</code></pre> <pre><code>'¥ 8759'</code></pre> <pre><code class="language-python">product_ids[i](class_='cube-goods-card__center-left')[0]</code></pre> <pre><code>&lt;div class="cube-goods-card__center-left" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;1699&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-xs --size-xs --color-grey --weight-medium --decoration-line-through" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;3680&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;</code></pre> <pre><code class="language-python">product_ids[i](class_='cube-goods-card__center-left')[0](class_='cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-')[0].text</code></pre> <pre><code>'¥ 1699'</code></pre> <pre><code class="language-python">product_ids[i](class_='cube-goods-card__center-left')[0](class_='cube-price --sale --icon-size-xs --size-xs --color-grey --weight-medium --decoration-line-through')[0].text</code></pre> <pre><code>'¥ 3680'</code></pre> <pre><code class="language-python">prodict_old_prices = red_soup.find_all(class_='cube-price --sale --icon-size-xs --size-xs --color-grey --weight-medium --decoration-line-through') len(prodict_old_prices)</code></pre> <pre><code>1509</code></pre> <pre><code class="language-python">prodict_prices = red_soup.find_all(class_='cube-goods-card__center-left')</code></pre> <pre><code class="language-python">len(product_ids)</code></pre> 
<pre><code>1691</code></pre> <pre><code class="language-python">prodict_now_prices = red_soup.find_all(class_='cube-goods-card__center-left') prodict_now_prices = red_soup.find_all(class_='good cube-goods-card')</code></pre> <pre><code class="language-python">product_ids[19]</code></pre> <pre><code>&lt;div class="good cube-goods-card" data-eaglet-imp="true" data-id="5c122dd970e75236b2278404" data-owl-imp="true" data-v-6b7dda6d="" data-v-aba99e64="" data-v-ddc045f8="" eaglet='{"event":{"type":"Event","value":{"targetType":{"type":"RichTargetType","value":"mall_goods"}}},"tagTarget":{"type":"TagTarget","value":{"tagType":{"type":"TagType","value":"tag_brand"}}},"index":{"type":"Index","value":{"objectPosition":39,"channelTabId":"all","channelTabName":"全部"}},"mallGoodsTarget":{"type":"MallGoodsTarget","value":{"goodsId":"5c122dd970e75236b2278404","trackId":"706ec784c072bf35d2463d4c4a39932b"}}}' owl="goods/5c122dd970e75236b2278404"&gt;&lt;div class="cube-item-image-container cube-goods-card__img cube-image normal-image" data-v-24dbd85a="" data-v-6b7dda6d="" data-v-77f65c36="" enableautowebptransform="true" options="[object Object]" show="true"&gt;&lt;!-- --&gt; &lt;img alt="" class="" data-v-77f65c36="" src="https://img.xiaohongshu.com/ac316db1-ac2c-44ea-85d2-79bd5e0992d1@_320w_320h_1e_1c_0i_90Q_1x_2o.jpg"/&gt; &lt;div data-v-24dbd85a="" data-v-77f65c36="" style="display: none;"&gt;&lt;!-- --&gt;&lt;/div&gt; &lt;div class="promotion-text" data-v-24dbd85a="" data-v-77f65c36="" style="display: none;"&gt; &lt;/div&gt; &lt;div class="footer-text" data-v-24dbd85a="" data-v-77f65c36="" style="display: none;"&gt; &lt;/div&gt; &lt;!-- --&gt; &lt;!-- --&gt;&lt;/div&gt; &lt;div class="cube-goods-card__txtwrapper" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__top" data-v-6b7dda6d=""&gt;&lt;h4 data-v-6b7dda6d=""&gt;意大利·工艺精湛奢华独特&lt;/h4&gt; &lt;p data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;span data-v-6b7dda6d=""&gt;GUCCI古驰 女士 Dionysus 系列迷你链条斜挎单肩包 
黑色&lt;/span&gt;&lt;/p&gt;&lt;/div&gt; &lt;!-- --&gt; &lt;div class="cube-goods-card__center" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__center-right" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;div class="vendor-icon cube-image bg-image" data-v-6b7dda6d="" data-v-77f65c36="" show="true"&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;/div&gt;&lt;/div&gt; &lt;!-- --&gt; &lt;div class="cube-goods-card__center-left" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;9599&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-xs --size-xs --color-grey --weight-medium --decoration-line-through" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;11500&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt; &lt;!-- --&gt;&lt;/div&gt;</code></pre> <pre><code class="language-python">product_ids[20]</code></pre> <pre><code>&lt;div class="good cube-goods-card" data-eaglet-imp="true" data-id="5b6421bb462839743b860888" data-owl-imp="true" data-v-6b7dda6d="" data-v-aba99e64="" data-v-ddc045f8="" eaglet='{"event":{"type":"Event","value":{"targetType":{"type":"RichTargetType","value":"mall_goods"}}},"tagTarget":{"type":"TagTarget","value":{"tagType":{"type":"TagType","value":"tag_brand"}}},"index":{"type":"Index","value":{"objectPosition":41,"channelTabId":"all","channelTabName":"全部"}},"mallGoodsTarget":{"type":"MallGoodsTarget","value":{"goodsId":"5b6421bb462839743b860888","trackId":"f94a55b19da492f64bc7c61f10bb11b1"}}}' 
owl="goods/5b6421bb462839743b860888"&gt;&lt;div class="cube-item-image-container cube-goods-card__img cube-image normal-image" data-v-24dbd85a="" data-v-6b7dda6d="" data-v-77f65c36="" enableautowebptransform="true" options="[object Object]" show="true"&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div data-v-24dbd85a="" data-v-77f65c36="" style="display: none;"&gt;&lt;!-- --&gt;&lt;/div&gt; &lt;div class="promotion-text" data-v-24dbd85a="" data-v-77f65c36="" style="display: none;"&gt; &lt;/div&gt; &lt;div class="footer-text" data-v-24dbd85a="" data-v-77f65c36="" style="display: none;"&gt; &lt;/div&gt; &lt;!-- --&gt; &lt;!-- --&gt;&lt;/div&gt; &lt;div class="cube-goods-card__txtwrapper" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__top" data-v-6b7dda6d=""&gt;&lt;h4 data-v-6b7dda6d=""&gt;意大利·复古奢华贵族享受&lt;/h4&gt; &lt;p data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;span data-v-6b7dda6d=""&gt;GUCCI古驰 Sylvie系列女士皮质迷你手提单肩斜挎包 黑&lt;/span&gt;&lt;/p&gt;&lt;/div&gt; &lt;!-- --&gt; &lt;div class="cube-goods-card__center" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__center-right" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;div class="vendor-icon cube-image bg-image" data-v-6b7dda6d="" data-v-77f65c36="" show="true"&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;/div&gt;&lt;/div&gt; &lt;!-- --&gt; &lt;div class="cube-goods-card__center-left" data-v-6b7dda6d=""&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;35000&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="cube-goods-card__num" data-v-6b7dda6d=""&gt;&lt;!-- --&gt; &lt;!-- --&gt; &lt;div class="cube-price --sale --icon-size-xs --size-xs --color-grey --weight-medium --decoration-line-through" data-v-69c37f86="" data-v-6b7dda6d=""&gt;&lt;span 
class="cube-price__icon" data-v-69c37f86=""&gt;¥&lt;/span&gt; &lt;span data-v-69c37f86=""&gt;45000&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt; &lt;!-- --&gt;&lt;/div&gt;</code></pre>
<pre><code class="language-python"></code></pre>
<pre><code class="language-python"># Build one dict per goods card from the cached page HTML.
product_infos = []
red_soup = BeautifulSoup(red_page_source, 'lxml')
product_ids = red_soup.find_all(class_='good cube-goods-card')

# Constant parts of the product URL; the card's data-id goes in between.
url_i = 'https://pages.xiaohongshu.com/goods/'
url_o = '?xhs_g_s=0094&amp;banner_id=5aa74c7e6d0bd31b991b48c3&amp;xhs_channel=0090_0090_0066_0094&amp;naviHidden=yes&amp;openPage=yes'

for i, card in enumerate(product_ids):
    try:
        # Cards without an img element make .img None, so the ['src']
        # lookup raises TypeError and the card is skipped below.
        product_img_url = card(class_='cube-item-image-container cube-goods-card__img cube-image normal-image')[0].img['src']
        product_id = card['data-id']

        top = card(class_='cube-goods-card__top')[0]
        product_title_1 = top.h4.text
        product_title_2 = top.span.text

        center_left = card(class_='cube-goods-card__center-left')[0]
        product_price = center_left(class_='cube-price --sale --icon-size-m --size-m --color-red --weight-medium --decoration-')[0].text
        try:
            # The struck-through original price is absent on some cards.
            product_price_2 = center_left(class_='cube-price --sale --icon-size-xs --size-xs --color-grey --weight-medium --decoration-line-through')[0].text
        except IndexError:
            product_price_2 = ''

        product_info = {
            'product_img_url': product_img_url,
            'product_url': url_i + product_id + url_o,
            'product_title': product_title_2,
            'product_title_1': product_title_1,
            'product_price_2': product_price_2,
            'product_price': product_price,
        }
        product_infos.append(product_info)
    except (AttributeError, IndexError, KeyError, TypeError) as e:
        # Report the index of the card that could not be parsed and move on.
        print(i)
        print(e)
        continue</code></pre>
<pre><code>20 'NoneType' object is not subscriptable 25 'NoneType' object is not subscriptable 76 'NoneType' object is not 
subscriptable 77 'NoneType' object is not subscriptable 81 'NoneType' object is not subscriptable 82 'NoneType' object is not subscriptable 86 'NoneType' object is not subscriptable 87 'NoneType' object is not subscriptable 91 'NoneType' object is not subscriptable 92 'NoneType' object is not subscriptable 96 'NoneType' object is not subscriptable 97 'NoneType' object is not subscriptable 101 'NoneType' object is not subscriptable 102 'NoneType' object is not subscriptable 106 'NoneType' object is not subscriptable 107 'NoneType' object is not subscriptable 116 'NoneType' object is not subscriptable 117 'NoneType' object is not subscriptable 121 'NoneType' object is not subscriptable 122 'NoneType' object is not subscriptable 126 'NoneType' object is not subscriptable 127 'NoneType' object is not subscriptable 131 'NoneType' object is not subscriptable 132 'NoneType' object is not subscriptable 136 'NoneType' object is not subscriptable 137 'NoneType' object is not subscriptable 141 'NoneType' object is not subscriptable 142 'NoneType' object is not subscriptable 146 'NoneType' object is not subscriptable 147 'NoneType' object is not subscriptable 866 'NoneType' object is not subscriptable 871 'NoneType' object is not subscriptable 922 'NoneType' object is not subscriptable 923 'NoneType' object is not subscriptable 927 'NoneType' object is not subscriptable 928 'NoneType' object is not subscriptable 932 'NoneType' object is not subscriptable 933 'NoneType' object is not subscriptable 937 'NoneType' object is not subscriptable 938 'NoneType' object is not subscriptable 942 'NoneType' object is not subscriptable 943 'NoneType' object is not subscriptable 947 'NoneType' object is not subscriptable 948 'NoneType' object is not subscriptable 952 'NoneType' object is not subscriptable 953 'NoneType' object is not subscriptable 963 'NoneType' object is not subscriptable 968 'NoneType' object is not subscriptable 973 'NoneType' object is not subscriptable 977 'NoneType' 
object is not subscriptable 978 'NoneType' object is not subscriptable 982 'NoneType' object is not subscriptable 983 'NoneType' object is not subscriptable 987 'NoneType' object is not subscriptable 988 'NoneType' object is not subscriptable 992 'NoneType' object is not subscriptable 993 'NoneType' object is not subscriptable</code></pre>
<pre><code class="language-python">len(product_infos)</code></pre>
<pre><code>1634</code></pre>
<pre><code class="language-python"># Row counters per worksheet name; secoo_write_excel stores its final
# row index under the name of the sheet it wrote.
download_type_number = {
    'product_women_bag_infos': 0,
    'product_men_bag_infos': 0,
    'product_bag_infos': 0,
    'product_shoes_infos': 0,
    'product_accessories_infos': 0,
}</code></pre>
<pre><code class="language-python">def secoo_write_excel(download_type, book):
    """Write the scraped products to a new worksheet of ``book``.

    Reads the module-level ``product_infos`` list and records the final row
    index in the module-level ``download_type_number`` dict.

    Args:
        download_type: Name of the worksheet to add; also the key stored in
            ``download_type_number``.
        book: An open xlsxwriter workbook.

    Returns:
        The same ``book`` object, so calls can be chained.
    """
    it_goods_list = product_infos
    sheet = book.add_worksheet(download_type)

    # Column widths, in the order of the header row written below.
    sheet.set_column("A:A", 5)       # source label ('RED')
    sheet.set_column("B:B", 112.88)  # product name
    sheet.set_column("C:C", 10.5)    # front image
    sheet.set_column("D:D", 22.38)   # discounted price
    sheet.set_column("E:E", 22.38)   # original price
    sheet.set_column("F:F", 191)     # product link
    sheet.set_column("G:G", 32)      # first-level title

    # One cell format shared by every cell written to this sheet.
    cell_properties = {
        'font_size': 11,
        'bold': False,
        'align': 'center',    # horizontal alignment
        'valign': 'vcenter',  # vertical alignment
        'font_name': u'微软雅黑',
        'text_wrap': False,   # no automatic line wrapping
    }
    cell_format = book.add_format(cell_properties)

    # Header row, 22 units high.
    sheet.set_row(0, 22)
    sheet.write(0, 0, 'RED', cell_format)
    sheet.write(0, 1, '商品名称', cell_format)
    sheet.write(0, 2, '正面图', cell_format)
    sheet.write(0, 3, '优惠价', cell_format)
    sheet.write(0, 4, '原售价', cell_format)
    sheet.write(0, 5, '商品链接', cell_format)
    sheet.write(0, 6, '一级标题', cell_format)

    # One 52-unit-high row per product. Columns 0 and 2 are left empty:
    # no image is inserted into the sheet.
    row_number = 1
    for row_number, product in enumerate(it_goods_list, start=1):
        sheet.set_row(row_number, 52)
        sheet.write(row_number, 1, product['product_title'], cell_format)
        sheet.write(row_number, 3, str(product['product_price']), cell_format)
        sheet.write(row_number, 4, str(product['product_price_2']), cell_format)
        sheet.write(row_number, 5, product['product_url'], cell_format)
        sheet.write(row_number, 6, product['product_title_1'], cell_format)
    else:
        # Match the original counter: index of the first unused row
        # (1 when there are no products).
        row_number = len(it_goods_list) + 1

    download_type_number[download_type] = row_number
    print(download_type + ': 写入EXCEL成功')
    return book</code></pre>
<pre><code class="language-python">def secoo_write_excel_to_path(excel_name):
    """Create the workbook ``excel_name`` containing a single 'all' sheet.

    Args:
        excel_name: Path of the .xlsx file to create.

    Returns:
        None.
    """
    # The with-block closes the workbook even if writing the sheet raises.
    with xlsxwriter.Workbook(excel_name) as book:
        secoo_write_excel('all', book)
    print(excel_name + ': 写入EXCEL成功')
    return None</code></pre>
<pre><code class="language-python">from bs4 import BeautifulSoup
import urllib.request
# import pandas as pd
import ssl
import time
import random
import xlsxwriter
import re
import json
import os
import pickle
import socket
import sys
from functools import partial
from multiprocessing import Pool
from selenium import webdriver</code></pre>
<pre><code class="language-python">secoo_write_excel_to_path('red_pages-26-3.xlsx')</code></pre>
<pre><code>all: 写入EXCEL成功
red_pages-26-3.xlsx: 写入EXCEL成功</code></pre>

页面列表

ITEM_HTML