Commit 849cc1d0 by Peng

修改了店铺类目分析,新增2个板块。

修改了数据库连接密码
补充数据召回商品数据
tk修改连接密码。重新打包
parent b03eefc8
......@@ -155,11 +155,13 @@ class dow_category_Product():
def get_category_data(self, Category_list, driver, site):
print('Category_list:::', Category_list)
num = 0
print('click_product_name_list::',click_product_name_list)
for Category in Category_list:
self.cilik_site(driver)
product_nums = 0
click_product_name_list=[]
print(Category, ' 22222222222222222222222222222222222222')
try:
num += 1
Category_name = Category
# _Category = Category.replace('&', '\\\&')
......@@ -171,7 +173,6 @@ class dow_category_Product():
'//h2[contains(text(),"Product Type")]/following-sibling::div/div')
product_nums = 0
for Product_Type in Product_Type_list:
product_nums+=1
time.sleep(0.5)
driver.execute_script("localStorage.clear();") # 清除本地存储
time.sleep(0.5)
......@@ -193,10 +194,10 @@ class dow_category_Product():
save_Category_list = []
Product_name = Product_Type.xpath('./@id')
print("Product_name3222222222::", Product_name[0].upper())
print('click_product_name_list::', click_product_name_list)
if Product_name[0] in click_product_name_list:
continue
click_product_name_list.append(Product_name[0])
# print('click_product_name_list::', click_product_name_list)
# if Product_name[0] in click_product_name_list:
# continue
# click_product_name_list.append(Product_name[0])
driver.execute_script(f"document.querySelector('#{Product_name[0]} > kat-radiobutton').click()")
time.sleep(2)
html = etree.HTML(driver.page_source)
......@@ -381,18 +382,18 @@ class dow_category_Product():
print('转成int')
print([big_brand_int, big_asin_int,
big_new_asin_int, big_per_asin_int])
top_data_json = self.new_top_grossing(driver, 'Top')
time.sleep(1)
news_data_json = self.new_top_grossing(driver, 'News')
time.sleep(1)
# top_data_json = self.new_top_grossing(driver, 'Top')
# time.sleep(1)
# news_data_json = self.new_top_grossing(driver, 'News')
# time.sleep(1)
save_Category_list.append(
[Category_name, Product_name[0], Keyword[0], float(search_ratio),
float(product_average), float(return_ratio), float(return_product_average),
self.y_w, big_text_sller, big_text_brand, big_text_asin, big_text_new_asin,
big_text_per_asin, big_text_Advertisement, big_text_star, big_brand_int,
big_asin_int, big_new_asin_int, big_per_asin_int, five_star, three_star, two_star,
one_star, ad_spend, majority_spend, most_popular_json_dict, reasons_returns_json,
top_data_json, news_data_json])
one_star, ad_spend, majority_spend, most_popular_json_dict, reasons_returns_json
])
print('数据:', save_Category_list)
except:
print('============ 下标。超出 。 ==========')
......@@ -416,7 +417,7 @@ class dow_category_Product():
'new_asin_int', 'per_asin_int', 'five_star',
'three_star', 'two_star', 'one_star', 'ad_spend',
'majority_spend', 'most_popular_keywords_item',
'reasons_returns_json', 'top_data_json', 'news_data_json'
'reasons_returns_json'
])
df.to_sql(f'{site}_aba_profit_category_insights', con=self.engine_mysql,
if_exists="append", index=False)
......@@ -436,8 +437,7 @@ class dow_category_Product():
'new_asin_int', 'per_asin_int', 'five_star',
'three_star', 'two_star', 'one_star', 'ad_spend',
'majority_spend', 'most_popular_keywords_item',
'reasons_returns_json', 'top_data_json',
'news_data_json'])
'reasons_returns_json'])
df.to_sql(f'{site}_aba_profit_category_insights', con=self.engine_pg,
if_exists="append", index=False)
print(save_Category_list)
......@@ -451,13 +451,15 @@ class dow_category_Product():
print('============ 产品分类 下标。超出 。无数据 ==========', f"\n{traceback.format_exc()}")
time.sleep(2)
continue
if product_nums>15:
time.sleep(2)
print('product_nums 重新启动 浏览器,')
driver.close()
driver.quit()
time.sleep(2)
self.run()
# product_nums+=1
# if product_nums>10:
# time.sleep(2)
# print(product_nums, 'product_nums 重新启动 浏览器,')
# driver.close()
# driver.quit()
# time.sleep(2)
# product_nums = 0
# self.run()
except Exception as e:
print(e, '执行错误')
time.sleep(random.uniform(10, 20))
......
......@@ -41,7 +41,7 @@ class One688LoginSpider(object):
self.engine_us_mysql = create_engine( url)
self.engine_pg = create_engine(
f"postgresql+psycopg2://postgres:T#4$4%qPbR7mJx@113.100.143.162:5432/{db}",
f"postgresql+psycopg2://postgres:F9kL2sXe81rZq@113.100.143.162:5432/{db}",
encoding='utf-8')
return self.engine_us_mysql
......@@ -156,7 +156,7 @@ class One688LoginSpider(object):
'商机探测器', '是']]
df_seller_asin_account = pd.DataFrame(data=workflow_everyday_list,
columns=['site_name', 'date_info', 'status', 'status_val',
'table_name', 'date_type', 'page', 'is_end'])
'table_name', 'report_date', 'page', 'is_end'])
df_seller_asin_account.to_sql('workflow_everyday', con=self.engine_us_mysql, if_exists='append',
index=False)
......
......@@ -155,10 +155,10 @@ class count_all_syn_st_id(BaseUtils):
if __name__ == '__main__':
import time
# 根据 engine 选择那个库。爬虫库 14, 抓取me搜索词是6,爬虫一般使用14,根据情况调整
month = 6
month = 7
engine_db_num = 14
# for site in ['de','uk']:
for site in ['de']:
for site in ['uk']:
time.sleep(0)
count_all_syn_st_id(site_name=site,month=month).get_minid_maxid()
# count_all_syn_st_id(site_name=site,month=month,engine_db_num=engine_db_num).search_term_syn()
......
......@@ -98,7 +98,7 @@ def long_time_task(site, proxy_name, month):
if __name__ == '__main__':
pppoe_ip()
site_list = ['us','de', 'uk']
site_list = ['us','de','uk']
month = int(sys.argv[1])
week = int(sys.argv[2])
proxy_name = None
......
......@@ -477,7 +477,8 @@ class ParseSearchTermUs(object):
def parse_ac(self):
try:
asin_list = self.etree_html.xpath(
'//span[@data-a-badge-color="sx-gulfstream" and @aria-hidden="true" ]//@id|//span[@data-a-badge-color="sx-gulfstream"]//@id|//span[contains(@id,"-amazons-choice-label")]/@id')
'//span[@data-a-badge-color="sx-gulfstream" and @aria-hidden="true" ]//@id|//span[@data-a-badge-color="sx-gulfstream"]//@id|//span[contains(@id,"-amazons-choice-label")]/@id|//span[contains(@id,"-amazons-choice")]/@id')
print('ac_asin_list:::',asin_list)
if len(asin_list):
asin_list = [asin.split("-")[0] for asin in asin_list if len(asin.split("-")[0]) >= 9] # 有重复
self.ac_list.extend(self.parse_type_common(asin_list=asin_list, cate_type='ac'))
......
......@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
# self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
# self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
# self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -327,7 +327,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
# self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -333,7 +333,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_edge.quit()
# self.page_edge.quit()
if __name__ == '__main__':
TkVideo().run()
......
......@@ -330,7 +330,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_edge.quit()
# self.page_edge.quit()
if __name__ == '__main__':
TkVideo().run()
......
......@@ -324,11 +324,7 @@ class TkVideo():
self.connect_redis()
self.get_datetime()
self.get_day()
print('完成关闭浏览器')
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -344,7 +344,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_edge.quit()
# self.page_edge.quit()
if __name__ == '__main__':
TkVideo().run()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment