selection-new / spider · Commits · 000d315d

Commit 000d315d, authored Jul 28, 2025 by Peng
Commit message: no message
Parent: 8dd9963f

Showing 7 changed files with 176 additions and 77 deletions (+176, −77)
py_spider/amazon_every_day_spider/get_junglescout_rank.py    +4    −4
py_spider/amazon_every_day_spider/junglescout_spider.py      +3    −3
py_spider/amazon_save_db/save_all_syn_st_minid_maxid.py      +1    −1
py_spider/amazon_spider/asin_detail_pg.py                    +3    −4
py_spider/amazon_spider/recall_cases_spider.py               +118  −59
py_spider/amzon_parse_db_html/pares_html.py                  +3    −3
py_spider/utils/asin_parse.py                                +44   −3
py_spider/amazon_every_day_spider/get_junglescout_rank.py

@@ -350,7 +350,7 @@ def junglescout_spider(db_base):
         "Accept-Encoding": "gzip, deflate, br, zstd",
         "Accept-Language": "zh-CN,zh-TW;q=0.9,zh;q=0.8",
         "Cache-Control": "no-cache",
-        'Cookie': '_ga=GA1.1.522737765.1749119222; _fp=65dbbe41a37f8f9fbe702eba96328267; MEIQIA_TRACK_ID=2y5KvHOzkFTlJAhOLENKAKWsOeb; MEIQIA_VISIT_ID=2y5KvGrMsL4O61rUcCdsLjChlRa; current_guest=r0hgXGqjbSw0_250605-186810; ecookie=xOHgcnYmcZIZKG0z_CN; x-hng=lang=zh-CN&domain=www.sellersprite.com; 8f00639f9c446a2d0213=54fb71d3f2c9e8acb7878e0f73abbf33; _gcl_au=1.1.420472597.1749119222.719336435.1751886424.1751886424; Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1751516385,1751886410,1751973053,1752031904; HMACCOUNT=800EBCCFB4C6BBFB; 65722c3d8208b58d42f9=7dc2ebaa5e4a51182da4ade1aacd8dc4; rank-guest-user=6159802571t3e3obe8rwmCywrH0Xq28vOMfd8Q+siSpAi1WiGPGuuMcYrYhXyf/QpgeBCBdgCT; rank-login-user=6159802571t3e3obe8rwmCywrH0Xq28mIqu6gO0eXYPrSqY9RlSIznMsavLuIJkOkjELzcr/d1; rank-login-user-info="eyJuaWNrbmFtZSI6Iuilv+mXqOWQuembqiIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTMzKioqKjU0MDciLCJ0b2tlbiI6IjYxNTk4MDI1NzF0M2Uzb2JlOHJ3bUN5d3JIMFhxMjhtSXF1NmdPMGVYWVByU3FZOVJsU0l6bk1zYXZMdUlKa09rakVMemNyL2QxIn0="; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJJUGFLc3VqMkZsUmpPR1NRQnIxYkJRIiwiaWF0IjoxNzUyMDMxOTE2LCJleHAiOjE3NTIxMTgzMTYsIm5iZiI6MTc1MjAzMTg1Niwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTMzNDkzLCJwaSI6bnVsbCwibm4iOiLopb_pl6jlkLnpm6oiLCJzeXMiOiJTU19DTiIsImVkIjoiTiIsInBobiI6IjEzMzkyNDE1NDA3IiwiZW0iOiJxcTE2NTMxMjE4NjUzQDE2My5jb20iLCJtbCI6IkcifQ.mLIjN_qO4K8w18IDVa0GCRY3MODTmJhZlQaPbgBjeYJRPDwteHfkfqFS_GFyLu4svoahzyFRxkdnKhxs1x90QxQ-7QCwjwypbk8On6gMarKl8jopo9sJbZITvk8mrqtoT6N34LZ1ash35iAkIuPZONPMH8_cp5NxiSC70J12fvIT9ZXp-9zvEk6WV8qQ3pRr0yRuGnSsuWjVvDE9WRNpE3ZmYS_EUBroA51yBEPdS8aBThRuuVGt4HuqrPXp9ZwHoiOcRYu1VcQu-wpIAhLfXcnY1vJA3FXm7w_H00DOGZuM9HRcxdg6Fj-2WP5FvCxbE8z5n1-zbQMs_J8JVaVXgQ; ao_lo_to_n="6159802571t3e3obe8rwmCywrH0Xq28osFyhyxlRsfXXDx9AUjMD2qAFgWUPkLF84KewBkZoL5OL21x5jznuxdPNdiJfglPNE7YH03Vk5CofaP+MGH3y8="; _gaf_fp=01fef3c14bfcaf5a01438f74a677e95a; _ga_38NCVF2XST=GS2.1.s1752031904$o47$g1$t1752031923$j41$l0$h1543227925; _ga_CN0F80S6GL=GS2.1.s1752031906$o46$g1$t1752031924$j42$l0$h0; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1752035308; JSESSIONID=165F9BAA752FE5B22CCD7C5BB7B62F2F',
+        'Cookie': '_ga=GA1.1.522737765.1749119222; _fp=65dbbe41a37f8f9fbe702eba96328267; MEIQIA_TRACK_ID=2y5KvHOzkFTlJAhOLENKAKWsOeb; MEIQIA_VISIT_ID=2y5KvGrMsL4O61rUcCdsLjChlRa; current_guest=r0hgXGqjbSw0_250605-186810; ecookie=xOHgcnYmcZIZKG0z_CN; Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1752031904,1752460043,1752653436,1753353401; HMACCOUNT=800EBCCFB4C6BBFB; 894cdd1d9741ce0c9757=827b7d3d13ed7bd6b4b1b24d0246b3dc; 3d854e1bcd61963fdf05=38fcb3b742a48aa345ddfd7136bc60ee; _gaf_fp=f297033bfe53aa9891ffe2842271566b; _gcl_au=1.1.420472597.1749119222.1054917286.1753685435.1753685437; rank-guest-user=6303473571KK6FnhfedvWg9tSSyk3xj0WOO7cLm/YtvwwmR8H9lihUCQIaVmrHXjbpSRP/Ca0F; rank-login-user=6303473571KK6FnhfedvWg9tSSyk3xj2GRIc/8HSm4vuPYVHI5vKLXnssgei5ccK1dG8fkQSFI; rank-login-user-info=eyJuaWNrbmFtZSI6IuW4heWTpSIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTgzKioqKjczNDciLCJ0b2tlbiI6IjYzMDM0NzM1NzFLSzZGbmhmZWR2V2c5dFNTeWszeGoyR1JJYy84SFNtNHZ1UFlWSEk1dktMWG5zc2dlaTVjY0sxZEc4ZmtRU0ZJIn0=; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJKc2pZSlZWeFZzTVptVWFvMzgtZ3RRIiwiaWF0IjoxNzUzNjg1NDM2LCJleHAiOjE3NTM3NzE4MzYsIm5iZiI6MTc1MzY4NTM3Niwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTQ2NjIxNSwicGkiOm51bGwsIm5uIjoi5biF5ZOlIiwic3lzIjoiU1NfQ04iLCJlZCI6Ik4iLCJwaG4iOiIxODMwNzk2NzM0NyIsImVtIjoiMzE1OTk4MDg5MkBxcS5jb20iLCJtbCI6IkcifQ.EaQ7Md7iVOpjZDogkiS2DlndhFPt3GzL2t33LXnh9Z5Itr3A8scFM_tzrYuzXqF6a-BDIMFe90SdDtU18zs9WTTl6_Phv3AEqcDe6WDfPAhB_KMa15VYAE5-b9d3lgIukKR8ZZyAMpiJzcmIWShmqxrhCNQD0ER3b7idaJpSrJiKnwV-tj6La52WJ6BmVRAk8gst0p5h-SYVnNz9iNaSXLc2Dx-hHZvMVNU27yfbJgKPpzRxgh7TOD7O-cT0WrEoKvTSw9e81gG9bgvKuA_bD-z3ePhgM6prUfceWszD88KH8PcXua9s_8ZM4bgrMyKMHswLtwyLhWePcvtHUp6yyQ; ao_lo_to_n=6303473571KK6FnhfedvWg9tSSyk3xj0WOO7cLm/YtvwwmR8H9liibP9br/hwQ1Dlb4xDZyVPrTQIst5JCVz4PpnUIlDMGE07YVPYBWOm3Hrx4PaVkgaQ=; _ga_38NCVF2XST=GS2.1.s1753685428$o61$g1$t1753685444$j44$l0$h984121357; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1753685445; _ga_CN0F80S6GL=GS2.1.s1753685429$o59$g1$t1753685445$j44$l0$h0; JSESSIONID=F09543D3A3D6F890BAD0F422FCA49942',
         "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
     }
     url = "https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
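The only change in this hunk is the refreshed hard-coded SellerSprite session cookie (new rank-login-user, Sprite-X-Token and JSESSIONID values). As a hedged aside, one way to stop re-committing rotating credentials is to load them from the environment; a minimal sketch, assuming a SELLERSPRITE_COOKIE variable that is not part of this repo:

import os

# Assumption: the session cookie is exported as SELLERSPRITE_COOKIE
# before the spider runs, instead of being pasted into the source.
headers = {
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "zh-CN,zh-TW;q=0.9,zh;q=0.8",
    "Cache-Control": "no-cache",
    "Cookie": os.environ.get("SELLERSPRITE_COOKIE", ""),
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}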
@@ -368,7 +368,7 @@ def junglescout_spider(db_base):
             response = json.loads(response.text)
             break
         except:
-            time.sleep(random.uniform(15, 35.75))
+            time.sleep(random.uniform(15, 30.75))
     response_data = response['data']
     print('code::', response['code'])
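This hunk only narrows the retry back-off ceiling from 35.75 s to 30.75 s. For reference, the surrounding pattern is a bounded retry loop with a randomized sleep between attempts; a self-contained sketch of that shape (the fetch_with_retry name and attempts parameter are illustrative, not from the repo):

import json
import random
import time

def fetch_with_retry(do_request, attempts=4):
    # Try the request a few times; on any failure, sleep a random
    # interval (as tuned in this commit) before the next attempt.
    for _ in range(attempts):
        try:
            response = do_request()
            return json.loads(response.text)  # parsed dict, as in the diff
        except Exception:
            time.sleep(random.uniform(15, 30.75))
    return None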
@@ -387,7 +387,7 @@ def junglescout_spider(db_base):
             print('获取数据:', category_name['name'], i, est, year_month)
             sales = int(est)
             name_rnak_list.append((category_name['name'], i, sales, year_month))
-            time.sleep(random.uniform(20, 75.75))
+            time.sleep(random.uniform(20, 65.75))
             # break
         for i in range(4):
             try:

@@ -408,7 +408,7 @@ def junglescout_spider(db_base):
     cursor_us_mysql_db, db_us = db_class_us.us_mysql_db()  # us 站点 mysql
     time.sleep(20)
     print('当前完成。获取下一个分类销量')
-    time.sleep(random.uniform(120, 240.5))
+    time.sleep(random.uniform(90, 200.5))

 def save_site_category(site_bsr_dict=None):
py_spider/amazon_every_day_spider/junglescout_spider.py

@@ -85,12 +85,12 @@ def junglescout_spider(db_base):
         "Accept-Encoding": "gzip, deflate, br, zstd",
         "Accept-Language": "zh-CN,zh-TW;q=0.9,zh;q=0.8",
         "Cache-Control": "no-cache",
-        'Cookie': '_ga=GA1.1.19240078.1751854600; Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1751854601; HMACCOUNT=28ABEEABEFA97E4A; _gcl_au=1.1.536675967.1751854601; MEIQIA_TRACK_ID=2zWlEnsYAqnZRdhJqJ5txX7tpXm; MEIQIA_VISIT_ID=2zWlEmUkBQV745rliAtXEdAk0CJ; ecookie=ZyZ05gxOxlDTPkM1_CN; 8f00639f9c446a2d0213=54fb71d3f2c9e8acb7878e0f73abbf33; _fp=65dbbe41a37f8f9fbe702eba96328267; _gaf_fp=e03eac62da4f8988dc796341e1bd822c; current_guest=jsxcNvsgBJO1_250707-100340; rank-login-user=502219157192wVgAJpdturGN5Im+nPDQqTtoVYwVNo1oWP9MD0mtMHFwS3LrhtAUhuCnvMHsCl; rank-login-user-info="eyJuaWNrbmFtZSI6IuWViuWTiOWTiOWTiCIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTUzKioqKjEyNzAiLCJ0b2tlbiI6IjUwMjIxOTE1NzE5MndWZ0FKcGR0dXJHTjVJbStuUERRcVR0b1ZZd1ZObzFvV1A5TUQwbXRNSEZ3UzNMcmh0QVVodUNudk1Ic0NsIn0="; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiIwZ01FdlJuNWJ1dlZhVW5IZ1lKSDFRIiwiaWF0IjoxNzUxODU0NjA1LCJleHAiOjE3NTE5NDEwMDUsIm5iZiI6MTc1MTg1NDU0NSwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTQ2NjIwMSwicGkiOm51bGwsIm5uIjoi5ZWK5ZOI5ZOI5ZOIIiwic3lzIjoiU1NfQ04iLCJlZCI6Ik4iLCJwaG4iOiIxNTM2ODA1MTI3MCIsImVtIjoibWVpeW91bGFAbWVpeW91bGEuY29tIiwibWwiOiJHIn0.Ujr6_K3vHIQRw3x52QAQdTftMy6GbZ_TunmFMgW76onCy3EkBzx7uxEv-42zRRXgKLMUfJz2t0ierqXV6Evh9i-o5F0ZUBREzm48LHpGSw6Iupjx4Udc3VQwVqgiUOmYBvnTAQqmaj6iA5l06zAZcVNHQASZ5xe5QFUCllIOL0m8tf3Xad6T8u5oLHRHTTuyy5nDAqLu6ZxVOqUYYXsIzq9H2qAsPhqIgRy_5Av1zyoAcQErddadCe25H_ILmKO0Az9ANIFg4o1r_is_VFVZpGvbz8nCN0JLuY3uajAjf2JXoEzhHT9YbMP0o2TrZDRPdORV3HVK1N5uvghRaRyJvw; ao_lo_to_n="502219157192wVgAJpdturGN5Im+nPDfbd9htCMUGF/tdMS8/gmBNzv9/utYT5ucwmHHPC71S6i4RnT3fLUZW/nDI61eZx1uqLqr+hBy0X/aeJ6c/sSSc="; rank-guest-user=502219157192wVgAJpdturGN5Im+nPDYsyQgRxjbXtKYdDjju8ax0OkcsNUNGWP3xY6uiwKVVO; JSESSIONID=96FF611DCBDF20B9C6C921EAD2A55205; _ga_38NCVF2XST=GS2.1.s1751854600$o1$g1$t1751854612$j48$l0$h1855838417; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1751854612; _ga_CN0F80S6GL=GS2.1.s1751854600$o1$g1$t1751854613$j47$l0$h0',
+        'Cookie': '_ga=GA1.1.522737765.1749119222; _fp=65dbbe41a37f8f9fbe702eba96328267; MEIQIA_TRACK_ID=2y5KvHOzkFTlJAhOLENKAKWsOeb; MEIQIA_VISIT_ID=2y5KvGrMsL4O61rUcCdsLjChlRa; current_guest=r0hgXGqjbSw0_250605-186810; ecookie=xOHgcnYmcZIZKG0z_CN; x-hng=lang=zh-CN&domain=www.sellersprite.com; a40ac813159995d028ba=3d9b7c15f5787387e62acd734f598f23; Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1751973053,1752031904,1752460043,1752653436; HMACCOUNT=800EBCCFB4C6BBFB; rank-guest-user=8301172571YFpPM/DhYDVQzRAgRu7tcQTFTi48nSnOk/TNMkep2gdtR77QXyNfDPmFlYbdSsdL; rank-login-user=8301172571YFpPM/DhYDVQzRAgRu7tcWqD2KCbe1WiKcOarfxTCdls3AJ9lNFy+VA8a+RTm195; rank-login-user-info=eyJuaWNrbmFtZSI6Iuilv+mXqOWQuembqiIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTMzKioqKjU0MDciLCJ0b2tlbiI6IjgzMDExNzI1NzFZRnBQTS9EaFlEVlF6UkFnUnU3dGNXcUQyS0NiZTFXaUtjT2FyZnhUQ2RsczNBSjlsTkZ5K1ZBOGErUlRtMTk1In0=; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJLcVRRV2RPbVNNcjlKTU1qYTdXRjFRIiwiaWF0IjoxNzUyNjUzNDM4LCJleHAiOjE3NTI3Mzk4MzgsIm5iZiI6MTc1MjY1MzM3OCwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTMzNDkzLCJwaSI6bnVsbCwibm4iOiLopb_pl6jlkLnpm6oiLCJzeXMiOiJTU19DTiIsImVkIjoiTiIsInBobiI6IjEzMzkyNDE1NDA3IiwiZW0iOiJxcTE2NTMxMjE4NjUzQDE2My5jb20iLCJtbCI6IkcifQ.caY2QxTbtUVg7CQXvNJcmVo1YU0TGy3AD01dIddF76PHjYbbFh5a8zZAdAXnAKM1wNcs39d1MM8Wa-uoXHiitqDlCZsWyF9aXzco9L4wn-yU4xlMYsf7VoquZI6bxaMT2TNeX6vgQBod-NeXHYFpZQWdrH5sfZHQypkpRINb_o1QwaWvZrjufj1UwYdiypryBxTDyCuLfD4djU0PLMRXvifY6Ef86VNjAlsY8gFqDdHiVLixR2GWGdKRtoG74Ak5DX2eMDT6ak-OMrWYOaikthxIXiqdADTq2tvUCmjO4pE0oYnWhSEx9-UABo7jxJ0v_Af8B6AVu7ccC0NUUvWBMA; ao_lo_to_n=8301172571YFpPM/DhYDVQzRAgRu7tca/7vKUOAtDW4w4LhsAzrvlsqk8xCK+opMY27DGtrDKlwUwhqg///+C6QOw12iRKNIq9mCOV5+ORmOA+PwqisF4=; _gaf_fp=0f3f9e0c791b5513d38aa715d0624aab; _gcl_au=1.1.420472597.1749119222.448034571.1752653439.1752653439; JSESSIONID=0F617D64E2FD6DD92F3BB10935E3C846; _ga_38NCVF2XST=GS2.1.s1752653436$o51$g1$t1752653450$j46$l0$h366949276; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1752653451; _ga_CN0F80S6GL=GS2.1.s1752653437$o50$g1$t1752653451$j46$l0$h0',
         "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
     }
     url = "https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
     data = {
-        "station": "US",
+        "station": "UK",
         "cid": category_name['c_id'],  # 分类id
         "bsr": f"{i}"  # 排名
     }
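Read together, this hunk shows one sales-estimator request: a station code (switched here from "US" to "UK"), a category id and a BSR rank posted to bsr.json with the headers above. A minimal sketch of that call, assuming plain requests semantics — elsewhere this repo posts through an impersonate-capable client, so treat the transport as an assumption:

import requests  # assumption: the repo may use curl_cffi's requests instead

url = "https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
payload = {
    "station": "UK",   # marketplace; this commit flips it from "US"
    "cid": "12345",    # placeholder for category_name['c_id'] (分类id)
    "bsr": "100",      # placeholder for the loop variable i (排名)
}
resp = requests.post(url, data=payload, timeout=60)  # headers omitted for brevity
print(resp.status_code)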
@@ -167,7 +167,7 @@ def save_site_category(site_bsr_dict=None):
 def run():
     # get_cid()
-    junglescout_spider('us')
+    junglescout_spider('uk')

 if __name__ == '__main__':
py_spider/amazon_save_db/save_all_syn_st_minid_maxid.py

@@ -158,7 +158,7 @@ if __name__ == '__main__':
     month = 7
     engine_db_num = 14
     # for site in ['de','uk']:
-    for site in ['uk']:
+    for site in ['us']:
         time.sleep(0)
         count_all_syn_st_id(site_name=site, month=month).get_minid_maxid()
         # count_all_syn_st_id(site_name=site,month=month,engine_db_num=engine_db_num).search_term_syn()
py_spider/amazon_spider/asin_detail_pg.py

@@ -214,7 +214,8 @@ class async_asin_pg():
                 'created_time': new_date, 'current_asin': items['current_asin'],
                 'parent_asin': items["parentAsin"], 'div_id_list': items['div_id_list'],
                 'bundles_this_asins_json': items['bundles_this_asins_data_json'],
-                'video_m3u8_url': items["video_m3u8"], 'result_list_json': items['result_list_json']
+                'video_m3u8_url': items["video_m3u8"], 'result_list_json': items['result_list_json'],
+                'bundle_asin_component_json': items['bundle_asin_component_json']
             }
             if self.site_name in ['uk', 'de', 'fr', 'es', 'it']:
                 item['five_six_val'] = items['five_six_val']

@@ -222,8 +223,6 @@ class async_asin_pg():
                 item['five_six_val'] = None
             # 第二次请求
             _response_text = None
-            # if (item['weight'] is None and item['volume'] is None and item['rank'] is None and item[
-            #     'launch_time'] is None) or (item['variat_num'] > 0 and is_variat == '0'):
             if item['variat_num'] > 0 and is_variat == '0':
                 self.request_total_count_list.append(4)
             if item['variat_num'] > 0:

@@ -478,7 +477,7 @@ class async_asin_pg():
     def run(self):
         asin_list = self.save_asin_detail.read_db_data()
-        # asin_list = ['B07BXM8RZ3|2025-01|1|1|null|null','B07FM8P1Z1|2025-01|1|1|null|null','B07TWHCK69|2025-01|1|1|null|null']
+        # asin_list = ['B0BPKK2BMN|2025-01|1|1|null|null']
         if asin_list:
             for asin in asin_list:
                 self.queries_asin_queue.put(asin)
py_spider/amazon_spider/recall_cases_spider.py

@@ -14,7 +14,7 @@ import html
 import re
 from html import unescape
 import urllib.parse
+from sqlalchemy import text

 class recall_cases():
@@ -90,6 +90,15 @@ class recall_cases():
         if response_detail:
             recall_date_list = response_detail.xpath("//div[contains(text(),'Recall Date:')]/parent::div/text()")
             product_title_list = response_detail.xpath("//div[contains(text(),'Name of Product:')]/parent::div/text()")
+            if product_title_list:
+                matches = re.findall(r'[A-Za-z\-®]+(?: [A-Za-z\-®]+)*', product_title_list[-1].strip())
+                if matches:
+                    brand = ','.join(matches)
+                else:
+                    brand = None
+            else:
+                brand = None
             hazard_list = response_detail.xpath("//div[contains(text(),'危险:')]/parent::div//p//text()")
             image_url_list = response_detail.xpath("//div[@id='recall-gallery-img']//li/img/@src")
             recall_date = recall_date_list[-1].strip() if recall_date_list else None  # 召回日期
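The added branch derives a brand guess by collecting runs of Latin words (hyphens and ® included) from the product title, which effectively filters out CJK text. A standalone check of that regex on an illustrative mixed-language title (the sample string is made up):

import re

title = "保温杯 Stanley Quencher 旅行水杯"  # illustrative, not from the dataset
matches = re.findall(r'[A-Za-z\-®]+(?: [A-Za-z\-®]+)*', title.strip())
brand = ','.join(matches) if matches else None
print(brand)  # -> Stanley Quencher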
@@ -98,7 +107,7 @@ class recall_cases():
             image_url = 'https://www.cpsc.gov' + image_url_list[0].strip() if image_url_list else None  # 图片
             if recall_date:
                 recall_date = self._parse_date_str(recall_date)
-            data_list = ['us_recalls_product', recall_date, product_title, hazard, image_url, a_href]
+            data_list = ['us_recalls_product', recall_date, product_title, hazard, image_url, a_href, brand]
             return data_list
         else:
             return None
@@ -140,7 +149,7 @@ class recall_cases():
                 df = pd.DataFrame(data=save_data_list,
                                   columns=['data_type', 'recall_date', 'product_title', 'hazard',
                                            'image_url',
-                                           'ext_url', 'recall_title', 'country'])
+                                           'ext_url', 'brand', 'recall_title', 'country'])
                 df.to_sql('recall_cases_data', con=self.mysql_db, if_exists="append", index=False)
             except:
                 is_None = False
@@ -207,6 +216,7 @@ class recall_cases():
         dict_item = response.json()
         data_lists = dict_item['data']
         for data in data_lists:
+            print(data, '344444444')
             data_list = []
             try:
                 # 逐项解码
@@ -234,18 +244,24 @@ class recall_cases():
                 response2 = self._request(headers=headers, url=url)
                 response_detail = etree.HTML(response2.text)
                 src_list = response_detail.xpath("//div[@id='recall-photos']//img/@src")
+                Brand_list = response_detail.xpath("//div[contains(text(),'Brand Name')]/following-sibling::div//text()")
+                if Brand_list:
+                    brand = ''.join(Brand_list).strip()
+                else:
+                    brand = None
+                print(brand, 'Brand_list::', Brand_list)
                 if src_list:
                     image_url = 'https://www.fda.gov' + src_list[0]
                 else:
                     image_url = None
                 print('image_url:', image_url)
                 data_list.append(['us_fba_recalls', date, link_text, hazard, image_url, url, recall_title, 'us',
-                                  product_category])
+                                  product_category, brand])
                 try:
                     df = pd.DataFrame(data=data_list,
                                       columns=['data_type', 'recall_date', 'product_title', 'hazard',
                                                'image_url',
-                                               'ext_url', 'recall_title', 'country', 'product_category'])
+                                               'ext_url', 'recall_title', 'country', 'product_category', 'brand'])
                     df.drop_duplicates(['recall_date', 'product_title', 'ext_url'], inplace=True)
                     df.to_sql('recall_cases_data', con=self.mysql_db, if_exists="append", index=False)
                 except:
@@ -260,7 +276,7 @@ class recall_cases():
     def ec_europa_eu(self):
         '欧盟召回'
-        for i in range(0, 32):
+        for i in range(1, 33):
             url = 'https://ec.europa.eu/safety-gate-alerts/public/api/notification/carousel/?'
             data = {"language": "en", "page": f"{i}"}
             headers = {
@@ -284,6 +300,7 @@ class recall_cases():
             print(data, '请求列表页url:', url)
             is_None = True
             response = requests.post(url, headers=headers, impersonate="chrome120", timeout=120, json=data)
+            print(response.url)
             if response:
                 content = response.json()['content']
                 for ids in content:
@@ -314,11 +331,17 @@ class recall_cases():
                     recall_title = items['product']['versions'][0]['description']
                     print(product_title)
                     print(recall_title)
+                    brands = items['product']['brands']
+                    if brands:
+                        brand = brands[0].get('brand')
+                    else:
+                        brand = None
+                    print('brand::1', brand)
                     hazard = items['risk']['versions'][0]['riskDescription']
                     print(hazard)
-                    ext_url = 'https://ec.europa.eu/safety-gate-alerts/screen/webReport/alertDetail/' + str(items['id']) + '?lang=en'
+                    ext_url = 'https://ec.europa.eu/safety-gate-alerts/screen/webReport/alertDetail/' + str(
+                        items['id']) + '?lang=en'
+                    print('ext_url::', ext_url)
                     if items['product']['photos']:
                         image_id = items['product']['photos'][0]['id']
                         image_url = f'https://ec.europa.eu/safety-gate-alerts/public/api/notification/image/{image_id}'
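The brand now comes from the Safety Gate payload's product.brands list, guarded against an empty list. A slightly more defensive variant, sketched under the assumption that 'product' or 'brands' could also be absent from the JSON (the diff itself does not guard for that):

def extract_brand(items):
    # items is the parsed alert JSON; brands is a list of {'brand': ...} dicts
    brands = (items.get('product') or {}).get('brands') or []
    return brands[0].get('brand') if brands else None

print(extract_brand({'product': {'brands': [{'brand': 'ACME'}]}}))  # -> ACME
print(extract_brand({'product': {}}))                               # -> None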
@@ -326,30 +349,36 @@ class recall_cases():
                         image_url = None
                     print(image_url)
                     data_list.append(
-                        [date, product_category, product_title, recall_title, hazard, 'eu_recall', image_url, 'eu', ext_url,
-                         data_json])
+                        [date, product_category, product_title, recall_title, hazard, 'eu_recall', image_url, 'eu',
+                         ext_url, data_json, brand])
-                    # try:
-                    #     df = pd.DataFrame(data=data_list,
-                    #                       columns=['recall_date', 'product_category', 'product_title', 'recall_title',
-                    #                                'hazard', 'data_type', 'image_url',
-                    #                                'country', 'ext_url', 'data_json'])
-                    #     df.drop_duplicates(['recall_date', 'product_title', 'ext_url'], inplace=True)
-                    #     df.to_sql('recall_cases_data', con=self.mysql_db, if_exists="append", index=False)
+                    keys = [
+                        "recall_date", "product_category", "product_title", "recall_title",
+                        "hazard", "data_type", "image_url", "country", "ext_url", "data_json", "brand"
+                    ]
+                    # 把 list of list 转成 list of dict
+                    dict_list = [dict(zip(keys, row)) for row in data_list]
                     with self.mysql_db.begin() as conn:
                         conn.execute(
-                            f"insert into recall_cases_data (recall_date, product_category, product_title,recall_title,hazard,"
-                            f"data_type,image_url,country,ext_url,data_json) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE recall_date = values(recall_date),product_title=values (product_title),ext_url=values (ext_url)",
-                            data_list)
-                    # except:
-                    #     is_None = False
-                    #     break
+                            text("""
+                                INSERT INTO recall_cases_data
+                                (recall_date, product_category, product_title, recall_title, hazard, data_type, image_url, country, ext_url, data_json, brand)
+                                VALUES (:recall_date, :product_category, :product_title, :recall_title, :hazard, :data_type, :image_url, :country, :ext_url, :data_json, :brand)
+                                ON DUPLICATE KEY UPDATE
+                                    recall_date = VALUES(recall_date),
+                                    product_title = VALUES(product_title),
+                                    ext_url = VALUES(ext_url)
+                            """),
+                            dict_list
+                        )
                 if is_None == False:
                     break
             else:
                 break
             time.sleep(random.uniform(2, 8))

     def ec_europa_uk(self):
         'https://www.gov.uk/product-safety-alerts-reports-recalls?page=2'
         url = 'https://www.gov.uk/product-safety-alerts-reports-recalls'
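The conn.execute rewrite in the hunk above swaps a %s-style f-string INSERT fed a list of lists for a SQLAlchemy text() statement with named binds fed a list of dicts, so each row binds by column name while ON DUPLICATE KEY UPDATE preserves the MySQL upsert. A condensed, self-contained sketch of the same pattern (engine URL, column subset and sample row are placeholders):

from sqlalchemy import create_engine, text

engine = create_engine("mysql+pymysql://user:pwd@localhost/db")  # placeholder DSN

keys = ["recall_date", "product_title", "ext_url"]
data_list = [["2025-07-28", "Example product", "https://example.com/alert/1"]]
# Turn list-of-lists into list-of-dicts so the named binds line up by key
dict_list = [dict(zip(keys, row)) for row in data_list]

with engine.begin() as conn:
    conn.execute(
        text("""
            INSERT INTO recall_cases_data (recall_date, product_title, ext_url)
            VALUES (:recall_date, :product_title, :ext_url)
            ON DUPLICATE KEY UPDATE product_title = VALUES(product_title)
        """),
        dict_list,  # a list of dicts triggers executemany
    )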
@@ -392,6 +421,8 @@ class recall_cases():
                 print('产品标题:', product_title)
                 hazard_list = resp_html.xpath("//p[contains(text(),'Hazard:')]/text()")
                 print('风险:', hazard_list)
+                Brand_list = resp_html.xpath("//td[contains(text(),'Brand')]/following-sibling::td/text()")
+                brand = Brand_list[0].strip() if Brand_list else None
                 image_url_list = resp_html.xpath("//span[@class='attachment-inline']/a/@href")
                 product_category = product_category[0].strip() if product_category else None
                 product_title = product_title[0].strip().replace('Product:', '') if product_title else None
@@ -399,13 +430,13 @@ class recall_cases():
                 image_url_list = image_url_list[0].strip() if image_url_list else None
                 data_list.append(
                     [recall_title, detail_url, recall_date, product_category, product_title,
-                     hazard_list, image_url_list, 'uk_recall', 'uk'])
+                     hazard_list, image_url_list, 'uk_recall', 'uk', brand])
         if data_list:
             try:
                 df = pd.DataFrame(data=data_list,
                                   columns=['recall_title', 'ext_url', 'recall_date', 'product_category',
                                            'product_title',
-                                           'hazard', 'image_url', 'data_type', 'country'])
+                                           'hazard', 'image_url', 'data_type', 'country', 'brand'])
                 df.drop_duplicates(['recall_date', 'product_title', 'ext_url'], inplace=True)
                 df.to_sql('recall_cases_data', con=self.mysql_db, if_exists="append", index=False)
@@ -554,9 +585,12 @@ class recall_cases():
     def get_globalrecalls(self):
         # sql = 'SELECT data_json FROM global_recalls_data'
         # df_data = pd.read_sql(sql, con=self.mysql_db)
         list_url = 'https://globalrecalls.oecd.org/ws/search.xqy?end=20&lang=en&order=desc&q=&sort=date&start=0&uiLang=en'
+        # list_url = f'https://globalrecalls.oecd.org/ws/search.xqy?end={i}&lang=en&order=desc&q=&sort=date&start={i - 20}&uiLang=en'
+        print('请求url', list_url)
+        # 'https://globalrecalls.oecd.org/ws/search.xqy?end=200&lang=en&order=desc&q=&sort=date&start=180&uiLang=en'
         headers = {
             'Accept': '*/*',
             'Accept-Encoding': 'gzip, deflate, br, zstd',
             'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
@@ -574,7 +608,8 @@ class recall_cases():
             for result in result_list:
                 countryId = result['countryId']
                 imageUri = result['imageUri']
                 if countryId.lower() in ['us', 'ca', 'mx', 'nl', 'sa', 'se', 'pl', 'tr', 'be', 'uk', 'de', 'es', 'fr', 'it',
                                          'jp']:
                     date_time = result['date']
                     extUrl = result['extUrl']
@@ -600,7 +635,11 @@ class recall_cases():
                     if 'ENTITY_NOT_FOUN' in resp.text:
                         continue
                     items_data = resp.json()
+                    brands = items_data['product']['brands']
+                    if brands:
+                        brand = brands[0].get('brand')
+                    else:
+                        brand = None
                     time.sleep(random.uniform(1, 3))
                     items['country'] = countryId
                     items['reacll_time'] = date_time
@@ -616,17 +655,19 @@ class recall_cases():
                     items['data_type'] = 'global_recalls'
                     items['product_title'] = re.findall(r'^(.*?)\s*;', title_name + ';')[0]
                     items['ext_url'] = extUrl
+                    items['brand'] = brand
                     data_json = json.dumps(items_data)
                     data_list.append([items['data_type'], items['product_title'], items['productCategory'],
                                       items['reacll_time'], items['riskDescription'], items['country'],
                                       items['image_url'],
-                                      items['recall_title'], items['ext_url'], data_json])
-                    print(items)
+                                      items['recall_title'], items['ext_url'], data_json, items['brand']])
+                    print('itemsitems::', items)
             try:
                 df = pd.DataFrame(data=data_list,
                                   columns=['data_type', 'product_title', 'product_category', 'recall_date',
                                            'hazard',
-                                           'country', 'image_url', 'recall_title', 'ext_url', 'data_json'])
+                                           'country', 'image_url', 'recall_title', 'ext_url', 'data_json', 'brand'])
                 df.to_sql('recall_cases_data', con=self.mysql_db, if_exists="append", index=False)
             except:
                 print('数据重复=====')
@@ -635,17 +676,18 @@ class recall_cases():
                     print('没有解析到id')
                     items = {}
                     url = result['uri']
                     items['country'] = countryId  # 站点
                     encoded_url = urllib.parse.quote(url)
                     _url = 'https://globalrecalls.oecd.org/ws/getrecall.xqy?uiLang=en&uri=' + encoded_url
+                    print('_url::', _url)
                     resp = requests.get(_url, headers=headers, timeout=60)
                     result = resp.json()
                     print("result::", result)
                     time.sleep(random.uniform(1, 3))
                     extUrl = result['recall']['extUrl']  # 详情url
                     imageUri = result['recall']['images'][0]['imageUri']
                     encode_imageUri = urllib.parse.quote(imageUri)
                     imaurl = f"https://globalrecalls.oecd.org/ws/getdocument.xqy?uri={encode_imageUri}"  # 图片
                     date_time = result['recall']['date']
                     items['reacll_time'] = date_time
                     title_name = result['recall']['product.name']
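Both detail fetches here percent-encode the recall's internal uri before appending it to the getrecall.xqy query string, and the same is done for imageUri. A standalone sketch of that encoding step (the sample uri is made up):

import urllib.parse

uri = "recall/us/2025/example recall#1"  # illustrative value with reserved characters
encoded_url = urllib.parse.quote(uri)
_url = 'https://globalrecalls.oecd.org/ws/getrecall.xqy?uiLang=en&uri=' + encoded_url
print(_url)  # spaces and '#' are escaped so the uri survives as one query value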
@@ -653,7 +695,7 @@ class recall_cases():
                     if recall_title is None:
                         recall_title = result['recall']['images'][0]['alt.text']
                     if recall_title:
                         recall_title.replace('Image of ', '')
                     hazard = result['recall']['hazard']
                     items['recall_title'] = recall_title
                     items['productCategory'] = result['recall']['product.type']
@@ -667,7 +709,7 @@ class recall_cases():
                                       items['reacll_time'], items['riskDescription'], items['country'],
                                       items['image_url'],
                                       items['recall_title'], items['ext_url'], data_json])
                     print('没有解析到id的数据:', items)
             try:
                 df = pd.DataFrame(data=data_list,
                                   columns=['data_type', 'product_title', 'product_category', 'recall_date',
@@ -677,6 +719,7 @@ class recall_cases():
             except:
                 print('没有解析到id 存储 数据重复=====')
                 continue

     def run(self):
         # self.global_recalls()
         self.get_globalrecalls()
@@ -685,25 +728,41 @@ class recall_cases():
         self.ec_europa_eu()
         self.ec_europa_uk()
         self.gov_uk()
-        # """
-        # 数据类型,属于那个国的
-        # eu_recall
-        # global_recalls
-        # uk_drug_device 1
-        # uk_recall 2
-        # us_fba_recalls 3
-        # us_recalls_product
-        # """
-        # with self.mysql_db.begin() as conn:
-        #     sql = "SELECT data_json FROM recall_cases_data WHERE data_type='eu_recall'"
-        #     df_data = pd.read_sql(sql, con=self.mysql_db)
-        #     data_json_list = list(df_data.data_json)
-        #     for data_json in data_json_list:
-        #         data_dict = json.loads(data_json)
-        #         print(data_dict['product']['photos'][0]['id'])
-        #         imgurl = f"https://ec.europa.eu/safety-gate-alerts/public/api/notification/image/{data_dict['product']['photos'][0]['id']}"
-        #         up_sql = f"update recall_cases_data set image_url ='{imgurl}' WHERE data_type='eu_recall' and image_url like '%%/image/{data_dict['product']['photos'][0]['id']}'"
-        #         print(up_sql)
-        #         conn.execute(up_sql)
+        # with self.mysql_db.begin() as conn:
+        #     sql = "SELECT id,product_title FROM recall_cases_data WHERE data_type='us_recalls_product'"
+        #     df_data = pd.read_sql(sql, con=self.mysql_db)
+        #     df_data['id'] = df_data['id'].fillna('').astype(str)
+        #     df_data['product_title'] = df_data['product_title'].fillna('').astype(str)
+        #     data_json_list = list(df_data.id+ "|=||+||" + df_data.product_title)
+        #     for data_json_id in data_json_list:
+        #         if data_json_id:
+        #             data_json_id_list = data_json_id.split('|=||+||')
+        #             id = data_json_id_list[0]
+        #             product_title = data_json_id_list[1]
+        #             print(product_title)
+        #             if bool(re.search(r'[\u4e00-\u9fff]', product_title)):
+        #                 # print(product_title,'23333333')
+        #                 matches = re.findall(r'[A-Za-z\-®]+(?: [A-Za-z\-®]+)*', product_title.strip())
+        #                 if matches:
+        #                     brand = ','.join(matches)
+        #                 else:
+        #                     brand = None
+        #                 print(id, brand,'23444444444')
+        #                 if brand:
+        #                     brand = brand.replace('"','').replace("'","")
+        #                     up_sql = f"""update recall_cases_data set brand ="{brand}" WHERE id={id}"""
+        #                     print(up_sql)
+        #                     conn.execute(up_sql)

 if __name__ == '__main__':
py_spider/amzon_parse_db_html/pares_html.py

@@ -35,7 +35,7 @@ class Parse_asin_html():
             print('没有该 asin html')

     def search_term_html(self, site_name='us', month='04'):
-        sql = f"SELECT search_term,page,html FROM search_term_html_2025_{month} WHERE search_term='lace white tops for women' and site_name = '{site_name}'"
+        sql = f"SELECT search_term,page,html FROM search_term_html_2025_{month} WHERE search_term='resin kit' and site_name = '{site_name}'"
         print(sql)
         df = pd.read_sql(sql, con=engine_strrocks)
         print(df.values)

@@ -52,8 +52,8 @@ class Parse_asin_html():
             print('没有该 搜索词 html')

     def run(self):
-        self.asin_html()
-        # self.search_term_html(site_name='us',month='04')
+        # self.asin_html()
+        self.search_term_html(site_name='uk', month='07')

 if __name__ == '__main__':
py_spider/utils/asin_parse.py

@@ -402,7 +402,7 @@ class ParseAsinUs(object):
                 break
         if min_match_asin_data_list:
             min_match_asin_json = json.dumps(min_match_asin_data_list, ensure_ascii=False)
-        # bundles_this_asins ,Bundles with this item B0BPV8R4K8
+        # bundles_this_asins ,Bundles with this item B0BPV8R4K8 变体下方位置。和五点描述挨着
         bundles_this_asins_data_list = []
         bundles_this_asins_data_json = None
         for i in ASIN_XPATH['bundles_this_asins']:
@@ -436,7 +436,48 @@ class ParseAsinUs(object):
                 break
         if bundles_this_asins_data_list:
             bundles_this_asins_data_json = json.dumps(bundles_this_asins_data_list, ensure_ascii=False)
+        # 捆绑销售 B0DD8W2DZD This bundle contains 2 items
+        href_asin_list = self.response_s.xpath(
+            "//div[@class='bundle-title']/following-sibling::div//div[@class='bundle-components']//div[contains(@id,'bundle-component-details-component-title')]/a/@href")
+        bundle_asin_component_list = []
+        if href_asin_list:
+            bundle_component_asin_list = []
+            for href_asin in href_asin_list:
+                i_asin_list = re.findall(r'(?:[A-Z0-9]{10}|[0-9]{10})', href_asin)
+                bundle_component_asin_list.append(i_asin_list[0])
+            if bundle_component_asin_list:
+                bundle_component_asin_list = list(set(bundle_component_asin_list))
+                for bundle_component_asin in bundle_component_asin_list:
+                    print('bundle_component_asin:', bundle_component_asin)
+                    bundle_title_list = self.response_s.xpath(
+                        f"//a[contains(@href,'{bundle_component_asin}')]/parent::div[contains(@id,'component-details-component-title')]/a/text()")
+                    bundle_asin_title = bundle_title_list[0] if bundle_title_list else None
+                    bundle_img_list = self.response_s.xpath(f"//a[contains(@href,'{bundle_component_asin}')]/img/@src")
+                    bundle_asin_img = bundle_img_list[0] if bundle_img_list else None
+                    bundle_review_list = self.response_s.xpath(
+                        rf"//a[contains(@href,'{bundle_component_asin}')]/i[contains(@class,'component-details-component-review')]//following-sibling::span/text()")
+                    bundle_asin_review = bundle_review_list[0] if bundle_review_list else None
+                    bundle_starslist = self.response_s.xpath(
+                        rf"//a[contains(@href,'{bundle_component_asin}')]/i[contains(@class,'component-details-component-review-stars')]/@class")
+                    bundle_stars = bundle_starslist[0] if bundle_starslist else None
+                    bundle_stars_list = re.findall(r'a-star-(.*?) ', bundle_stars)
+                    bundle_asin_star = bundle_stars_list[0].replace('-', '.') if bundle_stars_list else None
+                    bundle_asin_price_list = self.response_s.xpath(
+                        f"//a[contains(@href,'{bundle_component_asin}')]/parent::div/following-sibling::div[contains(@class,'component-details-component-prices')]/span/text()")
+                    bundle_asin_price = bundle_asin_price_list[0] if bundle_asin_price_list else None
+                    bundle_asin_point_list = self.response_s.xpath(
+                        f"//a[contains(@href,'{bundle_component_asin}')]/parent::div/following-sibling::ul/li[contains(@id,'component-details-component-bullet-point')]/span/text()")
+                    bundle_asin_point = '|-|'.join(bundle_asin_point_list) if bundle_asin_point_list else None
+                    bundle_component_asin_item = {"bundle_component_asin": bundle_component_asin,
+                                                  "bundle_asin_title": bundle_asin_title,
+                                                  'bundle_asin_img': bundle_asin_img,
+                                                  "bundle_asin_review": bundle_asin_review,
+                                                  "bundle_asin_star": bundle_asin_star,
+                                                  "bundle_asin_price": bundle_asin_price,
+                                                  "bundle_asin_point": bundle_asin_point}
+                    bundle_asin_component_list.append(bundle_component_asin_item)
+        if bundle_asin_component_list:
+            bundle_asin_component_json = json.dumps(bundle_asin_component_list)
+        else:
+            bundle_asin_component_json = None
         # 五点描述
         for i in ASIN_XPATH['five_data']:
             five_text_list = self.response_s.xpath(i)
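Two small parsing moves in the added block are easy to verify in isolation: the 10-character ASIN is pulled from each component href with a character-class regex, and the star rating is recovered from Amazon's a-star-4-5 style CSS class. A standalone check (the sample strings are illustrative, not captured from a live page):

import re

href = "/dp/B0DD8W2DZD/ref=bundle_comp"  # made-up component link
asin = re.findall(r'(?:[A-Z0-9]{10}|[0-9]{10})', href)[0]
print(asin)  # -> B0DD8W2DZD

stars_class = "a-icon a-icon-star a-star-4-5 component-details-component-review-stars "
star = re.findall(r'a-star-(.*?) ', stars_class)[0].replace('-', '.')
print(star)  # -> 4.5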
@@ -2815,7 +2856,7 @@ class ParseAsinUs(object):
                 'customer_reviews_json': customer_reviews_json, 'together_asin_json': together_asin_json,
                 'min_match_asin_json': min_match_asin_json, 'seller_json': seller_json, 'current_asin': current_asin,
                 'div_id_list': div_id_list, 'bundles_this_asins_data_json': bundles_this_asins_data_json,
-                'video_m3u8': video_m3u8, 'result_list_json': result_list_json}
+                'video_m3u8': video_m3u8, 'result_list_json': result_list_json, 'bundle_asin_component_json': bundle_asin_component_json}
             if self.site_name == 'us':
                 item['three_four_val'] = Join_Prime_int
             elif self.site_name in ['uk', 'fr', 'it', 'es']: