import datetime
import json
import os
import random
import re
import sys
import time
import traceback

import pandas as pd
from lxml import etree
from secure_db_client import get_remote_engine
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Module-level flag. dow_category_Product.get_category() flips it to True right
# after the parsed category page has been handed to save_category().
# NOTE(review): no reader of this flag is visible in this part of the file.
syn_state = False


# 类目分析
class dow_category_Product():
    def __init__(self, site):
        """Remember the target marketplace and reset the per-run click state.

        :param site: marketplace code kept as ``site_name``; the methods of this
            class compare it against ``'us'``, ``'uk'`` and ``'de'``.
        """
        # Becomes True once a category has been marked as finished; the next
        # category then starts with an empty click history.
        self.update_cagetory_state = False
        # Ids of the product types that were already clicked.
        self.click_product_name_list = list()
        self.site_name = site

    def mysql_connect(self, site='us'):
        """Open the remote database engines and compute the year-week label.

        Sets ``engine_mysql`` (MySQL, requested site), ``engine_us_mysql``
        (MySQL, always the ``'us'`` site) and ``engine_pg``
        (``'postgresql_15_outer'``, requested site), resets ``num`` to 0 and
        stores the current local ``"<year>-<week>"`` string in ``y_w``.

        :param site: site name forwarded to ``get_remote_engine``.
        """
        def open_engine(site_code, backend):
            # Thin wrapper so the three engines are created the same way.
            return get_remote_engine(site_name=site_code, db_type=backend)

        self.engine_mysql = open_engine(site, 'mysql')
        self.engine_us_mysql = open_engine('us', 'mysql')
        self.engine_pg = open_engine(site, 'postgresql_15_outer')
        self.num = 0
        # "%W" is the week number of the year with Monday as the first day.
        self.y_w = time.strftime("%Y-%W", time.localtime())

    def download_day_st(self, site='us'):
        """Restart Chrome with remote debugging, attach Selenium and start scraping.

        Any running ``chrome.exe`` is killed, a new Chrome is started with
        ``--remote-debugging-port`` and a WebDriver is attached to it through
        the ``debuggerAddress`` option. The driver is then handed to
        :meth:`get_category`.

        :param site: marketplace code forwarded to :meth:`get_category`.
        """
        print(f'下载 {site} 站点数据')
        try:
            pr_name = "chrome.exe"
            # Best effort: a non-zero exit status (e.g. no Chrome running) is ignored.
            os.system('%s%s' % ("taskkill /F /IM ", pr_name))
        except Exception as e:
            print("强制关闭chrome.exe失败：", e)
        port = 9222
        params_ = ""
        # params_ = "--blink-settings=imagesEnabled=false"
        os.system(f'start Chrome {params_} --remote-debugging-port={port}')

        chrome_options = Options()
        # Every option has to be set BEFORE the driver is constructed; options
        # added afterwards are never sent to the session.
        # Do not load images.
        chrome_options.add_argument('--blink-settings=imagesEnabled=false')
        # Headless mode is intentionally left disabled.
        # chrome_options.add_argument('--headless')
        # Disable the GPU.
        chrome_options.add_argument('--disable-gpu')
        # Disable the sandbox to avoid permission problems. The flag must be
        # spelled with plain ASCII hyphens to be recognised.
        chrome_options.add_argument('--no-sandbox')
        # Use /tmp instead of /dev/shm (avoids running out of shared memory).
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{port}")
        # NOTE(review): with "debuggerAddress" Selenium attaches to the Chrome
        # started above, so the command-line arguments presumably do not affect
        # that already running browser; pass them via params_ if they must apply.
        # NOTE(review): the positional driver path is only accepted by older
        # Selenium releases; confirm the pinned version before upgrading.
        driver = webdriver.Chrome(r'chromedriver.exe', options=chrome_options)
        self.get_category(site, driver)

    def get_category(self, site, driver):
        """Open the Category Insights page, pick the marketplace and start the crawl.

        The page is loaded with up to two attempts, the marketplace radio
        button for ``site`` is clicked, the rendered page is handed to
        ``save_category`` and the categories returned by ``read_category`` are
        passed to :meth:`get_category_data`.

        :param site: marketplace code; only ``'us'``, ``'uk'`` and ``'de'``
            have a radio button mapping, other values skip the click.
        :param driver: Selenium WebDriver attached to the browser.
        """
        for _attempt in range(2):
            try:
                driver.get('https://sellercentral.amazon.com/gp/homepage.html/ref=xx_home_logo_xx?')
                time.sleep(random.uniform(8, 20.25))
                driver.get('https://sellercentral.amazon.com/selection/category-insights')
                time.sleep(random.uniform(8, 20.25))
                break
            except Exception as e:
                # Catch Exception only, so Ctrl+C / SystemExit still stop the
                # script, and report why the attempt failed.
                print('打开类目分析页面失败，重试：', e)
                time.sleep(5)
        time.sleep(5)

        # Element ids of the marketplace radio buttons on the page.
        marketplace_ids = {
            'us': 'ATVPDKIKX0DER',
            'uk': 'A1F83G8C2ARO7P',
            'de': 'A1PA6795UKMFR9',
        }
        marketplace_id = marketplace_ids.get(site)
        if marketplace_id:
            driver.execute_script(
                f'document.querySelector("#{marketplace_id} > kat-radiobutton").shadowRoot.querySelector("div > div.text > slot > kat-label:nth-child(1)").click()')
        time.sleep(random.uniform(5, 10.25))

        html = etree.HTML(driver.page_source)
        self.save_category(html)
        print(333333333333333333333333)
        # Signal at module level that the category page has been saved.
        global syn_state
        syn_state = True
        Category_list = self.read_category()
        if Category_list:
            self.get_category_data(Category_list, driver, site)

    def cilik_site(self, driver):
        """Click the marketplace radio button that belongs to ``self.site_name``.

        Only ``'us'``, ``'uk'`` and ``'de'`` are mapped; for any other site the
        method does nothing.

        :param driver: object exposing Selenium's ``execute_script``.
        """
        # Element ids of the marketplace radio buttons on the page.
        marketplace_ids = {
            'us': 'ATVPDKIKX0DER',
            'uk': 'A1F83G8C2ARO7P',
            'de': 'A1PA6795UKMFR9',
        }
        # Look the id up from self.site_name for every branch; this method has
        # no local ``site`` variable to test against.
        marketplace_id = marketplace_ids.get(self.site_name)
        if marketplace_id is None:
            return
        driver.execute_script(
            f'document.querySelector("#{marketplace_id} > kat-radiobutton").shadowRoot.querySelector("div > div.text > slot > kat-label:nth-child(1)").click()')

    def get_category_data(self, Category_list, driver, site):
        """Scrape Category Insights for every category and persist the results.

        For each category the marketplace and the category radio button are
        selected, every product type and every item type keyword below it is
        visited, one row per keyword is written to
        ``{site}_aba_profit_category_insights`` through ``engine_pg`` and the
        category is finally set to ``state = 3`` in
        ``seller_category_insights_syn``. After the last category a completion
        record is appended to ``workflow_progress`` through ``engine_us_mysql``.

        :param Category_list: category labels; each one is used as the
            ``label`` of the ``kat-radiobutton`` to click.
        :param driver: Selenium WebDriver attached to the browser.
        :param site: marketplace code, used as table-name prefix.
        """
        print('Category_list:::', Category_list)
        # Dedicated counter name: no other local in this method may reuse it,
        # otherwise the restart check at the end of the loop tests the wrong value.
        category_count = 0
        for Category in Category_list:
            self.cilik_site(driver)
            print(Category, '   22222222222222222222222222222222222222')
            if self.update_cagetory_state:
                # The previous category was completed: start a fresh click history.
                self.click_product_name_list = []
            try:
                category_count += 1
                print("Category_name 名称 11111", Category)
                driver.execute_script(f"""document.querySelector("kat-radiobutton[label='{Category}']").click()""")
                time.sleep(1)
                html = etree.HTML(driver.page_source)
                product_nodes = html.xpath(
                    '//h2[contains(text(),"Product Type")]/following-sibling::div/div')
                product_nums = 0
                for product_node in product_nodes:
                    try:
                        scraped = self._scrape_product_type(
                            driver, product_node, Category, site, product_nums)
                    except Exception:
                        print('============  产品分类 下标。超出 。无数据 ==========', f"\n{traceback.format_exc()}")
                        time.sleep(2)
                        continue
                    if not scraped:
                        # Already handled earlier; does not count towards the restart limit.
                        continue
                    product_nums += 1
                    if product_nums > 12:
                        # Restart the browser after 13 product types.
                        product_nums = 0
                        print(product_nums, 'product_nums 重新启动 浏览器，')
                        driver.close()
                        driver.quit()
                        time.sleep(1)
                        # NOTE(review): run() is defined outside this block; after it
                        # returns, this loop goes on with the driver that was just
                        # closed - confirm that this is intended.
                        self.run()
            except Exception as e:
                print(e, '执行错误')
                time.sleep(random.uniform(10, 20))
                self.reboot_driver(driver, site)  # restart / refresh
            self._mark_category_synced(Category)
            if category_count > 1:
                driver.close()
                driver.quit()
                print('重新启动 浏览器，')
                self.run()
        workflow_everyday_list = [
            [self.site_name, self.y_w, '类目分析抓取完成', 3, f'{self.site_name}_aba_profit_category_insights', 'week', '类目分析',
             '是']]
        df_seller_asin_account = pd.DataFrame(data=workflow_everyday_list,
                                              columns=['site_name', 'date_info', 'status', 'status_val',
                                                       'table_name', 'date_type', 'page', 'is_end'])
        self.engine_us_mysql.to_sql(df_seller_asin_account, 'workflow_progress', if_exists='append')

    def _scrape_product_type(self, driver, product_node, category, site, product_nums):
        """Scrape and store all item type keywords of one product type.

        :param product_node: lxml element of the product type; its ``id``
            attribute identifies the radio button to click.
        :param product_nums: running counter, only used in log output.
        :returns: ``False`` when the product type was already clicked before,
            ``True`` after it has been scraped and stored.
        :raises IndexError: when the node has no ``id`` attribute.
        """
        product_id = product_node.xpath('./@id')[0]
        print(product_nums, "Product_name3222222222::", product_id.upper())
        if product_id in self.click_product_name_list:
            print(product_nums, "已经抓取::", product_id.upper())
            return False

        self._reset_browser_caches(driver)
        self.click_product_name_list.append(product_id)
        self.update_cagetory_state = False
        driver.execute_script(f"document.querySelector('#{product_id} > kat-radiobutton').click()")
        time.sleep(2)
        product_page = etree.HTML(driver.page_source)
        keyword_nodes = product_page.xpath(
            '//h2[contains(text(),"Item Type Keyword")]/following-sibling::div/div')

        rows = []
        for keyword_node in keyword_nodes:
            try:
                rows.append(self._scrape_keyword_insights(
                    driver, product_page, keyword_node, category, product_id))
                print('数据：', rows)
            except Exception as e:
                # A keyword with missing or unparsable data is skipped.
                print('============  下标。超出 。 ==========', e)
        print('存储数据长度：', len(rows))
        self._persist_insight_rows(rows, site)
        return True

    @staticmethod
    def _reset_browser_caches(driver):
        """Clear local storage, caches and resource timings, then force a GC."""
        driver.execute_script("localStorage.clear();")  # clear local storage
        time.sleep(0.5)
        driver.execute_script(
            "caches.keys().then(function(names) { for (let name of names) { caches.delete(name); } });")
        driver.execute_script("window.performance.clearResourceTimings();")
        time.sleep(0.5)
        # Enable the heap profiler, force a garbage collection, disable it again.
        driver.execute_cdp_cmd('HeapProfiler.enable', {})
        driver.execute_cdp_cmd('HeapProfiler.collectGarbage', {})
        driver.execute_cdp_cmd('HeapProfiler.disable', {})
        time.sleep(0.5)

    def _scrape_keyword_insights(self, driver, product_page, keyword_node, category, product_id):
        """Click one item type keyword and return its data as one table row.

        :param product_page: lxml tree of the page taken after the product type
            was clicked; used to look up the keyword label.
        :param keyword_node: lxml element whose ``id`` identifies the keyword.
        :returns: list of 41 values in the column order used by
            :meth:`_persist_insight_rows`.
        :raises Exception: IndexError / TypeError / ValueError when an expected
            value is missing or not numeric; the caller skips such keywords.
        """
        keyword_ids = keyword_node.xpath('./@id')
        print("Keyword_id:", keyword_ids)
        keyword_labels = product_page.xpath(f"//div[@id='{keyword_ids[0]}']/kat-radiobutton/@label")
        print('Keyword', keyword_labels)
        driver.find_element(By.XPATH, f'//kat-radiobutton[@value="{keyword_ids[0]}"]').click()
        time.sleep(2)
        page = etree.HTML(driver.page_source)

        def first_text(xpath_expr):
            # First XPath result or None.
            found = page.xpath(xpath_expr)
            return found[0] if found else None

        # Most popular keywords -> JSON list (None when the section is absent).
        most_popular_json = None
        popular_items = page.xpath("//div[@class='most-popular-keywords-container']/kat-list//li")
        if popular_items:
            popular_keywords = []
            for item in popular_items:
                names = item.xpath('.//div[2]/text()')
                print(names, 1111)
                counts = item.xpath('.//div/b/text()')
                print(counts, 2222)
                popular_keywords.append({"most_popular_keywords": names[0] if names else None,
                                         'most_popular_search_nums': counts[0] if counts else None})
            print('most_popular_keyword_list::', popular_keywords)
            most_popular_json = json.dumps(popular_keywords)

        # Return reasons -> JSON list (None when the section is absent).
        reasons_returns_json = None
        reason_nodes = page.xpath("//div[@class='percentage-list-item-container']/div")
        if reason_nodes:
            reasons = []
            for node in reason_nodes:
                values = node.xpath("./div[@class='value']/text()")
                strings = node.xpath("./div[@class='string']/text()")
                reason = {"value": values[0] if values else None,
                          'string': strings[0] if strings else None}
                print(reason)
                reasons.append(reason)
            reasons_returns_json = json.dumps(reasons)
            print('reasons_returns_json:', reasons_returns_json)

        # Ratios; the sub text looks like '1.86‰ product type average'.
        print('测试')
        ratio_texts = page.xpath(
            '//div[@class="big-text-section-name"][1]/div[@class="big-text"]/text()')
        if ratio_texts:
            search_ratio = re.findall(r'(.*?)‰', ratio_texts[0])[0]  # search-to-purchase ratio
            return_ratio = re.findall(r'(.*?)%', ratio_texts[1])[0]  # return ratio
        else:
            search_ratio = None
            return_ratio = None
        average_texts = page.xpath(
            '//div[@class="big-text-section-name"][1]/div[@class="sub-text"]/text()')
        if average_texts:
            product_average = re.findall(r'(.*?)‰', average_texts[0])[0]  # product type average
            return_product_average = re.findall(r'(.*?)%', average_texts[1])[0]  # product type return average
        else:
            product_average = None
            return_product_average = None

        print('测试2')
        sellers_text = first_text(
            '//h4[contains(text(),"Number of sellers")]/parent::div/following-sibling::div/text()')
        brands_text = first_text(
            '//h4[contains(text(),"Number of new brands")]/parent::div/following-sibling::div/text()')
        asins_text = first_text(
            '//h4[contains(text(),"Number of ASINs")]/parent::div/following-sibling::div/text()')
        new_asins_text = first_text(
            '//h4[contains(text(),"Number of new ASINs")]/parent::div/following-sibling::div/text()')
        per_asin_text = first_text(
            '//h4[contains(text(),"Offers per ASIN")]/parent::div/following-sibling::div/text()')
        ad_parts = page.xpath(
            '//h4[contains(text(),"Advertisement Spend")]/parent::div/following-sibling::div//text()')
        advertisement_text = '|-|'.join(ad_parts) if ad_parts else None
        star_widths = page.xpath(
            '//h4[contains(text(),"Star Ratings")]/parent::div/parent::div/parent::div/div//@width')
        # The first width is dropped before the remaining ones are joined.
        star_text = '|-|'.join(star_widths[1:]) if star_widths else None

        print('测试3')
        five_star, three_star, two_star, one_star = self._star_rating_shares(star_text)
        ad_spend, majority_spend = self._extract_ad_spend(advertisement_text)
        print("week 周：", self.y_w)
        print(five_star, three_star, two_star, one_star)
        print(ad_spend, majority_spend)
        print('原始数据')
        print([sellers_text, brands_text, asins_text, new_asins_text,
               per_asin_text, advertisement_text, star_text])
        print(category, '   22222222222222222222222222222222222222')

        brands_int = self._parse_abbreviated_count(brands_text)
        asins_int = self._parse_abbreviated_count(asins_text)
        new_asins_int = self._parse_abbreviated_count(new_asins_text)
        per_asin_int = self._parse_abbreviated_count(per_asin_text)
        print('转成int')
        print([brands_int, asins_int, new_asins_int, per_asin_int])

        top_summary = self._fetch_grossing_summary(driver, 'Top', 'top')
        print('top_majority_spend', top_summary[-1])
        news_summary = self._fetch_grossing_summary(driver, 'News', 'news')
        print('news_majority_spend', news_summary[-1])

        # float() intentionally raises for a missing ratio so that incomplete
        # keywords are skipped by the caller instead of being stored.
        return [category, product_id, keyword_labels[0], float(search_ratio),
                float(product_average), float(return_ratio), float(return_product_average),
                self.y_w, sellers_text, brands_text, asins_text, new_asins_text,
                per_asin_text, advertisement_text, star_text, brands_int,
                asins_int, new_asins_int, per_asin_int, five_star, three_star, two_star,
                one_star, ad_spend, majority_spend, most_popular_json, reasons_returns_json,
                top_summary[0], news_summary[0], *top_summary[1:], *news_summary[1:]]

    def _fetch_grossing_summary(self, driver, tab, prefix):
        """Load one grossing tab and return ``[raw_json, six parsed values]``.

        The JSON string comes from ``new_top_grossing``; it is parsed by
        ``parse_input`` (which receives ``None`` when the payload has no
        ``products_aggregate_sales``). The six values are read from the keys
        ``{prefix}_sales_amount``, ``_sales_volume``, ``_search_ratio``,
        ``_return_ratio``, ``_adv_spend`` and ``_majority_spend``.
        """
        raw_json = self.new_top_grossing(driver, tab)
        payload = json.loads(raw_json)
        parsed = self.parse_input(
            prefix, payload if payload.get('products_aggregate_sales') else None)
        fields = ('sales_amount', 'sales_volume', 'search_ratio',
                  'return_ratio', 'adv_spend', 'majority_spend')
        return [raw_json] + [parsed[f'{prefix}_{field}'] for field in fields]

    @staticmethod
    def _parse_abbreviated_count(text):
        """Convert a count such as ``"1.2K"``, ``"3M"`` or ``"1,234"`` to an int.

        :returns: ``None`` for empty or missing text.
        :raises ValueError: when the text is not numeric.
        """
        if not text:
            return None
        cleaned = text.strip().replace(',', '')
        # K = thousand, M = million.
        for suffix, factor in (('K', 1_000), ('M', 1_000_000)):
            if suffix in cleaned:
                # round() guards against float artefacts such as 1149.9999.
                return int(round(float(cleaned.replace(suffix, '')) * factor))
        return int(float(cleaned))

    @staticmethod
    def _star_rating_shares(star_text):
        """Return the first four widths of ``star_text`` as shares of the total.

        :param star_text: widths joined with ``'|-|'`` or ``None``.
        :returns: four values rounded to two decimals, all 0 when there is no text.
        :raises IndexError: when fewer than four widths are present.
        """
        if not star_text:
            return 0, 0, 0, 0
        widths = [float(width) for width in star_text.split('|-|')]
        total = sum(widths)
        print('总数：', total)
        # Zero widths are mapped to 0 directly, which also avoids 0 / 0.
        shares = [width / total if width != 0 else 0 for width in widths]
        return (round(shares[0], 2), round(shares[1], 2),
                round(shares[2], 2), round(shares[3], 2))

    def _extract_ad_spend(self, advertisement_text):
        """Extract the first two currency amounts from the advertisement text.

        :returns: ``(ad_spend, majority_spend)`` as strings; ``(0, 0)`` when
            there is no text, ``None`` for an amount that is not present or
            when the site has no known currency symbol.
        """
        if not advertisement_text:
            return 0, 0
        symbol = {'us': '$', 'uk': '£', 'de': '€'}.get(self.site_name)
        if symbol is None:
            # An empty pattern would match the empty string at every position.
            return None, None
        amounts = re.findall(re.escape(symbol) + r'([\d.]+)', advertisement_text)
        ad_spend = amounts[0] if amounts else None
        # A single amount must not discard the whole row with an IndexError.
        majority_spend = amounts[1] if len(amounts) > 1 else None
        return ad_spend, majority_spend

    @staticmethod
    def _quote_sql_literal(value):
        """Return ``value`` as text with single quotes doubled for a SQL literal."""
        return str(value).replace("'", "''")

    def _persist_insight_rows(self, rows, site):
        """Replace this week's rows in PostgreSQL, retrying every 30 s until it works.

        Existing rows with the same category / product type / keyword /
        year-week are deleted first, then ``rows`` are appended. Nothing is
        written when ``rows`` is empty. Only PostgreSQL is written; no MySQL
        copy is made.
        """
        table = f'{site}_aba_profit_category_insights'
        columns = ['category', "product_type", "item_type_keyword",
                   "search_ratio", "product_average", "return_ratio",
                   "return_product_average", "year_week", 'sellers',
                   'new_brands',
                   'asin', 'new_asin', 'per_asin', 'advertisement_spend',
                   'star_ratings', 'new_brands_int', 'asin_int',
                   'new_asin_int', 'per_asin_int', 'five_star',
                   'three_star', 'two_star', 'one_star', 'ad_spend',
                   'majority_spend', 'most_popular_keywords_item',
                   'reasons_returns_json', 'top_data_json',
                   'news_data_json', 'top_sales_amount', 'top_sales_volume',
                   'top_search_ratio',
                   'top_return_ratio', 'top_adv_spend',
                   'top_majority_spend',
                   'news_sales_amount',
                   'news_sales_volume',
                   'news_search_ratio', 'news_return_ratio',
                   'news_adv_spend',
                   'news_majority_spend']
        quote = self._quote_sql_literal
        while True:
            try:
                if rows:
                    with self.engine_pg.begin() as conn_pg:
                        for row in rows:
                            # Values are escaped: an apostrophe in a name would
                            # otherwise break the statement on every retry.
                            dele_sql = (
                                f"DELETE from {table} where category='{quote(row[0])}' "
                                f"and product_type='{quote(row[1])}' "
                                f"and item_type_keyword='{quote(row[2])}' "
                                f"and year_week='{quote(self.y_w)}'")
                            print('删除删除删除pg：', dele_sql)
                            conn_pg.execute(dele_sql)
                    df = pd.DataFrame(data=rows, columns=columns)
                    self.engine_pg.to_sql(df, table, if_exists="append")
                    print(rows)
                    print('存储成功 pg')
                break
            except Exception as e:
                print('存储报错。。。。：', e, '33333333333333333333333333232323')
                time.sleep(30)

    def _mark_category_synced(self, category):
        """Set ``state = 3`` for ``category``, retrying every 20 s until it works.

        On success ``update_cagetory_state`` is set so that the click history
        is reset before the next category.
        """
        quoted = self._quote_sql_literal(category)
        while True:
            try:
                with self.engine_pg.begin() as conn:
                    update_sql = f"update seller_category_insights_syn set state =3 where category='{quoted}'"
                    print('更新update_sql：', update_sql)
                    conn.execute(update_sql)
                    self.update_cagetory_state = True
                break
            except Exception as e:
                print(e, '修改状态3报错')
                time.sleep(20)

    def safe_get(self, lst, idx, default=None):
        """Return ``lst[idx]``, or ``default`` when ``idx`` is out of range.

        Negative indices are never resolved from the end of the sequence;
        they yield ``default`` exactly like indices past the end.
        """
        if idx < 0 or idx >= len(lst):
            return default
        return lst[idx]

    def parse_input(self, type, input):
        sales_amount = 0
        sales_volume = 0
        search_ratio = 0
        return_ratio = 0
        adv_spend = 0
        majority_spend = 0

        if input:
            products_aggregate_sales = input.get('products_aggregate_sales', [])[0]
            if products_aggregate_sales:
                split = products_aggregate_sales.split("|")
                sales_amount_str = self.safe_get(split, 1, '').partition("$")[-1]
                sales_volume_str = self.safe_get(re.findall(r'\d+', self.safe_get(split, 2, '')), 0, "0")
                if len(sales_amount_str) > 0:
                    sales_amount = float(sales_amount_str.strip().replace(",", ""))
                sales_volume = float(sales_volume_str)
                pass

            search_ratio = float(input['search_ratio'] or -1)
            return_ratio = float(input['return_ratio'] or -1)
            big_text_Advertisement = input['big_text_Advertisement']
            if big_text_Advertisement:
                split = big_text_Advertisement.split("|-|")
                adv_spend_str = self.safe_get(split, 0, '').partition("$")[-1]
                majority_spend_str = self.safe_get(split, 1, '').partition("$")[-1]
                adv_spend = (float(adv_spend_str.strip()) if adv_spend_str != '' else 0)
                majority_spend = (float(majority_spend_str.strip()) if majority_spend_str != '' else 0)

        return {
            f"{type}_sales_amount": sales_amount,
            f"{type}_sales_volume": sales_volume,
            f"{type}_search_ratio": search_ratio,
            f"{type}_return_ratio": return_ratio,
            f"{type}_adv_spend": adv_spend,
            f"{type}_majority_spend": majority_spend
        }

    def analysis_top_Newly_html(self, driver):
        """Scrape the currently displayed product section and return it as JSON.

        Parses ``driver.page_source`` and collects the aggregate-sales label,
        the de-duplicated product rows, the most popular keywords, the
        search-to-purchase and return ratios, the advertisement-spend text and
        the star-rating widths.

        Args:
            driver: Object exposing ``page_source`` with the HTML to parse.

        Returns:
            str: ``json.dumps`` of a dict with the keys
            ``products_aggregate_sales``, ``asin_json``,
            ``most_popular_json_dict``, ``search_ratio``, ``return_ratio``,
            ``big_text_Advertisement`` and ``big_text_star``. Pieces that are
            not found on the page are ``None`` (or empty lists) instead of
            raising ``IndexError``.
        """

        def first(nodes):
            # First XPath hit, or None when nothing matched.
            return nodes[0] if nodes else None

        html_top = etree.HTML(driver.page_source)
        # e.g. "10 Products | Aggregate sales (past 12 months): $103,698,296.00 | 1,222,405 units"
        products_aggregate_sales = html_top.xpath("//div[@class='sa-aggregation-label']/text()")

        # Product rows of the grid; each field is the first match of its XPath.
        row_fields = (
            ('img_src', './div//img/@src'),
            ('a_href', ".//div//a/@href"),
            ('title', "./div//span[@class='popover-content']/text()"),
            ('brand', ".//span[contains(@id,'cell-brandName')]//div/text()"),
            ('bsr_rank', ".//span[contains(@id,'cell-bsrBnRank')]//div/text()"),
            ('buy_price', ".//span[contains(@id,'cell-buyBoxPrice')]//div/text()"),
            ('rating', ".//span[contains(@id,'avgRating')]//kat-star-rating/@value"),
            ('review', ".//span[contains(@id,'avgRating')]//kat-star-rating/@review"),
            ('offers', ".//span[contains(@id,'cell-offerCount')]/text()"),
        )
        items_list = []
        seen = set()
        for div in html_top.xpath("//div[@aria-label='Press SPACE to select this row.']"):
            items = {name: first(div.xpath(path)) for name, path in row_fields}
            # The same row can match more than once; identical field tuples
            # are kept only the first time.
            key = tuple(items.values())
            if key in seen:
                continue
            seen.add(key)
            # Rows without image, link, title or brand are dropped.
            if items['img_src'] and items['a_href'] and items['title'] and items['brand']:
                items_list.append(items)
        print(items_list)

        print(products_aggregate_sales)
        # Most Popular Keywords
        most_popular_list = html_top.xpath(
            "//div[@class='most-popular-keywords-container']/kat-list//li")
        most_popular_keyword_list = []
        if most_popular_list:
            for most_popular in most_popular_list:
                keyword = first(most_popular.xpath('.//div[2]/text()'))
                if keyword is None:
                    # An entry without keyword text carries no information;
                    # skip it rather than abort the whole section.
                    continue
                most_popular_keyword_list.append({
                    "most_popular_keywords": keyword,
                    'most_popular_search_nums': first(most_popular.xpath('.//div/b/text()')),
                })
            print('most_popular_keyword_list::', most_popular_keyword_list)

        # Search to purchase ratio | Return ratio: keep the text before the
        # unit symbol, or None when the tile or the symbol is missing.
        top_ratio_list = html_top.xpath('//div[@class="big-text-section-name"][1]/div[@class="big-text"]/text()')
        search_ratio = None
        return_ratio = None
        if top_ratio_list:
            match = re.search(r'(.*?)‰', top_ratio_list[0])
            search_ratio = match.group(1) if match else None
        if len(top_ratio_list) > 1:
            match = re.search(r'(.*?)%', top_ratio_list[1])
            return_ratio = match.group(1) if match else None

        # Advertisement spend: every text node of the block, joined with "|-|".
        big_text_Advertisement_list = html_top.xpath(
            '//h4[contains(text(),"Advertisement Spend")]/parent::div/following-sibling::div//text()')
        big_text_Advertisement = (
            '|-|'.join(big_text_Advertisement_list) if big_text_Advertisement_list else None)

        # Star ratings: the width attributes of the block; the first width is
        # discarded (presumably it belongs to a container - TODO confirm).
        big_text_star_list = html_top.xpath(
            '//h4[contains(text(),"Star Ratings")]/parent::div/parent::div/parent::div/div//@width')
        big_text_star = '|-|'.join(big_text_star_list[1:]) if big_text_star_list else None

        data_dict = {'products_aggregate_sales': products_aggregate_sales, 'asin_json': items_list,
                     'most_popular_json_dict': most_popular_keyword_list, 'search_ratio': search_ratio,
                     'return_ratio': return_ratio,
                     'big_text_Advertisement': big_text_Advertisement, 'big_text_star': big_text_star}
        print('data_dict',data_dict)
        return json.dumps(data_dict)

    def new_top_grossing(self, driver, click_type):
        """Switch the page to one product section and return its scraped data.

        ``click_type == 'Top'`` clicks the "Top Grossing" radio button (2nd
        child of ``#section_id``); any other value clicks "Newly launched"
        (3rd child). The section is then parsed with
        ``analysis_top_Newly_html`` and that JSON string is returned.
        """
        if click_type == 'Top':
            banner = '点击Top Grossing'
            click_js = 'document.querySelector("#section_id > kat-radiobutton:nth-child(2) > kat-label").click()'
        else:
            banner = '点击Newly launched'
            click_js = 'document.querySelector("#section_id > kat-radiobutton:nth-child(3) > kat-label").click()'

        print(banner)
        time.sleep(1)
        driver.execute_script(click_js)
        # Pause between the click and reading the page source
        # (presumably so the section can re-render).
        time.sleep(1)
        return self.analysis_top_Newly_html(driver)

    def reboot_driver(self, driver, site):
        """Reload the category-insights page and resume the pending categories.

        The page is opened and a radio button is clicked through JavaScript.
        When that fails, the traceback is printed, the driver is closed and
        quit, ``self.run()`` is invoked and the reload is attempted again.
        Once the reload succeeds, the categories returned by
        ``read_category`` (if any) are handed to ``get_category_data``.

        Args:
            driver: Selenium driver to reload.
            site: Site code forwarded to ``get_category_data``.
        """
        page_url = 'https://sellercentral.amazon.com/selection/category-insights'
        click_radio_js = (
            'document.querySelector("#ATVPDKIKX0DER > kat-radiobutton").shadowRoot.querySelector("div > div.text > slot > kat-label:nth-child(1)").click()')

        while True:
            try:
                driver.get(page_url)
                time.sleep(random.uniform(10, 20))
                driver.execute_script(click_radio_js)
                time.sleep(2)
            except Exception:
                print('reboot_driver详细报错')
                print(traceback.format_exc())
                driver.close()
                driver.quit()
                time.sleep(5)
                self.run()
                time.sleep(5)
                # NOTE(review): the next iteration reuses the driver that was
                # just closed and quit - confirm this retry is intended.
            else:
                break

        pending = self.read_category()
        if pending:
            time.sleep(5)
            self.get_category_data(pending, driver, site)

    def read_category(self):
        # 接着上次中断的继续
        print('接着上次中断的继续')
        self.mysql_connect(site=self.site_name)
        select_sql = 'select category  from seller_category_insights_syn where state =1'
        df = self.engine_pg.read_sql(select_sql)
        category_list = list(df.category)
        print(category_list)
        if category_list:
            return category_list
        else:
            self.mysql_connect()
            workflow_everyday_list = [
                [self.site_name, self.y_w, '类目分析抓取完成', 3, f'{self.site_name}_aba_profit_category_insights', 'week',
                 '类目分析', '是']]
            df_seller_asin_account = pd.DataFrame(data=workflow_everyday_list,
                                                  columns=['site_name', 'date_info', 'status', 'status_val',
                                                           'table_name', 'date_type', 'page', 'is_end'])
            self.engine_us_mysql.to_sql(df_seller_asin_account, 'workflow_progress', if_exists='append'
                                        )

    def save_category(self, html):
        """Collect the category ids and labels from the page and seed the sync table.

        Every ``div`` below the "Category" heading contributes its ``id`` and
        its first ``label`` attribute. ``self.category_item`` is rebuilt as a
        ``label -> id`` mapping. When at least one category was found and the
        module-level ``syn_state`` flag equals ``False``, the labels are
        upserted into ``seller_category_insights_syn``.

        Args:
            html: Parsed document exposing ``xpath``.

        Returns:
            list: The category ids in page order (empty when none were found).
        """
        nodes = html.xpath('//h2[contains(text(),"Category")]/following-sibling::div/div')
        self.category_item = {}
        category_ids = []
        label_rows = []
        for node in nodes:
            category_id = node.xpath('./@id')[0]
            label = node.xpath('.//@label')[0]
            self.category_item[label] = category_id
            category_ids.append(category_id)
            label_rows.append([label])  # one single-column row per label
        print(category_ids, '11111111111111111')

        if not category_ids:
            return category_ids

        print('插入数据')
        with self.engine_pg.begin() as conn:
            if syn_state == False:
                now = datetime.datetime.now()
                # weekday() == 0 is Monday. NOTE(review): the message below
                # mentions 9:20 while the check is for hour 1 - confirm which
                # one is intended.
                if now.weekday() == 0 and now.hour == 1:
                    print("当前时间是每周一早上9点20分")
                    conn.execute('TRUNCATE seller_category_insights_syn')
                conn.execute(
                    'INSERT INTO seller_category_insights_syn (category) VALUES (%s) ON CONFLICT (category) DO UPDATE SET category = EXCLUDED.category',
                    label_rows)
            print('存储初始数据成功')
        return category_ids

    def run(self):
        """Connect the database engines, then start the crawl for ``self.site_name``."""
        target_site = self.site_name
        self.mysql_connect(site=target_site)
        self.download_day_st(site=target_site)


if __name__ == '__main__':
    # Script entry point: the site code (e.g. "us") is the only argument.
    if len(sys.argv) < 2:
        # Exit with a readable usage line instead of a bare IndexError.
        sys.exit(f'usage: {sys.argv[0]} <site>')
    site = sys.argv[1]
    dow_category_Product(site).run()
