import json
import random
import time

import pandas as pd
import redis
from lxml import html
from playwright.sync_api import sync_playwright
from secure_db_client import get_remote_engine


def mysql_connect():
    """Return a remote engine handle for the US site's MySQL backend.

    Thin wrapper around ``get_remote_engine``; a new handle is requested on
    every call.

    Returns:
        Whatever ``get_remote_engine`` returns for ``site_name='us'`` and
        ``db_type='mysql'``.
    """
    connection_options = {
        'site_name': 'us',   # -> database "selection"
        'db_type': 'mysql',  # -> server-side alias "mysql"
    }
    return get_remote_engine(**connection_options)


def _parse_variation_payload(html_string):
    """Decode the JSON document rendered inside the page's first ``<pre>``.

    Args:
        html_string: Full HTML of the variation-wizard search result page.

    Returns:
        The decoded JSON value (dict or list).

    Raises:
        ValueError: If the page has no ``<pre>`` element, or if its text is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    pre_nodes = html.fromstring(html_string).xpath('//pre')
    if not pre_nodes:
        raise ValueError("找不到 <pre> 节点")
    # text_content() drops any markup nested inside the <pre>.
    return json.loads(pre_nodes[0].text_content().strip())


def run(asin_list):
    """Look up each ASIN in Seller Central's variation wizard and store the result.

    A persistent Chrome context (local user profile, headed mode) is launched,
    the Seller Central home page is opened once, and then every ASIN is
    searched in turn. One row per ASIN is written via ``save_asin_var_data``:

    * the parsed JSON payload with status ``'成功'`` when a payload is found,
    * a JSON "not part of any variation family" message with status ``'成功'``,
    * a JSON failure message with status ``'失败'`` when anything raises.

    Changes compared with the previous revision:

    * The ``navigator.webdriver`` override is registered on the browser
      context, so it applies to the page that actually performs the requests.
      It used to be attached to a throwaway page that was replaced before the
      first navigation; the one-shot ``evaluate`` patches on that blank page
      never reached the scraping page and were removed.
    * The context is closed in a ``finally`` block so the Chrome profile is
      released even when a save or navigation error propagates.
    * The bare ``except:`` around the initial navigation is narrowed to
      ``Exception`` and the error is printed.
    * ``page.content()`` is fetched once per ASIN instead of twice.

    Args:
        asin_list: ASIN strings to look up. Nothing is launched when empty.
    """
    print('asin_list:::', asin_list)
    print('asin_list:::', len(asin_list))
    if not asin_list:
        return

    not_in_family_marker = 'The ASIN you searched for is not part of any variation'
    hide_webdriver_js = """
                            Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});
                            """

    with sync_playwright() as _playwright:
        browser = _playwright.chromium.launch_persistent_context(
            # Local Chrome user-data directory (keeps the logged-in session).
            user_data_dir=r"C:\Users\Administrator\AppData\Local\Google\Chrome\User Data",
            # Path of the locally installed Chrome executable.
            executable_path=r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            # Must be enabled for file downloads; the default is False.
            accept_downloads=True,
            # Headed mode: False opens a visible window, True would be headless.
            headless=False,
            bypass_csp=True,
            locale='en-GB',
            ignore_https_errors=True,
            no_viewport=True,
            slow_mo=10,
            # Reduce automation fingerprints.
            args=['--disable-blink-features=AutomationControlled', '--remote-debugging-port=9222']
        )
        try:
            # Context-level registration covers every page opened below.
            browser.add_init_script(hide_webdriver_js)

            print('打开浏览器请求asin:')
            page = browser.new_page()
            try:
                page.goto('https://sellercentral.amazon.com')
                time.sleep(random.uniform(2, 5))
            except Exception as e:
                # Best effort: record the problem against the first ASIN and
                # still attempt the batch, as before.
                print('sellercentral home page request failed:', e)
                save_asin_var_data(asin_list[0], json.dumps({"content": "网络有问题 登录账号失败。远程账号电脑检查"}), '失败')

            for asin in asin_list:
                # Random pause between lookups.
                time.sleep(random.uniform(1, 3))
                try:
                    print('请求asin', asin)
                    url = f"https://sellercentral.amazon.com/listing/varwiz/search?searchText={asin}"
                    print('url:', url)
                    page.goto(url)
                    time.sleep(random.uniform(3, 8))
                    html_string = page.content()
                    print()
                    print(html_string)
                    time.sleep(0.5)

                    if not_in_family_marker in html_string:
                        print('没有该asin,', asin)
                        save_asin_var_data(asin, json.dumps(
                            {"content": "The ASIN you searched for is not part of any variation family"}), '成功')
                        continue

                    data_json = _parse_variation_payload(html_string)
                    print(data_json)  # dict / list
                    print('获取完成', asin)
                    # NOTE(review): the parsed object is stored as-is, while the
                    # other branches store a JSON string -- confirm the engine's
                    # to_sql accepts dict/list cells, or serialise here as well.
                    save_asin_var_data(asin, data_json, '成功')
                except Exception as e:
                    print('报错，‘23232323232323232323', e)
                    save_asin_var_data(asin, json.dumps({"content": "下载失败。远程账号电脑检查"}), '失败')
        finally:
            # Release the persistent profile so the next launch can reuse it.
            browser.close()


def redis_get_asin():
    """Poll Redis forever, scrape each batch of queued ASINs, then dequeue them.

    Every round samples up to 10 random keys from Redis db 10, reads the ASIN
    stored under each key, passes the distinct ASINs to ``run`` and finally
    deletes the sampled keys. When nothing is queued the loop sleeps 3 seconds
    and polls again. This function never returns.

    Changes compared with the previous revision:

    * A key is only scheduled for deletion after its value was read
      successfully.
    * A key that disappears between ``RANDOMKEY`` and ``GET`` (``GET`` returns
      ``None``) is skipped instead of raising ``AttributeError`` and forcing a
      reconnect.
    * Batch state is rebuilt every round, so keys collected before a failure
      can no longer be deleted later, possibly on the other host.
    * ASINs are de-duplicated once, preserving the order they were read in.
    """
    # NOTE(review): host and password are hard-coded in source; consider
    # moving them to configuration.
    redis_client = redis.Redis(host='113.100.143.162', port=6379, db=10, password='fG7#vT6kQ1pX')
    while True:
        # Redis key -> ASIN for this round only.
        pending = {}
        try:
            print('轮询redis 查询，')
            for _ in range(10):
                # Pick a random key; a falsy result means the db is empty.
                random_key = redis_client.randomkey()
                if not random_key:
                    break
                if random_key in pending:
                    # RANDOMKEY may return the same key more than once.
                    continue
                value = redis_client.get(random_key)
                if value is None:
                    # Key vanished between RANDOMKEY and GET.
                    continue
                value = value.decode('utf-8')
                print('redis取出asin: ', value)
                pending[random_key] = value

            if not pending:
                time.sleep(3)
                continue

            # Different keys may carry the same ASIN; keep first-seen order.
            _asin_lis = list(dict.fromkeys(pending.values()))
            print("_asin_lis:::", _asin_lis, )
            print("_asin_lis::: len ", len(_asin_lis))
            run(_asin_lis)  # hand the ASIN list to the scraper

            for _key in pending:
                print(' 删除redis的asin:', _key)
                redis_client.delete(_key)  # dequeue the processed ASIN
        except Exception as e:
            # NOTE(review): this handler also catches errors raised by run(),
            # not only Redis errors, and it reconnects to a different host
            # (192.168.10.224) than the initial one -- confirm both are intended.
            print('查询redis报错', e)
            redis_client.close()
            redis_client = redis.Redis(host='192.168.10.224', port=6379, db=10, password='fG7#vT6kQ1pX')
            time.sleep(5)
            continue


def save_asin_var_data(asin, data_json, spider_value):
    """Write one scrape result row to ``us_asin_var_info`` via the remote engine.

    Args:
        asin: ASIN the result belongs to (``asin`` column).
        data_json: Payload stored in the ``asin_var_data`` column.
        spider_value: Status label stored in the ``spider_value`` column.
    """
    # A new engine handle is requested for every row.
    engine = mysql_connect()
    column_names = ['asin', 'asin_var_data', 'spider_value']
    rows = [[asin, data_json, spider_value]]
    print('存储数据：', len(rows))
    result_frame = pd.DataFrame(rows, columns=column_names)
    engine.to_sql(result_frame, 'us_asin_var_info')


# Script entry point: start the Redis polling loop (runs until the process is stopped).
if __name__ == '__main__':
    redis_get_asin()
