import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import traceback
from sqlalchemy import create_engine
import requests
from sqlalchemy import text
from lxml import etree
import os
import socket
from selenium.webdriver.common.keys import Keys
import re
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.common.exceptions import WebDriverException, TimeoutException
import random
import pydub
from speech_recognition import Recognizer, AudioFile
from time import sleep
from random import randint
from selenium.common.exceptions import NoSuchElementException
import numpy as np
import datetime
import redis


class H10():
    def __init__(self):
        self.db_syn = 'all_h10_syn'
        self.site_name = 'us'
        self.site_name_csv = 'us'
        self.sku_list = []
        self.err_asin_list = []
        self.err_asins_adv_list = []
        self.asin_state_5_list = []
        self.sku_state = False
        self.ip = self.get_ip_address()
        self.useremail_state = True
        self.redis_db()
        # self.mysql_inv()

    def redis_db(self):
        """Create a Redis client and keep it on ``self.redis_db1`` (read by ``get_token``)."""
        connection = {
            'host': '120.79.147.190',
            'port': 6379,
            'password': 'Vm5vQH4ydFXh',
            'db': 0,
        }
        self.redis_db1 = redis.Redis(**connection)

    def get_token(self):
        """
        Read the token from Redis and store it, decoded as UTF-8, on ``self.val_str``.

        The value is field ``'disanfang'`` of hash ``'thirdParty:token:inventory'``.
        The method loops until a value has been read and decoded: on any error
        (including ``hget`` returning something without ``.decode``) it rebuilds the
        client through ``self.redis_db()``, waits 20 seconds and tries again.
        """
        while True:
            try:
                val = self.redis_db1.hget('thirdParty:token:inventory', 'disanfang')
                self.val_str = val.decode('utf-8')
                print(self.val_str)
                break
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt / SystemExit
                # must still be able to stop this otherwise endless loop.
                self.redis_db()
                print('redis 获取token 报错')
                time.sleep(20)

    def get_ip_address(self, ip=None):
        """
        Return the configuration entry registered for a host IP address.

        When ``ip`` is not given, the local address is discovered by connecting a UDP
        socket towards ``baidu.com`` and reading the socket's own address. The socket
        is closed again before returning.

        :param ip: optional IPv4 address to look up instead of probing the network
        :return: the three-item list stored for that address (by appearance: a Windows
                 Downloads path, an account e-mail and its password), or ``[]`` when
                 the address is not in the table
        :raises OSError: when ``ip`` is omitted and the probe cannot resolve/connect
        """
        if ip is None:
            # The context manager closes the probe socket, which used to leak.
            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
                s.connect(('baidu.com', 0))
                ip = s.getsockname()[0]
        # NOTE(review): account credentials are stored in plain text in source control;
        # consider moving them to a secrets store.
        user_pw_dict = {'192.168.10.244': [r'C:\Users\win10-244\Downloads', 'YSWGHF422023@outlook.com', 'soundasia422023@'],
                        '192.168.10.245': [r'C:\Users\win10-245\Downloads', 'CherryY2023@outlook.com', '20230322Yy@'],
                        '192.168.10.246': [r'C:\Users\win10-246\Downloads', 'H10961961@outlook.com', 'soundasia961961@'],
                        '192.168.10.247': [r'C:\Users\win10-247\Downloads', 'X18756082657@outlook.com', 'Zyx13075039897@'],
                        '192.168.0.120': [r'C:\Users\1\Downloads', 'yashengweige678@outlook.com', '987654321yswg@'],
                        '192.168.0.121': [r'C:\Users\1\Downloads', 'a18756082657@outlook.com', '12345678Ll@'],
                        '192.168.0.126': [r'C:\Users\Administrator\Downloads', 'yswg12345678@outlook.com', 'yswg654321@'],
                        '192.168.0.127': [r'C:\Users\1\Downloads', 'wretyu2023@outlook.com', 'Sffgserter@1'],
                        '192.168.0.122': [r'C:\Users\1\Downloads', 'yashengweige678@outlook.com', '987654321yswg@'],
                        '192.168.0.124': [r'C:\Users\1\Downloads', 'yswg006@hotmail.com', 'Chianbugye@8346148'],
                        }
        # Unknown hosts yield an empty list, exactly as before.
        return user_pw_dict.get(ip) or []

    def mysql_inv(self):
        """
        Build the SQLAlchemy engine for the ``inventory`` database on ``self.engine_adv``.

        Retries without limit; after the n-th failure it prints the error and sleeps
        ``n * 20`` seconds before the next try.
        """
        dsn = 'mysql+pymysql://chenjianyun:Cjy8751_07@rm-wz956fk600d89g2g7uo.mysql.rds.aliyuncs.com:3306/inventory?charset=utf8mb4'
        attempt = 1
        while True:
            try:
                self.engine_adv = create_engine(dsn)  # , pool_recycle=3600
            except Exception as e:
                print("error_mysql_connect:", e, f"\n{traceback.format_exc()}")
                time.sleep(attempt * 20)
                attempt += 1
            else:
                break

    def mysql_connect(self, site='us'):
        """
        Create the two SQLAlchemy engines stored on ``self.engine_us`` and ``self.engine``.

        ``self.engine_us`` always points at the ``selection`` schema. ``self.engine``
        points at ``selection`` when ``self.site_name`` is ``'us'`` or ``'mx'`` and at
        ``selection_<site>`` otherwise. Each engine is created in its own endless retry
        loop that prints the error and sleeps ``attempt * 20`` seconds after a failure.

        :param site: suffix used for the per-site schema name
        """
        settings = {
            "mysql_port": 3306,
            "mysql_db": "selection",
            "mysql_user": "XP_Yswg2025_PY",
            "mysql_pwd": "Gd1pGJog1ysLMLBdML8w81",
            "mysql_host": "rm-wz9yg9bsb2zf01ea4yo.mysql.rds.aliyuncs.com",
        }
        # Everything of the connection URL up to (not including) the schema name.
        server = (f'mysql+pymysql://{settings["mysql_user"]}:{settings["mysql_pwd"]}'
                  f'@{settings["mysql_host"]}:{settings["mysql_port"]}')

        attempt = 1
        while True:
            try:
                self.engine_us = create_engine(f'{server}/selection?charset=utf8mb4')  # , pool_recycle=3600
            except Exception as e:
                print("error_mysql_connect:", e, f"\n{traceback.format_exc()}")
                time.sleep(attempt * 20)
                attempt += 1
            else:
                break

        attempt = 1
        while True:
            try:
                # NOTE(review): the branch tests self.site_name while the schema suffix
                # comes from the `site` argument — confirm this is intended.
                if self.site_name in ('us', 'mx'):
                    schema = 'selection'
                else:
                    schema = f'selection_{site}'
                self.engine = create_engine(f'{server}/{schema}?charset=utf8mb4')  # , pool_recycle=3600
            except Exception as e:
                print("error_mysql_connect:", e, f"\n{traceback.format_exc()}")
                time.sleep(attempt * 20)
                attempt += 1
            else:
                break

    def web_drver(self):
        # port = 9222
        # params_ = ""
        # params_ = "--blink-settings=imagesEnabled=false"
        # os.system(f'start Chrome {params_} --remote-debugging-port={port}')
        chrome_options = Options()
        # 禁止加载图片
        # chrome_options.add_argument('--blink-settings=imagesEnabled=false')
        # chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{port}") # 打开调用本地浏览器
        # 设置driver以无头浏览的模式运行
        # chrome_options.add_argument('-headless')
        # 禁用GPU（可选）
        chrome_options.add_argument('-disable-gpu')
        chrome_options.add_argument("--disable-notifications")
        chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--ignore-ssl-errors')  # 忽略ssl错误
        chrome_options.add_argument("disable-blink-features=AutomationControlled")
        chrome_options.add_argument('–no-sandbox')  # 沙盒模式运行
        # 忽略无关的日志
        chrome_options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
        # 禁止硬件加速，避免严重占用cpu
        chrome_options.add_argument('--disable-gpu')
        # 隐身模式（无痕模式）
        # chrome_options.add_argument('--incognito')
        chrome_options.add_argument("--start-maximized")
        chrome_options.add_argument('--disable-gpu')
        # 以最高权限运行
        chrome_options.add_argument('--no-sandbox')
        # 启用打印预览。
        chrome_options.add_argument("--enable-print-preview")
        # 在工具 栏增加一个书签按钮
        chrome_options.add_argument("--bookmark-menu")
        # 启用书签同步
        chrome_options.add_argument("--enable-sync")
        chrome_options.add_argument('–allow-running-insecure-content')  # 允许运行不安全的内容
        chrome_options.add_argument('–disable-web-security')  # 关闭安全策略
        chrome_options.add_argument('–disable-xss-auditor')  # 禁止xss防护
        # 解决浏览器弹出下载多个文件 允许
        chrome_options.add_experimental_option("prefs", {"profile.default_content_setting_values.notifications": 1})
        chrome_options.add_argument(' window-size=1920,1080')
        chrome_options.add_experimental_option("prefs",
                                               {"profile.default_content_setting_values.automatic_downloads": 1})
        # 创建一个带有配置文件的 Chrome 浏览器实例
        self.driver = webdriver.Chrome(options=chrome_options)

        # 设置headers
        self.driver.execute_cdp_cmd("Network.setExtraHTTPHeaders",
                                    {"headers":
                                        {
                                            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
                                        }
                                    })

        # 防止网站检测selenium的webdriver
        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                    Object.defineProperty(navigator, 'webdriver', {
                        get: () => False
                    })
                """})

        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                                                                    Object.defineProperty(navigator, 'webdriver', {
                                                                      get: () => undefined
                                                                    })
                                                                  """
        })
        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """const toBlob=HTMLCanvasElement.prototype.toBlob;const toDataURL=HTMLCanvasElement.prototype.toDataURL;const getImageData=CanvasRenderingContext2D.prototype.getImageData;function noisify(canvas,context){if(context){const shift={'r':Math.floor(Math.random()*10)-5,'g':Math.floor(Math.random()*10)-5,'b':Math.floor(Math.random()*10)-5,'a':Math.floor(Math.random()*10)-5};const width=canvas.width;const height=canvas.height;if(width&&height){const imageData=getImageData.apply(context,[0,0,width,height]);for(let i=0;i<height;i++){for(let j=0;j<width;j++){const n=((i*(width*4))+(j*4));imageData.data[n+0]=imageData.data[n+0]+shift.r;imageData.data[n+1]=imageData.data[n+1]+shift.g;imageData.data[n+2]=imageData.data[n+2]+shift.b;imageData.data[n+3]=imageData.data[n+3]+shift.a}}window.top.postMessage("canvas-fingerprint-defender-alert",'*');context.putImageData(imageData,0,0)}}}Object.defineProperty(HTMLCanvasElement.prototype,"toBlob",{"value":function(){noisify(this,this.getContext("2d"));return toBlob.apply(this,arguments)}});Object.defineProperty(HTMLCanvasElement.prototype,"toDataURL",{"value":function(){noisify(this,this.getContext("2d"));return toDataURL.apply(this,arguments)}});Object.defineProperty(CanvasRenderingContext2D.prototype,"getImageData",{"value":function(){noisify(this.canvas,this);return getImageData.apply(this,arguments)}});document.documentElement.dataset.cbscriptallow=true;if(document.documentElement.dataset.cbscriptallow!=="true"){const iframes=[...window.top.document.querySelectorAll("iframe[sandbox]")];for(var 
i=0;i<iframes.length;i++){if(iframes[i].contentWindow){if(iframes[i].contentWindow.CanvasRenderingContext2D){iframes[i].contentWindow.CanvasRenderingContext2D.prototype.getImageData=CanvasRenderingContext2D.prototype.getImageData}if(iframes[i].contentWindow.HTMLCanvasElement){iframes[i].contentWindow.HTMLCanvasElement.prototype.toBlob=HTMLCanvasElement.prototype.toBlob;iframes[i].contentWindow.HTMLCanvasElement.prototype.toDataURL=HTMLCanvasElement.prototype.toDataURL}}}}""", })
        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """var config={"random":{"value":function(){return Math.random()},"item":function(e){var rand=e.length*config.random.value();return e[Math.floor(rand)]},"number":function(power){var tmp=[];for(var i=0;i<power.length;i++){tmp.push(Math.pow(2,power[i]))}return config.random.item(tmp)},"int":function(power){var tmp=[];for(var i=0;i<power.length;i++){var n=Math.pow(2,power[i]);tmp.push(new Int32Array([n,n]))}return config.random.item(tmp)},"float":function(power){var tmp=[];for(var i=0;i<power.length;i++){var n=Math.pow(2,power[i]);tmp.push(new Float32Array([1,n]))}return config.random.item(tmp)}},"spoof":{"webgl":{"buffer":function(target){var proto=target.prototype?target.prototype:target.__proto__;const bufferData=proto.bufferData;Object.defineProperty(proto,"bufferData",{"value":function(){var index=Math.floor(config.random.value()*arguments[1].length);var noise=arguments[1][index]!==undefined?0.1*config.random.value()*arguments[1][index]:0;arguments[1][index]=arguments[1][index]+noise;window.top.postMessage("webgl-fingerprint-defender-alert",'*');return bufferData.apply(this,arguments)}})},"parameter":function(target){var proto=target.prototype?target.prototype:target.__proto__;const getParameter=proto.getParameter;Object.defineProperty(proto,"getParameter",{"value":function(){window.top.postMessage("webgl-fingerprint-defender-alert",'*');if(arguments[0]===3415)return 0;else if(arguments[0]===3414)return 24;else if(arguments[0]===36348)return 30;else if(arguments[0]===7936)return"WebKit";else if(arguments[0]===37445)return"Google Inc.";else if(arguments[0]===7937)return"WebKit WebGL";else if(arguments[0]===3379)return config.random.number([14,15]);else if(arguments[0]===36347)return config.random.number([12,13]);else if(arguments[0]===34076)return config.random.number([14,15]);else if(arguments[0]===34024)return config.random.number([14,15]);else if(arguments[0]===3386)return config.random.int([13,14,15]);else if(arguments[0]===3413)return 
config.random.number([1,2,3,4]);else if(arguments[0]===3412)return config.random.number([1,2,3,4]);else if(arguments[0]===3411)return config.random.number([1,2,3,4]);else if(arguments[0]===3410)return config.random.number([1,2,3,4]);else if(arguments[0]===34047)return config.random.number([1,2,3,4]);else if(arguments[0]===34930)return config.random.number([1,2,3,4]);else if(arguments[0]===34921)return config.random.number([1,2,3,4]);else if(arguments[0]===35660)return config.random.number([1,2,3,4]);else if(arguments[0]===35661)return config.random.number([4,5,6,7,8]);else if(arguments[0]===36349)return config.random.number([10,11,12,13]);else if(arguments[0]===33902)return config.random.float([0,10,11,12,13]);else if(arguments[0]===33901)return config.random.float([0,10,11,12,13]);else if(arguments[0]===37446)return config.random.item(["Graphics","HD Graphics","Intel(R) HD Graphics"]);else if(arguments[0]===7938)return config.random.item(["WebGL 1.0","WebGL 1.0 (OpenGL)","WebGL 1.0 (OpenGL Chromium)"]);else if(arguments[0]===35724)return config.random.item(["WebGL","WebGL GLSL","WebGL GLSL ES","WebGL GLSL ES (OpenGL Chromium"]);return getParameter.apply(this,arguments)}})}}}};config.spoof.webgl.buffer(WebGLRenderingContext);config.spoof.webgl.buffer(WebGL2RenderingContext);config.spoof.webgl.parameter(WebGLRenderingContext);config.spoof.webgl.parameter(WebGL2RenderingContext);document.documentElement.dataset.wgscriptallow=true;if(document.documentElement.dataset.wgscriptallow!=="true"){const iframes=[...window.top.document.querySelectorAll("iframe[sandbox]")];for(var 
i=0;i<iframes.length;i++){if(iframes[i].contentWindow){if(iframes[i].contentWindow.WebGLRenderingContext){iframes[i].contentWindow.WebGLRenderingContext.prototype.bufferData=WebGLRenderingContext.prototype.bufferData;iframes[i].contentWindow.WebGLRenderingContext.prototype.getParameter=WebGLRenderingContext.prototype.getParameter}if(iframes[i].contentWindow.WebGL2RenderingContext){iframes[i].contentWindow.WebGL2RenderingContext.prototype.bufferData=WebGL2RenderingContext.prototype.bufferData;iframes[i].contentWindow.WebGL2RenderingContext.prototype.getParameter=WebGL2RenderingContext.prototype.getParameter}}}}"""})
        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """var rand={"noise":function(){var SIGN=Math.random()<Math.random()?-1:1;return Math.floor(Math.random()+SIGN*Math.random())},"sign":function(){const tmp=[-1,-1,-1,-1,-1,-1,+1,-1,-1,-1];const index=Math.floor(Math.random()*tmp.length);return tmp[index]}};Object.defineProperty(HTMLElement.prototype,"offsetHeight",{get(){const height=Math.floor(this.getBoundingClientRect().height);const valid=height&&rand.sign()===1;const result=valid?height+rand.noise():height;return result}});Object.defineProperty(HTMLElement.prototype,"offsetWidth",{get(){const width=Math.floor(this.getBoundingClientRect().width);const valid=width&&rand.sign()===1;const result=valid?width+rand.noise():width;return result}});document.documentElement.dataset.fbscriptallow=true;if(document.documentElement.dataset.fbscriptallow!=="true"){const iframes=[...window.top.document.querySelectorAll("iframe[sandbox]")];for(var i=0;i<iframes.length;i++){if(iframes[i].contentWindow){if(iframes[i].contentWindow.HTMLElement){iframes[i].contentWindow.HTMLElement.prototype.offsetWidth=HTMLElement.prototype.offsetWidth;iframes[i].contentWindow.HTMLElement.prototype.offsetHeight=HTMLElement.prototype.offsetHeight}}}}"""})
        self.driver.maximize_window()
        self.longin()

    def activate_recaptcha(self, api):
        """
        Tick the reCAPTCHA checkbox, open the audio challenge and return the clip URL.

        :param api: the Selenium driver showing the page with the reCAPTCHA widget
                    (the original note suggests undetected_chromedriver.v2 as a
                    drop-in replacement to reduce driver fingerprinting)
        :return: the ``src`` attribute of the ``audio-source`` element, or ``""`` when
                 the play button cannot be found
        """
        # Wait for the checkbox iframe, enter it and click the checkbox.
        checkbox_frame_locator = (By.XPATH, "//iframe[@title='reCAPTCHA']")
        checkbox_frame = WebDriverWait(api, 10).until(
            presence_of_element_located(checkbox_frame_locator))
        api.switch_to.frame(checkbox_frame)
        checkbox = api.find_element(By.CLASS_NAME, "recaptcha-checkbox-border")
        checkbox.click()

        # Back to the main document, then into the challenge iframe.
        api.switch_to.default_content()
        challenge_frame = api.find_element(
            By.XPATH, "//iframe[@title='recaptcha challenge expires in two minutes']")
        api.switch_to.frame(challenge_frame)
        sleep(randint(2, 4))

        # Switch the challenge to its audio variant.
        audio_button = api.find_element(By.ID, "recaptcha-audio-button")
        audio_button.click()
        sleep(randint(2, 4))

        # Press the play button; without it there is no clip to fetch.
        try:
            play_button = api.find_element(By.XPATH, "//button[@aria-labelledby]")
            play_button.click()
        except NoSuchElementException:
            return ""

        # Read the URL of the audio clip.
        source_element = api.find_element(By.ID, "audio-source")
        audio_url = source_element.get_attribute("src")
        print('音频 audio_url::', audio_url)
        return audio_url

    def handle_audio(self, audio_url: str) -> str:
        """
        Download the reCAPTCHA audio clip and convert it from MP3 to WAV.

        The clip is written to ``audio.mp3`` in the current working directory
        (replacing any earlier download) and exported to ``audio.wav``.

        :param audio_url: URL of the reCAPTCHA audio clip
        :return: path of the WAV file (``"audio.wav"``)
        :raises requests.RequestException: when the download fails, times out or
                 answers with an HTTP error status
        """
        path_audio_mp3 = "audio.mp3"
        path_audio_wav = "audio.wav"
        # Download the clip; a timeout keeps a stalled connection from hanging the login.
        print(audio_url)
        res = requests.get(audio_url, timeout=30)
        # Fail here rather than hand an error page to the MP3 decoder.
        res.raise_for_status()
        # Mode "wb" truncates an existing file, so the old clip needs no explicit
        # delete; the context manager closes the handle even if the write fails.
        with open(path_audio_mp3, "wb") as audio_file:
            audio_file.write(res.content)
        # urllib.request.urlretrieve(audio_url, path_audio_mp3)
        # Convert mp3 --> wav
        pydub.AudioSegment.from_mp3(path_audio_mp3).export(path_audio_wav, format="wav")
        # Return the WAV path (the original note says WAV improves recognition accuracy)
        return path_audio_wav

    def parse_audio(self, path_audio_wav: str, language: str = None) -> str:
        """
        Transcribe a WAV file to text with ``Recognizer.recognize_google``.

        :param path_audio_wav: local path of the reCAPTCHA audio clip in WAV format
        :param language: language tag passed to the recogniser; ``None`` means ``"en-US"``
        :return: the recognised text
        """
        if language is None:
            language = "en-US"
        recognizer = Recognizer()
        # Read the whole file into an audio object.
        with AudioFile(path_audio_wav) as source:
            recorded = recognizer.record(source)
        # Send the recording for recognition and hand back the transcript.
        return recognizer.recognize_google(recorded, language=language)

    def submit_recaptcha(self, api, answer: str) -> bool:
        """
        Type the transcript into the audio-challenge answer box and press ENTER.

        The driver must currently be inside the challenge frame that contains the
        ``audio-response`` input.

        :param api: the Selenium driver positioned on the audio challenge
        :param answer: recognised text; it is lower-cased before being typed
        :return: ``True`` once the answer was sent, ``False`` when the input box is
                 missing (``NoSuchElementException``) or a ``NameError`` occurs
        """
        try:
            answer_box = api.find_element(By.ID, "audio-response")
            # Clear, type, then ENTER (the original notes this reduces automation traits).
            answer_box.clear()
            answer_box.send_keys(answer.lower())
            print('输入识别后的内容：', answer)
            answer_box.send_keys(Keys.ENTER)
        except (NameError, NoSuchElementException):
            return False
        return True

    def _google(self, url):
        """POST ``url`` to the service at 192.168.10.228:22222 and return the ``answer`` field of its JSON reply."""
        reply = requests.post('http://192.168.10.228:22222', data={'url': url}, timeout=15)
        payload = reply.json()
        return payload['answer']

    def longin(self):
        """
        Log in to Helium 10 and store the account id on ``self.account_id``.

        Makes up to 10 attempts. Each attempt opens the sign-in page, fills in
        ``self.email_name`` / ``self.pw``, submits the form, tries to switch the UI
        language to English, solves the audio reCAPTCHA when one is shown, and finally
        reads ``accountId=<digits>`` from the current URL.

        When an attempt fails, the handler first checks whether the browser has left
        the sign-in page with an account id in the URL (treated as success). Otherwise
        it sets a randomised ``User-Agent`` header and retries; on the sixth failure
        the browser is restarted through ``self.web_drver()``.

        If all attempts fail the method returns without setting ``self.account_id``.

        Changes compared with the previous version:
        * ``find_element_by_id`` (removed in Selenium 4.3) is replaced by
          ``find_element(By.ID, ...)``, as used elsewhere in this file;
        * a URL without ``accountId`` in the failure handler no longer raises
          ``IndexError`` out of the retry loop;
        * the duplicated captcha-solving sequence lives in one nested helper;
        * best-effort clicks catch ``WebDriverException`` instead of everything.
        """
        submit_js = 'document.querySelector("#login-form > button").click()'
        fallback_submit_js = (
            """document.querySelector('button[type="submit"][class="btn btn-secondary btn-block"]').click()""")
        language_menu_xpath = '//*[@id="h10-style-container"]/div[2]/header/div[2]/div[7]/div'

        def solve_recaptcha():
            """Run the audio reCAPTCHA flow once; raise when any step fails."""
            audio_url = self.activate_recaptcha(self.driver)
            if not audio_url:
                raise WebDriverException
            # Convert MP3 --> WAV before recognition
            path_audio_wav = self.handle_audio(audio_url=audio_url)
            answer = self.parse_audio(path_audio_wav)
            # answer = self._google(audio_url)
            print('识别后 文本内容 answer:', answer)
            # Fill the transcript into the answer box
            if not self.submit_recaptcha(self.driver, answer=answer):
                raise TimeoutException

        def click_submit():
            """Click the login button, falling back to the generic submit-button selector."""
            try:
                self.driver.execute_script(submit_js)
            except WebDriverException:
                self.driver.execute_script(fallback_submit_js)

        sleep_num = 0
        for _ in range(10):
            try:
                # https://members.helium10.com/user/signin?re=L2NlcmVicm8=
                self.driver.get('https://members.helium10.com/user/signin')
                sleep(randint(2, 4))
                email_box = self.driver.find_element(By.ID, 'loginform-email')
                email_box.send_keys(self.email_name)
                sleep(randint(1, 2))
                password_box = self.driver.find_element(By.ID, 'loginform-password')
                password_box.send_keys(self.pw)
                sleep(randint(1, 2))
                # Best effort: the captcha checkbox is not always present.
                try:
                    self.driver.execute_script('document.querySelector("#CaptchaId").click()')
                except WebDriverException:
                    pass
                sleep(randint(1, 2))
                # Best effort: first submit of the form.
                try:
                    self.driver.execute_script(submit_js)
                except WebDriverException:
                    pass
                # Best effort: wait for the header language menu and switch to English.
                # This only succeeds when the login already went through.
                wait = WebDriverWait(self.driver, 10)
                try:
                    language_menu = wait.until(
                        EC.presence_of_element_located((By.XPATH, language_menu_xpath)))
                    language_menu.click()
                    sleep(4)
                    click_English = self.driver.find_element(
                        By.XPATH, '//div[@data-testid="undefined-dropdown"]//div[@data-value="en"]')
                    click_English.click()
                    sleep(randint(8, 10))
                except WebDriverException:
                    pass
                html = self.driver.page_source
                if "loginform-recaptcha-recaptcha-login-form" in html:
                    print('出现验证码。开始识别验证码')
                    solve_recaptcha()
                sleep(randint(1, 2))
                self.driver.switch_to.default_content()
                sleep(randint(1, 2))
                # Best effort: submit again (no-op failure when already logged in).
                try:
                    click_submit()
                except WebDriverException:
                    pass

                sleep(randint(6, 10))
                current_url = self.driver.current_url
                print(current_url, '2332222222222222232323')
                if "/signin" in current_url:
                    # Still on the sign-in page: solve the captcha and submit once more.
                    # Failures here are NOT swallowed; they trigger the retry handler.
                    solve_recaptcha()
                    sleep(randint(1, 2))
                    self.driver.switch_to.default_content()
                    sleep(randint(1, 2))
                    click_submit()
                    sleep(randint(4, 6))
                account_url = self.driver.current_url
                # IndexError here (no accountId in the URL) is handled as a failed attempt.
                self.account_id = re.findall(r'accountId=(\d+)', account_url)[0]
                print('self.account_id::', self.account_id)
                break
            except Exception as e:
                print(traceback.format_exc(), e, '登录Log In')
                time.sleep(2)
                current_url = self.driver.current_url
                if "signin" not in current_url:
                    # The browser left the sign-in page; accept the login if the URL
                    # carries an account id, otherwise fall through and retry.
                    account_ids = re.findall(r'accountId=(\d+)', current_url)
                    if account_ids:
                        self.account_id = account_ids[0]
                        print('self.account_id::', self.account_id)
                        break
                sleep_num += 2
                # Set a randomised User-Agent header for the next attempt
                ua = f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(100, 114)}.0.{random.randint(1000, 5000)}.{random.randint(1, 181)} Safari/537.36'

                self.driver.execute_cdp_cmd("Network.setExtraHTTPHeaders",
                                            {"headers":
                                                {
                                                    "User-Agent": ua,
                                                }
                                            })
                sleep(5)
                if sleep_num > 10:
                    # Restart the browser after repeated failures.
                    # NOTE(review): web_drver() itself calls longin(), and this loop
                    # then continues with its remaining attempts — confirm that the
                    # nested login followed by further attempts is intended.
                    sleep_num = 0
                    self.driver.close()
                    self.driver.quit()
                    sleep(3)
                    self.web_drver()
                continue

    def verify(self):
        """
        Dismiss the "Enable Multi-Factor Authentication (MFA)" prompt if the page shows it.

        Does nothing when the prompt text is absent from the page source. Otherwise it
        reads the ``class`` attribute of the "No, Maybe Later" / "No, I don’t want to
        enable MFA" button from the page source and clicks the element with that class
        through JavaScript. The whole step is best effort: failures are printed, never
        raised.

        Fix: the class is read from a ``<button>``, but the old selector looked for a
        ``div`` with that class, so the click presumably never reached the button. The
        button is now targeted first; the former ``div`` selector remains as fallback.
        """
        page = self.driver.page_source
        if 'Enable Multi-Factor Authentication (MFA)' not in page:
            return
        print('出现是否启用安全验证 No, I don’t want to enable MFA')
        try:
            resp = etree.HTML(page)
            button_class = resp.xpath(
                '//button[contains(text(),"No, Maybe Later")]/@class|//button[contains(text(),"No, I don’t want to enable MFA")]/@class')[
                0]
            self.driver.execute_script(
                f"""(document.querySelector("button[class='{button_class}']") || document.querySelector("div[class='{button_class}']")).click()""")
            time.sleep(2)
        except Exception as e:
            # Best effort: report the failure and carry on with the page as it is.
            print('verify: could not dismiss MFA prompt:', e)

    def webdrvier_html(self, asin, asinstype):
        # 点击选择站点
        for i in range(4):
            try:
                _url = self.driver.current_url
                if "concurrent-sessions" in _url or 'signin' in _url:
                    self.longin()
                if asin not in self.err_asin_list and self.useremail_state:
                    print('cerebro界面', self.site_name_url)
                    self.driver.get(f'https://members.helium10.com/cerebro?accountId={self.account_id}')
                    time.sleep(10)
                    if 'You are viewing a demo of Cerebro' in self.driver.page_source:
                        print(self.email_name, '账号过期')
                        self.driver.refresh()
                        continue
                        # self.useremail_state = False
                        # self.send_ms('You are viewing a demo of Cerebro')
                    self.verify()
                    if self.site_name_url == 'Amazon.co.uk':
                        self.site_name_csv = 'GB'
                        host = 'amazon.co.uk'
                        alt = 'United Kingdom'
                    elif self.site_name_url == 'Amazon.de':
                        self.site_name_csv = 'DE'
                        host = 'amazon.de'
                        alt = 'Germany'
                    elif self.site_name_url == 'Amazon.fr':
                        self.site_name_csv = 'FR'
                        host = 'amazon.fr'
                        alt = 'France'
                    elif self.site_name_url == 'Amazon.es':
                        self.site_name_csv = 'ES'
                        host = 'amazon.es'
                        alt = 'Spain'
                    elif self.site_name_url == 'Amazon.it':
                        self.site_name_csv = 'IT'
                        host = 'amazon.it'
                        alt = 'Italy'
                    elif self.site_name_url == 'Amazon.co.jp':
                        self.site_name_csv = 'JP'
                        host = 'amazon.co.jp'
                        alt = 'Japan'
                    elif self.site_name_url == 'Amazon.co.mx':
                        self.site_name_csv = 'MX'
                        host = 'amazon.co.mx'
                        alt = 'Mexico'
                    else:
                        self.site_name_csv = 'US'
                        host = 'amazon.com'
                        alt = 'United States'
                    try:
                        self.driver.execute_script(
                            f"""document.querySelector("img[loading='lazy']").click()""")
                        time.sleep(1)
                    except:
                        self.driver.execute_script(
                            f"""document.querySelector("img[alt='{alt}']").click()""")
                        time.sleep(1)
                    self.verify()
                    # 切换站点
                    self.driver.execute_script(f"""document.querySelector("div[data-value='{host}']").click()""")
                    time.sleep(2)
                    # 输入asin
                    print('输入asin', asin)
                    if ',' in asin:
                        _asin_lsit = asin.split(',')
                        for _asin in _asin_lsit:
                            self.driver.find_element(By.XPATH, '//*[@id="re-container"]//input').send_keys(f'{_asin},')
                            time.sleep(1.5)
                    else:
                        self.driver.find_element(By.XPATH, '//*[@id="re-container"]//input').send_keys(f'{asin},')
                        time.sleep(1)
                    # 勾选排除变体
                    self.driver.execute_script("""document.querySelector("input[name='excludeVariations']").click()""")
                    # 点击 get keyword
                    time.sleep(1)
                    self.driver.execute_script('document.querySelector("#CerebroSearchButtons > button").click()')
                    time.sleep(3)
                    html = self.driver.page_source
                    if 'You have reached the limit of the uses' in html:
                        self.useremail_state = False
                        self.send_ms('You have reached the limit of the uses')
                        break
                    if 'Wrong entered data or no results' in html:
                        print('没有报告可下载11111', asin)
                        self.err_asin_list.append(asin)
                        break
                    elif 'Incorrect asins' in html:
                        print('中间框下载词 没有报告')
                        self.err_asins_adv_list.append(asin)
                        break
                    elif 'errorCodes.undefined' in html:
                        continue
                    html = self.driver.page_source
                    self.verify()
                    time.sleep(2)
                    try:
                        if 'searched this product before' in html:
                            print('33333333333')
                            self.driver.execute_script(
                                """document.querySelector("button[data-testid='runnewsearch']").click()""")
                            sleep(randint(20, 35))

                    except:
                        print('点击 run 报错')
                    # 点击下载
                    time.sleep(8)
                    self.driver.execute_script('window.scrollBy(0, 600);')
                    time.sleep(1)
                    html = self.driver.page_source
                    if 'You have reached the limit of the uses' in html:
                        self.useremail_state = False
                        self.send_ms('You have reached the limit of the uses')
                        break
                    if 'Wrong entered data or no results' in html:
                        print('没有报告可下载2222', asin)
                        self.err_asin_list.append(asin)
                        break
                    elif 'Incorrect asins' in html:
                        print('中间框下载词 没有报告')
                        self.err_asins_adv_list.append(asin)
                        break
                    elif 'errorCodes.undefined' in html:
                        continue
                    self.verify()

                    resp = etree.HTML(html)
                    try:
                        div_class = resp.xpath(
                            '//div[contains(text(),"Amazon Choice")]/parent::div/following-sibling::div/@class')
                    except:
                        time.sleep(2.5)
                        try:
                            div_class = resp.xpath(
                                '//div[contains(text(),"Amazon Choice")]/parent::div/following-sibling::div/@class')
                        except:
                            print('报错22222222222222')
                    if asinstype:
                        time.sleep(1.5)
                        print('点击选择亚马逊精选 勾选')
                        try:
                            script = f"""
                                                                            const elements = document.querySelectorAll("div[class='{div_class[0]}']>div");
                                                                            const secondElement = elements[1];
                                                                            secondElement.click();
                                                                            """
                        except:
                            if i == 2:
                                self.err_asins_adv_list.append(asin)
                        self.driver.execute_script(script)
                        time.sleep(2)
                        html1 = self.driver.page_source
                        resp1 = etree.HTML(html1)
                        span_class = resp1.xpath(
                            '//span[contains(text(),"Analyzed product")]/parent::div/following-sibling::div/@class')[0]
                        # 选择亚马逊精选参数1
                        self.driver.execute_script(
                            f"""document.querySelector("div[class='{span_class}']").click()""")
                        time.sleep(2)
                        # 选择亚马逊精选参数2
                        self.driver.execute_script(
                            f"""document.querySelector("div[class='{span_class}']").click()""")
                        time.sleep(2)
                        # 点击添加
                        self.driver.execute_script(
                            """document.querySelector("button[data-testid='applyfilters']").click()""")
                    time.sleep(6.5)
                    # 下载报告
                    # 点击下载csv按钮
                    self.driver.execute_script(
                        """document.querySelector(" button[data-testid='exportdata']").click()""")
                    # 点击选择csv
                    time.sleep(1)
                    print('点击选择csv')
                    self.driver.execute_script("""document.querySelector("div[data-testid='csv']").click()""")
                    time.sleep(15)
                    break
            except Exception as e:
                print('详细报错')
                print(traceback.format_exc(), e)
                self.driver.refresh()
                time.sleep(1)
                if i == 2:
                    self.err_asin_list.append(asin)
                continue

    def nex_page(self, asin_list, asinstype=None):
        print('asin_list::', asin_list)
        if asinstype is None:
            for asin in asin_list:
                self.webdrvier_html(asin, asinstype)
        else:
            print('下载 优质词。多个asin')
            self.webdrvier_html(','.join(asin_list), 1)

    def read_db_data(self, sku):
        while True:
            try:
                if self.read_product_sku(sku):
                    with self.engine_us.begin() as conn:
                        sql_read = f"SELECT asin, id,site,sku FROM {self.db_syn} WHERE STATE = 1 and site='{self.site_url}' and sku='{sku}' limit 10 FOR UPDATE;"
                        print(sql_read)
                        a = conn.execute(sql_read)
                        self.df_read = pd.DataFrame(a, columns=['asin', 'id', 'site', 'sku'])
                        self.df_read.drop_duplicates(['asin'], inplace=True)
                        if self.df_read.shape[0] == 0:
                            print('*********** asin 数据抓取 完毕 *****************')
                            self.stop_item_queue = False
                            return []
                        self.index_tuple = tuple(self.df_read['id'])
                        if len(self.index_tuple) == 1:
                            sql_update = f"""UPDATE {self.db_syn} a set state=2, user_name='{self.email_name}' where a.id in ({self.index_tuple[0]})"""
                        else:
                            sql_update = f"""UPDATE {self.db_syn} a set state=2, user_name='{self.email_name}' where a.id in {self.index_tuple}"""
                        conn.execute(sql_update)
                    asin_list = list(self.df_read.asin)
                    print(asin_list)
                    self.site_name_url = list(self.df_read.site)[0]
                    self.sku = list(self.df_read.sku)[0]
                    return asin_list
                else:
                    self.asin_state_5_list.append(sku)
                    return []
            except Exception as e:
                print("读取数据出bug并等待5s继续", e, f"\n{traceback.format_exc()}")
                time.sleep(10)
                self.mysql_connect()
                continue

    def read_db_sku(self):
        while True:
            try:
                sql = f"""SELECT DISTINCT sku,token from all_h10_syn where site='{self.site_url}' and state = 1 """
                df = pd.read_sql(sql, con=self.engine_us)
                self.sku_data_list = list(df.sku + '|-|' + df.token)
                print(self.sku_data_list)
                break
            except:
                print('读取sku 失败0')
                time.sleep(30)
                self.mysql_connect()
                continue

    def read_product_sku(self, sku):
        """Report whether ``sku`` may be processed; currently always ``True``.

        The former implementation is disabled.  It queried ``product_audit``
        for rows with ``product_sku = sku`` and ``product_audit_status =
        '5-1'`` (up to 5 attempts, reconnecting through ``mysql_inv()`` on
        failure) and returned ``False`` when such a row existed, i.e. when the
        sku had already been handled manually.
        """
        return True

    def read_db_asin(self):
        """Return ``["<sku>|<site>|<asin,asin,...>"]`` for the current sku.

        The ASINs stored for ``self.sku`` on ``self.site_url`` are
        concatenated by the database.  Returns ``None`` when nothing is
        stored.  On any error the failure is printed and the query is retried
        after a short pause and ``mysql_connect()``.
        """
        while True:
            try:
                sql_read = f"SELECT sku, site, GROUP_CONCAT(asin SEPARATOR ',') AS asin_list FROM {self.db_syn} WHERE site = '{self.site_url}' and sku='{self.sku}';"
                df = pd.read_sql(sql_read, con=self.engine_us)
                # NOTE(review): an aggregate query without GROUP BY presumably
                # returns one all-NULL row when nothing matches; drop such rows
                # so callers never receive NaN entries.
                df = df.dropna(subset=['sku', 'site', 'asin_list'])
                if df.empty:
                    return None
                return list(df.sku + '|' + df.site + '|' + df.asin_list)
            except Exception as e:
                # Log the cause and back off instead of silently spinning.
                print("read_db_asin", e, f"\n{traceback.format_exc()}")
                time.sleep(5)
                self.mysql_connect()

    def db_change_state_common(self, asin_list, err_type):
        df = self.df_read.loc[self.df_read.asin.isin(asin_list)]
        id_tuple = tuple(df.id)
        if err_type == 3 or err_type == 1:
            state = 2
        elif err_type == 4:
            state = 3

        while True:
            try:
                with self.engine_us.begin() as conn:
                    # 1,3：1--回滚；3--成功
                    if id_tuple:
                        if len(id_tuple) == 1:
                            sql_update = f"update {self.db_syn} set state={err_type} where id in ({id_tuple[0]}) and state={state};"
                        else:
                            sql_update = f"update {self.db_syn} set state={err_type} where id in {id_tuple} and state={state};"
                        conn.execute(sql_update)
                break
            except Exception as e:
                print("db_change_state_common", e, f"\n{traceback.format_exc()}")
                self.mysql_connect()
                continue

    def update_sku_syn(self):
        if self.asin_state_5_list:
            while True:
                try:
                    with self.engine_us.begin() as conn:
                        if len(self.asin_state_5_list) == 1:
                            sql_update = f"update {self.db_syn} set state=5 where sku in ('{self.asin_state_5_list[0]}')"
                        else:
                            sql_update = f"update {self.db_syn} set state=5 where sku in {tuple(self.asin_state_5_list)}"
                        conn.execute(sql_update)
                    self.asin_state_5_list = []
                    break
                except Exception as e:
                    print("update_sku_syn", e, f"\n{traceback.format_exc()}")
                    self.mysql_connect()
                    continue

    def del_file(self, path_data, asin_type):
        try:
            for i in os.listdir(path_data):  # os.listdir(path_data)#返回一个列表，里面是当前目录下面的所有东西的相对路径
                if asin_type is None:
                    for asin in self.asin_list:
                        if asin in i:  # 删除指定 Amazon Search Terms_Search 开头的文件
                            print("需要 删除 的 文件：核心", i)
                            file_data = path_data + "\\" + i  # 当前文件夹的下面的所有东西的绝对路径
                            if os.path.isfile(file_data) == True:  # os.path.isfile判断是否为文件,如果是文件,就删除.如果是文件夹.递归给del_file.
                                os.remove(file_data)
                else:
                    if asin_type in i:  # 删除指定 Amazon Search Terms_Search 开头的文件
                        print("需要 删除 的 文件：优质", i)
                        file_data = path_data + "\\" + i  # 当前文件夹的下面的所有东西的绝对路径
                        if os.path.isfile(file_data) == True:  # os.path.isfile判断是否为文件,如果是文件,就删除.如果是文件夹.递归给del_file.
                            os.remove(file_data)
        except:
            print(path_data, '删除111111111')

    def read_files(self, path, asin):
        columns_to_include = ['Keyword Phrase', 'Cerebro IQ Score', 'Search Volume', 'Search Volume Trend',
                              'Sponsored ASINs',
                              'Competing Products', 'CPR', 'Title Density', 'Amazon Recommended', 'Organic',
                              'Amazon Rec. Rank', 'Sponsored Rank', 'Organic Rank']
        try:
            time_strftime = time.strftime("%Y-%m-%d", time.localtime())
            file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin}_{time_strftime}.csv'
            df = pd.read_csv(file_path, usecols=columns_to_include)
            print('读取文件11111：：', file_path)
        except:
            try:
                # 获取当前日期
                current_date = datetime.date.today()
                # 计算前一天日期
                previous_date = current_date - datetime.timedelta(days=1)
                # 格式化前一天日期为字符串
                previous_date_str = previous_date.strftime("%Y-%m-%d")
                file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin}_{previous_date_str}.csv'
                df = pd.read_csv(file_path, usecols=columns_to_include)
                print('读取文件2222222：：', file_path)
            except:
                self.driver.refresh()
                time.sleep(5)
                print('重新下载文件：', asin,path)
                self.webdrvier_html(asin, None)
                time.sleep(5)
                time_strftime = time.strftime("%Y-%m-%d", time.localtime())
                file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin}_{time_strftime}.csv'
                df = pd.read_csv(file_path, usecols=columns_to_include)
                print('读取文件333333：：', file_path)
        return df

    def sava_data(self, path):
        print('self.err_asin_list::', self.err_asin_list)
        df_asin_data_list = []
        for asin in self.asin_list:
            print(asin, '333333333333333', self.err_asin_list)
            if asin not in self.err_asin_list:
                df = self.read_files(path, asin)
                # columns_to_include = ['Keyword Phrase', 'Cerebro IQ Score', 'Search Volume', 'Search Volume Trend',
                #                       'Sponsored ASINs',
                #                       'Competing Products', 'CPR', 'Title Density', 'Amazon Recommended', 'Organic',
                #                       'Amazon Rec. Rank', 'Sponsored Rank', 'Organic Rank']
                # try:
                #     time_strftime = time.strftime("%Y-%m-%d", time.localtime())
                #     file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin}_{time_strftime}.csv'
                #     print('file_path1111', file_path)
                #     df = pd.read_csv(file_path, usecols=columns_to_include)
                #     print('读取文件：：', file_path)
                # except:
                #     # 获取当前日期
                #     current_date = datetime.date.today()
                #     # 计算前一天日期
                #     previous_date = current_date - datetime.timedelta(days=1)
                #     # 格式化前一天日期为字符串
                #     previous_date_str = previous_date.strftime("%Y-%m-%d")
                #     file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin}_{previous_date_str}.csv'
                #     print('file_path22222222222', file_path)
                #     df = pd.read_csv(file_path, usecols=columns_to_include)
                #     print('读取文件：：', file_path)
                df.rename(columns={
                    'Keyword Phrase': 'keyword',
                    'Cerebro IQ Score': 'cerebro_iq_score',
                    'Search Volume': 'search_volume',
                    'Search Volume Trend': 'search_volume_trend',
                    'Sponsored ASINs': 'sponsored_asins',
                    'Competing Products': 'competing_product',
                    'CPR': 'cpr',
                    'Title Density': 'title_desity',
                    'Amazon Recommended': 'amazon_recommended',
                    'Organic': 'organic',
                    'Amazon Rec. Rank': 'amazon_recommended_rank',
                    'Sponsored Rank': 'sponsored_rank',
                    'Organic Rank': 'organic_rank'
                }, inplace=True)
                df['asin'] = asin
                df['sku'] = self.sku
                df_asin_data_list.append(df)
        if df_asin_data_list:
            df_asin = pd.concat(df_asin_data_list)
            df_asin.drop_duplicates(['asin', 'keyword', 'sku'], inplace=True)  # 去重
            df_asin['keyword'] = df_asin['keyword'].str.replace('%', '%%')
            df_asin.replace({np.nan: None}, inplace=True)  # 将 NaN 替换为 None
            data_list = df_asin.values.tolist()
            print('data_list::', len(data_list))
            print('存储数据 ：：')
            print(data_list, '233333333333333333331232323')
            while True:
                try:
                    print('存储数据 ：：')
                    with self.engine.begin() as conn:
                        conn.execute(
                            f"insert into {self.site_name}_industrial (keyword, cerebro_iq_score, search_volume,search_volume_trend, sponsored_asins,competing_product,cpr,title_desity,amazon_recommended,organic,amazon_recommended_rank,sponsored_rank,organic_rank,asin,sku) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE keyword = values(keyword), asin = values (asin),sku=values (sku)",
                            data_list)
                    break
                except Exception as e:
                    print("sava_data:", e, f"\n{traceback.format_exc()}")
                    self.mysql_connect(self.site_name)
                    time.sleep(30)

    def select_sdv(self, data_list, site):
        if data_list:
            for data in data_list:
                print('调接口更新数据：', data)
                sku_data_list = data.split('|')
                sku = sku_data_list[0]
                if self.read_product_sku(sku):
                    self.sku_state = False
                    if sku not in self.sku_list:
                        self.sku_list.append(sku)
                        print('sku::', sku)
                        asins = sku_data_list[2]
                        while True:
                            try:
                                self.get_token()
                                headers = {
                                    'X-Access-Token': self.val_str
                                }
                                url = f'http://120.79.147.190:8080/soundasia_selection/updateKeyWords/selWords?site={site}&asins={asins}'
                                print(url)
                                for i in range(5):
                                    try:
                                        resp = requests.get(url, timeout=30, headers=headers).json()
                                        self.data = {"sku": self.sku,
                                                     "token": self.token,
                                                     "adv_up_down_keywords": resp['result']['updown'],
                                                     "adv_core_keywords": resp['result']['core']
                                                     }
                                        break
                                    except Exception as e:
                                        print("请求java 接口报错:", e, f"\n{traceback.format_exc()}")
                                        time.sleep(3)
                                        continue
                                print('type_resp::',type(resp))
                                # core核心词
                                core_list = resp['result']['core'].split('\n')
                                # updown长尾词
                                updown_list = resp['result']['updown'].split('\n')
                                core = '\n'.join(core_list).replace('%', '%%')
                                updown = '\n'.join(updown_list).replace('%', '%%')
                                data_list = [[self.sku, self.site_url, core, updown]]
                                df_save = pd.DataFrame(data_list, columns=['product_sku', 'site', 'adv_core_keywords',
                                                                           'adv_up_down_keywords'])
                                try:
                                    if df_save.shape[0] > 0:
                                        with self.engine_us.begin() as conn:
                                            if len(set(df_save.product_sku)) == 1:
                                                sql_delete_bsr = f"delete from product_audit where product_sku in ('{tuple(df_save.product_sku)[0]}') and site='{self.site_url}';"
                                            else:
                                                sql_delete_bsr = f"delete from product_audit where product_sku in {tuple(set(df_save.product_sku))}  and site='{self.site_url}';"
                                            conn.execute(sql_delete_bsr)
                                        df_save.to_sql("product_audit", con=self.engine_us,
                                                       if_exists='append', index=False)
                                except Exception as e:
                                    print("save_competition:", e, f"\n{traceback.format_exc()}")
                                    print('存储优质词报错。重连数据库')
                                    time.sleep(5)
                                break
                            except Exception as e:
                                print(f'存储 core核心词 updown长尾词 报错, \n{e, traceback.format_exc()}')
                                self.mysql_connect()
                                time.sleep(5)
                else:
                    self.asin_state_5_list.append(sku)
                    self.sku_state = True

    # Helper that returns the smallest non-zero value of a row (0 when every value is 0)
    def get_min(self, row):
        """Return the smallest non-zero value in ``row``, or 0 if there is none."""
        return min((value for value in row if value != 0), default=0)

    def save_competition(self, path, asin_list, site_url, site):
        print('self.err_asins_adv_list;;', self.err_asins_adv_list)
        if asin_list[0] not in ''.join(self.err_asins_adv_list):
            try:
                time_strftime = time.strftime("%Y-%m-%d", time.localtime())
                file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin_list[0]}_{time_strftime}.csv'
                print('file_pathsave_competition1111111', file_path)
                df = pd.read_csv(file_path)
            except:
                # 获取当前日期
                current_date = datetime.date.today()
                # 计算前一天日期
                previous_date = current_date - datetime.timedelta(days=1)
                # 格式化前一天日期为字符串
                previous_date_str = previous_date.strftime("%Y-%m-%d")
                file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin_list[0]}_{previous_date_str}.csv'
                print('file_pathsave_competition2222', file_path)
                df = pd.read_csv(file_path)
            # 创建一个字典来映射原始列名和新的列名
            column_mapping = {
                'Keyword Phrase': 'keyword_phrase',
                'Position (Rank)': 'rank0',
            }
            # 将asin_list中的元素作为列名，并给它们一个新的列名(rank1, rank2, rank3, ...)
            for i, asin in enumerate(asin_list[1:], start=1):
                column_mapping[asin] = f'rank{i}'
            df.rename(columns=column_mapping, inplace=True)
            # 只保留包含rank的列
            rank_columns = [col for col in df.columns if col.startswith('rank')]
            df = df[['keyword_phrase'] + rank_columns]
            # 将特殊字符'-'替换为0，并将rank列转换为整数类型
            df.replace('-', 0, inplace=True)
            for col in rank_columns:
                df[col] = pd.to_numeric(df[col], errors='coerce').astype(int)
            # 在新的一列中存储最小值
            df['rank'] = df[rank_columns].apply(self.get_min, axis=1)
            # 创建新的DataFrame，只保留'keyword_phrase'列和'min_value'列，并排除为0的最小值
            new_df = df[df['rank'] != 0][['keyword_phrase', 'rank']]
            print(new_df.shape)
            print(new_df.columns)
            new_df.replace({np.nan: None}, inplace=True)  # 将 NaN 替换为 None
            # print(' 低竞争 优质词')
            competition_phrase_keywords = '&&&'.join(list(new_df['keyword_phrase']))
            rank_list = list(new_df['rank'])
            competition_phrase_rank = ','.join(str(x) for x in rank_list)
            self.data['competition_phrase_keywords'] = competition_phrase_keywords
            self.data['competition_phrase_rank'] = competition_phrase_rank
            self.data['site'] = site_url
            self.requests_updateSkuByAsinH10Data_api(self.data)
            new_df['sku'] = self.sku
            new_df['site'] = site_url
            try:
                if new_df.shape[0] > 0:
                    with self.engine_us.begin() as conn:
                        if len(set(new_df.keyword_phrase)) == 1:
                            sql_delete = f"""delete from adv_low_competition_phrase where site = '{site_url}' and sku='{self.sku}' and keyword_phrase in ("{tuple(new_df.keyword_phrase)[0]}");"""
                            conn.execute(sql_delete)
                        else:
                            sql_delete = text(
                                "DELETE FROM adv_low_competition_phrase WHERE site = :site_url AND sku = :sku AND keyword_phrase IN :keyword_phrases;"
                            )
                            print("sql_delete:", sql_delete)
                            conn.execute(sql_delete, site_url=site_url, sku=self.sku,
                                         keyword_phrases=tuple(set(new_df['keyword_phrase'].tolist())))
                    new_df.to_sql("adv_low_competition_phrase", con=self.engine_us, if_exists='append',
                                  index=False)
            except Exception as e:
                print("save_competition:", e, f"\n{traceback.format_exc()}")
                print('存储优质词报错。重连数据库')
                self.mysql_connect()
                time.sleep(5)
        # 修改抓取状态
        self.db_change_state_common(self.asin_list, 3)

    def requests_updateSkuByAsinH10Data_api(self, data):
        """POST ``data`` as JSON to the ``updateSkuByAsinH10Data`` endpoint.

        The call is best effort: status code, body and decoded JSON are
        printed on success, and any failure (network error, non-JSON body)
        is printed instead of being raised.

        Args:
            data: JSON-serialisable payload to send.
        """
        # Log the payload that is actually sent.
        print('调用接口：', data)
        # Previous endpoint: http://120.24.90.10:80/api/ComprehensiveProject/updateSkuByAsinH10Data
        url = 'https://xcu.yswg.com.cn/api/ComprehensiveProject/updateSkuByAsinH10Data'
        try:
            res = requests.post(url, json=data, timeout=30)
            print(res.status_code)
            print(res.text)
            print(res.json())
        except Exception as e:
            # Keep the call non-fatal, but leave a trace of the failure.
            print('updateSkuByAsinH10Data request failed:', e)

    def send_ms(self, ms):
        if self.useremail_state == False:
            url = 'http://47.112.96.71:8082/selection/sendMessage'
            data = {
                'account': 'pengyanbing,qianjin',
                'title': f'H10 {self.email_name} 账号异常',
                'content': ms
            }
            print(data)
            requests.post(url=url, data=data, timeout=15)

    def run(self):
        """Entry point: log in once, then process pending SKUs forever.

        Setup: ``get_ip_address()`` is expected to return either a falsy
        value or a sequence ``(download_path, account_email, password)``;
        hard-coded defaults are used otherwise.  The browser is then started
        with ``web_drver()``.

        Every round of the endless loop:

        1. refreshes the page, clears browser storage and logs in again when
           the current URL contains "concurrent-sessions" or "signin";
        2. for each site, connects to MySQL, loads the pending SKUs and, per
           SKU, claims its ASINs, downloads and stores the reports, updates
           the keyword data, deletes the downloaded files and updates the row
           states;
        3. sleeps 20-50 s, with a longer pause when the hour is 22.

        When the account is flagged as unusable (``useremail_state`` false)
        the claimed ASINs are rolled back to state 1, the method sleeps for
        an hour and the SKU loop of the current site is abandoned.
        """
        user_pw_list = self.get_ip_address()
        if user_pw_list:
            path = user_pw_list[0]
            # Check whether the download directory exists
            if os.path.exists(path):
                print(f"{path} 目录存在。")
            else:
                print(f"{path} 目录不存在。")
                path = r'C:\Users\Administrator\Downloads'
            print('当前路径：', path)
            self.email_name = user_pw_list[1]
            self.pw = user_pw_list[2]
        else:
            # NOTE(review): fallback account credentials are hard-coded in
            # source - consider moving them to configuration.
            path = r'C:\Users\ASUS\Downloads'
            print('当前路径：', path)
            self.email_name = 'H10961961@outlook.com'
            self.pw = 'soundasia961961@'
        self.web_drver()
        while True:
            # Per-round state; note that err_asin_list is not reset here.
            self.data = {}
            self.sku_list = []
            self.err_asins_adv_list = []
            try:
                self.driver.refresh()
            except:
                continue
            time.sleep(4)
            self.driver.execute_script("localStorage.clear();")  # clear local storage
            time.sleep(0.5)
            self.driver.execute_script("sessionStorage.clear();")  # clear session storage
            time.sleep(0.5)
            self.driver.execute_script(
                "caches.keys().then(function(names) { for (let name of names) { caches.delete(name); } });")
            self.driver.execute_script("window.performance.clearResourceTimings();")
            time.sleep(2)
            login_url = self.driver.current_url
            if "concurrent-sessions" in login_url or 'signin' in login_url:
                self.longin()
            for site in ['us', 'uk', 'de', 'fr', 'es', 'it', 'mx']:
                print(site)
                # Map the short site code to the site value used in the
                # database.  The 'jp' branch is never reached with the list
                # above.
                if site == 'uk':
                    self.site_url = 'Amazon.co.uk'
                elif site == 'de':
                    self.site_url = 'Amazon.de'
                elif site == 'fr':
                    self.site_url = 'Amazon.fr'
                elif site == 'es':
                    self.site_url = 'Amazon.es'
                elif site == 'it':
                    self.site_url = 'Amazon.it'
                elif site == 'jp':
                    self.site_url = 'Amazon.co.jp'
                elif site == 'mx':
                    self.site_url = 'Amazon.co.mx'
                elif site == 'us':
                    self.site_url = 'Amazon.com'
                self.site_name = site
                self.mysql_connect(site)
                # Load the SKUs that have not been crawled yet
                self.read_db_sku()
                for sku_token in self.sku_data_list:
                    sku_token_list = sku_token.split('|-|')
                    sku = sku_token_list[0]
                    self.token = sku_token_list[1]
                    # Claim the ASINs of this sku
                    self.asin_list = self.read_db_data(sku)
                    self.sku = sku
                    if self.asin_list:
                        # Download the per-ASIN reports
                        self.nex_page(self.asin_list, asinstype=None)
                        # Continue only when the account is still usable
                        if self.useremail_state:
                            # Store the report rows in MySQL
                            self.sava_data(path)
                            # Load the ASINs stored for this sku
                            data_list = self.read_db_asin()
                            # Call the Java API to update core / long-tail keywords;
                            # this also checks whether the sku was already handled
                            self.select_sdv(data_list, site)
                            self.del_file(path, None)
                            if self.sku_state == False:
                                # Multi-ASIN search for low-competition keywords
                                self.nex_page(self.asin_list, asinstype=1)
                                self.save_competition(path, self.asin_list, self.site_url, site)
                            elif self.data:
                                self.requests_updateSkuByAsinH10Data_api(self.data)
                            # Delete the downloaded report
                            self.del_file(path, self.asin_list[0])
                            # Set state 4 for the failed ASINs
                            self.db_change_state_common(self.err_asin_list, 4)
                            # Set state 5 for the queued SKUs
                            self.update_sku_syn()
                        else:
                            # Account problem: roll the claimed rows back,
                            # wait an hour and leave this site's SKU loop.
                            self.db_change_state_common(self.asin_list, 1)
                            time.sleep(3600)
                            self.useremail_state = True
                            break
                    else:
                        self.update_sku_syn()
                self.mysql_connect(site)
            time.sleep(randint(20, 50))
            new_date = datetime.datetime.now().strftime("%H")
            print(new_date)
            # NOTE(review): after a 3800 s sleep the hour is normally no
            # longer '22', so this loop appears to pause only once and the
            # '08' refresh looks unreachable - confirm the intended schedule.
            for i in range(10):
                if new_date == '22':
                    time.sleep(3800)
                    new_date = datetime.datetime.now().strftime("%H")
                    print(new_date)
                    if new_date == '08':
                        self.driver.refresh()

if __name__ == '__main__':
    # Script entry point: build the crawler and start its endless loop.
    crawler = H10()
    crawler.run()
