import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from utils.db_connect import BaseUtils
from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
from utils.requests_param import Requests_param_val
from queue import Queue
import time
import random
from lxml import etree
import json
from curl_cffi import requests
import traceback
import pandas as pd
import threading
import urllib3
import re
import datetime
from amazon_spider.VPS_IP import is_internet_available
import uuid

# Requests in this module are sent with verify=False, so urllib3's
# InsecureRequestWarning is silenced explicitly here.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Second call passes no category, so urllib3's default warning category is used.
urllib3.disable_warnings()

# Bare module-level string (it follows the imports, so it is not the module
# docstring). Meaning: "crawl collected shops and collected ASINs".
"""店铺收藏 asin 收藏 抓取"""


class async_account_name_products(BaseUtils):
    def __init__(self, site_name='us', read_size=1, proxy_name=None):
        super().__init__()
        self.site_name = site_name  # 站点
        print('代理 proxy_nameproxy_nameproxy_name: ', proxy_name)
        self.reuests_para_val = Requests_param_val(site_name=self.site_name, spider="seller_account_product",
                                                   proxy_name=proxy_name)
        self.read_size = read_size
        self.init_db_names()
        self.requests_error_asin_list = []  # 1
        self.asin_not_found_list = []  # 4
        self.asin_not_seller_id = []  # 5
        self.asin_not_sure_list = []  # 6
        self.cookies_queue = Queue()  # cookie队列
        self.item_queue = Queue()  # 存储 item 详情数据队列
        self.queries_asin_queue = Queue()  # 需要爬取的asin队列
        self.asin_detail_list = []  # 存储asin 详情的列表
        self.asin_syn_list = []  # 存储asin 详情的列表
        self.delete_cookies_list = []  # 存储出现中国邮编的cookie
        self.stop_item_queue = True  # 用于是否退出循环存储的条件
        self.cookie_dict_delete_id = {}
        self.seller_account_num_list = []  # 存储店铺产品总数
        self.account_name_page_list = []  # 存储已经翻页的店铺名称
        self.user_asin_list = []  # 收藏asin
        if site_name == "us":
            self.site_url = 'https://www.amazon.com'
            self.host = 'www.amazon.com'
        elif site_name == 'uk':
            self.site_url = 'https://www.amazon.co.uk'  # 站点url
            self.host = 'www.amazon.co.uk'
        elif site_name == 'de':
            self.site_url = 'https://www.amazon.de'
            self.host = 'www.amazon.de'
        elif site_name == 'fr':
            self.site_url = 'https://www.amazon.fr'
            self.host = 'www.amazon.fr'
        elif site_name == 'es':
            self.site_url = 'https://www.amazon.es'
            self.host = 'www.amazon.es'
        elif site_name == 'it':
            self.site_url = 'https://www.amazon.it'
            self.host = 'www.amazon.it'
        self.headers_num_int = 0
        self.date_info = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        self.year = time.strftime('%Y', time.localtime(time.time()))

    def init_db_names(self):
        """Open both database engines and derive the per-site task table name.

        The table name is the site code followed by the configured
        ``db_user_collection_syn`` value with its first two characters removed.
        """
        self.engine_pg6 = self.pg_connect_6()
        self.engine = self.mysql_connect()
        table_template = DB_REQUESTS_ASIN_PARAMS['db_user_collection_syn']
        table_suffix = table_template[2:]
        self.db_user_collection_syn = self.site_name + table_suffix

    def get_product(self, t_num):
        while True:
            if self.queries_asin_queue.empty() == False:
                querys = self.queries_asin_queue.get()
                if self.cookies_queue.empty():
                    cookies_dict = self.reuests_para_val.get_cookie()
                    self.cookie_dict_delete_id = cookies_dict
                    for ck in cookies_dict.values():
                        self.cookies_queue.put(ck)
                while 1:
                    cookie_str = self.cookies_queue.get()
                    if len(cookie_str) > 50:
                        try:
                            cookie_lsit = json.loads(cookie_str)
                        except:
                            cookie_lsit = eval(cookie_str)
                        cookie_dic = {}
                        try:
                            for i in cookie_lsit:
                                if i:
                                    cookie_dic[i["name"]] = i["value"]
                                else:
                                    continue
                            cookie_str = ''
                            for k, v in cookie_dic.items():
                                cookie_str = cookie_str + str(k) + '=' + str(v) + ';'
                            break
                        except:
                            cookie_str = ''
                            for k, v in cookie_lsit.items():
                                cookie_str = cookie_str + str(k) + '=' + str(v) + ';'
                            break
                    else:
                        break
                n = random.randint(70, 114)
                ua = f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{n}.0.{random.randint(1000, 5000)}.{random.randint(1, 181)} Safari/537.36'
                headers = {
                    'connection': 'close',
                    'authority': self.host,
                    'accept': 'text/html,*/*',
                    'accept-language': 'zh-CN,zh;q=0.9',
                    'cache-control': 'no-cache',
                    'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
                    'referer': self.site_url,
                    'sec-ch-ua-mobile': '?0',
                    'user-agent': ua
                }
                alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
                            's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
                k = ""
                for i in (0, random.randint(0, 26)):
                    k += random.choice(alphabet)
                headers[k] = str(uuid.uuid4())
                headers["cookie"] = cookie_str
                query = querys.split('|-|')
                end_time = query[2]  # 结束时间
                _url = query[1]  # 店铺url
                account_id = query[0]  # 卖家id
                if 'seller=' in _url and '&page=' not in _url:
                    scraper_url_list = re.findall(r'seller=(.*?)&', _url)
                    if len(scraper_url_list) == 0:
                        scraper_url_list = re.findall(r'seller=(.*)', _url)
                    scraper_url = f'{self.site_url}/s?me=' + scraper_url_list[0]
                else:
                    scraper_url = _url
                print('scraper_url::', scraper_url)
                headers['origin'] = scraper_url
                if '&page=' in scraper_url:
                    page = int(re.findall(r'&page=(\d+)&', scraper_url)[0])
                else:
                    page = 1
                try:
                    print("使用代理ip: ", self.reuests_para_val.proxy_name)
                    resp = requests.get(scraper_url, impersonate="chrome110", headers=headers,
                                        timeout=30, verify=False)
                    if ("Enter the characters you see below" in resp.text) or (
                            "Geben Sie die Zeichen unten ein" in resp.text) or (
                            "Introduce los caracteres que se muestran" in resp.text) or (
                            "Saisissez les caractères que vous voyez" in resp.text) or (
                            "Inserisci i caratteri visualizzati nello spazio" in resp.text):
                        print(f"{self.site_name}  站点  +   使用代理ip出现验证码：{scraper_url}")
                        time.sleep(random.uniform(1.5, 5.5))
                        self.headers_num_int += 1
                        continue
                except Exception as e:
                    print(e, '222222222222222222222')
                    account_name_next_url_queue = account_id + '|-|' + scraper_url + '|-|' + end_time
                    print("请求错误错误: ", account_name_next_url_queue)
                    self.queries_asin_queue.put(account_name_next_url_queue)
                    time.sleep(random.uniform(12, 23.5))
                    continue
                response = resp.text
                response_s = etree.HTML(response)
                title_page = response_s.xpath("//title/text()")
                if title_page:
                    Page_NOt = title_page[0].strip()
                else:
                    Page_NOt = None
                account_name = query[0]
                #  状态 4 Listen Now
                print_time = time.strftime('%Y %m %d %H:%M:%S', time.localtime(time.time()))
                if "Page Not Found" in Page_NOt or ("We are sorry! This Gift Card is not available" in response) or (
                        "500 - An error occurred" in response) or ("Sorry! Something went wrong!" in response):
                    # self.asin_not_found_list.append(account_id)
                    account_url_queue = account_id + '|-|' + scraper_url + '|-|' + end_time
                    self.queries_asin_queue.put(account_url_queue)
                    print(f"{print_time} 商品不存在 更改状态1 不存储数据 {account_name}")
                    time.sleep(random.uniform(1.5, 2.5))
                    continue
                elif "Need help?" in response and "Try checking your spelling or use more general terms" in response:
                    self.asin_not_sure_list.append(account_id)
                    continue

                # 获取邮编
                try:
                    ingress = response_s.xpath("//span[@id='glow-ingress-line2']/text()")
                    print('邮编：：：', ingress)
                except Exception as e:
                    self.requests_error_asin_list.append(account_id)
                    time.sleep(random.uniform(1.5, 2))
                    continue
                try:
                    ingress = ingress[0].strip()
                except:
                    ingress = None
                    print("获取邮编錯誤:")
                print(ingress, '邮编 ')
                if ingress:
                    if ("中国大陆" in ingress) or ("China" in ingress) or ("Hong" in ingress) or ("Chine" in ingress) or (
                            "Cina" in ingress):
                        try:
                            cookie_ubid_main_id = re.findall(r'ubid-main=(.*?);', cookie_str)[0]
                        except:
                            cookie_ubid_main_id = re.findall(r'session-id=(.*?);', cookie_str)[0]
                        for cookie_key_value in self.cookie_dict_delete_id.items():
                            if cookie_ubid_main_id in cookie_key_value[1]:
                                self.delete_cookies_list.append(cookie_key_value[0])
                        self.requests_error_asin_list.append(account_id)
                        time.sleep(random.uniform(1.5, 2))
                        self.headers_num_int += 1
                        continue
                else:
                    account_name_url_queue3 = account_id + '|-|' + scraper_url + '|-|' + end_time
                    self.queries_asin_queue.put(account_name_url_queue3)
                    time.sleep(random.uniform(1.5, 2))
                    continue
                brands, num_int, account_name = self.get_brand_results(response_s)
                self.xpath_html(response_s, account_id, page, brands, num_int, end_time, account_name)
                if page <= 19:
                    self.next_page(response_s, account_id, end_time)

            else:
                print(f"当前线程-{t_num} 已完成-爬取-跳出循环")
                break

    def get_brand_results(self, response_s):
        '第一次发送请求时 调用此方法'
        # 获取产品总数
        results_span_list = response_s.xpath(
            '//span[contains(text(),"results")]/text()|//div[@class="a-section a-spacing-small a-spacing-top-small"]//span/text()')
        results_list = []
        if len(results_span_list) > 0:
            ele_text = results_span_list[0].replace(".", "").replace(",", "").replace("\xa0", "")
            ele_a = re.findall("\d+-\d+", ele_text)
            if len(ele_a) == 0:
                ele_a = re.findall("\d+–\d+", ele_text)
            if ele_a:
                ele_text = ele_text.replace(ele_a[0], '')
            results_list = re.findall("(\d+)", ele_text)
        if len(results_list) > 1:
            results_list = [results_list[-1]]
        try:
            if results_list:
                results_int = results_list[0]
            if 'one result for' in ele_text or "One result" == ele_text:
                results_int = 1
            num_int = int(results_int)
        except:
            num_int = 0
        # 获取所有品牌
        brands_list = response_s.xpath("//div[@id='brandsRefinements']//ul//li//span//a//span/text()")
        if brands_list:
            brands = '|-|'.join(brands_list)
        else:
            brands = None
        account_name_list = response_s.xpath("//span[@id='nav-search-label-id']/text()")
        if account_name_list:
            account_name = account_name_list[0]
        else:
            account_name = None
        return brands, num_int, account_name

    def xpath_html(self, response_s, account_id, page, brands, num_int, end_time, account_name):
        products_asin_link_list = response_s.xpath(
            "//div[@class='s-main-slot s-result-list s-search-results sg-row']//@data-asin")
        while '' in products_asin_link_list:
            products_asin_link_list.remove('')
        if products_asin_link_list:
            for products_asin in products_asin_link_list:
                if products_asin and products_asin != '' and products_asin != ' ':
                    # 获取 asin  位置
                    page_rank = products_asin_link_list.index(products_asin) + 1
                    asin_href_list = response_s.xpath(f"//div[@data-asin='{products_asin}']//a/@href")
                    buy_data_list = response_s.xpath(
                        f"//div[@data-asin='{products_asin}']//span[contains(text(),'bought')]/text()")
                    if buy_data_list:
                        buy_data = buy_data_list[0].strip()
                    else:
                        buy_data = None
                    if len(asin_href_list) > 0:
                        asin_href_list = response_s.xpath(f"//div[@data-asin='{products_asin}']//a/@href")
                        asin_href_join = ''.join(asin_href_list)
                        row_num_lsit = re.findall(fr"{products_asin}/ref=sr_1_(\d+)\?", asin_href_join)
                        try:
                            row_num = row_num_lsit[0] if row_num_lsit else 0
                            row_num_int = int(row_num)
                        except:
                            row_num_int = 0
                    else:
                        row_num_int = 0
                    item = {"account_id": account_id, "asin": products_asin, 'row_num': row_num_int,
                            'page': page, 'brands': brands, 'results_of_num': num_int, 'page_rank': page_rank,
                            'date_info': self.date_info, 'end_time': end_time, 'buy_data': buy_data,
                            'account_name': account_name}
                    print(item)
                    self.item_queue.put(item)
        else:
            if len(products_asin_link_list) == 0:
                self.asin_not_sure_list.append(account_id)

    def next_page(self, response_s, account_id, end_time):
        next_url_list = response_s.xpath(
            '//a[contains(text(), "Weiter")]/@href|//a[contains(text(), "Next")]/@href|//a[contains(text(), "Page suivante")]/@href|//a[contains(text(), "Suivant")]/@href|//a[contains(text(), "Avanti")]/@href|//a[contains(text(), "Siguiente")]/@href|//a[contains(text(), "Seite")]/@href|//a[contains(text(), "siguiente")]/@href|//a[contains(text(), "Pagina")]/@href')
        print("下一页 --- next_url:", next_url_list)
        if next_url_list:
            for url in next_url_list:
                if '&page=' in url and '&ref=sr_pg' in url:
                    next_url = str(account_id) + '|-|' + self.site_url + url + '|-|' + end_time
                    self.queries_asin_queue.put(next_url)

    def init_list(self):
        print("=======清空变量==========")
        self.asin_not_found_list = []  # 4
        self.requests_error_asin_list = []  # 1
        self.item_queue = Queue()  # 存储 item 详情数据队列
        self.queries_asin_queue = Queue()  # 需要爬取的asin队列
        self.asin_detail_list = []  # 存储asin 详情的列表
        self.asin_syn_list = []  # 存储asin 详情的列表
        self.delete_cookies_list = []  # 存储出现中国邮编的cookie
        self.asin_not_sure_list = []  # 6
        self.asin_not_seller_id = []  # 5
        self.seller_account_num_list = []  # 存储店铺产品总数
        self.account_name_page_list = []
        self.headers_num_int = 0
        self.user_asin_list = []  # 收藏asin 所有字段
        self.asin_real_spider = []  # 收藏asin

    def run(self):
        while True:
            asin_list = self.read_db_data()
            if asin_list:
                if self.cookies_queue.empty():
                    cookies_dict = self.reuests_para_val.get_cookie()
                    self.cookie_dict_delete_id = cookies_dict
                    for ck in cookies_dict.values():
                        self.cookies_queue.put(ck)
                for asin in asin_list:
                    self.queries_asin_queue.put(asin)
                html_thread = []
                for i in range(8):
                    thread2 = threading.Thread(target=self.get_product, args=(i,))
                    html_thread.append(thread2)
                for ti in html_thread:
                    ti.start()
                    time.sleep(0.25)
                for t2 in html_thread:
                    t2.join()
                # 存储数据
                self.process_item()
                # 删除cookie
                self.reuests_para_val.delete_china_cookie(list(set(self.delete_cookies_list)))
            self.init_list()
            if self.stop_item_queue == False:
                break
            break

    def process_item(self):
        print("=================开始存储数据======================")
        while True:
            if self.item_queue.empty() == False:
                item = self.item_queue.get()
                item_list = []
                asin_item_list = []
                # 需要存到数据库的字段
                item_list.append(item['account_id'])
                item_list.append(item['asin'])
                asin_item_list.append(item['asin'])  # 存储到需要抓取的表
                asin_item_list.append(9)  # 存储到需要抓取的表的数据类型
                asin_item_list.append(1)  # 存储到需要抓取的表的抓取优先级
                asin_item_list.append(self.site_name)  # 存储到需要抓取的表的站点
                asin_item_list.append(item['end_time'])  # 存储到需要抓取的表的结束时间
                asin_item_list.append(item['account_id'])  # 存储到需要抓取的表的店铺id
                item_list.append(item['row_num'])
                item_list.append(item['page'])
                item_list.append(item['brands'])
                item_list.append(item['results_of_num'])
                item_list.append(item['page_rank'])
                item_list.append(item['buy_data'])
                item_list.append(item['account_name'])
                item_list.append(item['end_time'])
                self.asin_detail_list.append(item_list)
                self.asin_syn_list.append(asin_item_list)
            else:
                if self.item_queue.empty():
                    self.save_data()
                    self.asin_detail_list = []
                    self.save_mysql_syn()
                    self.asin_syn_list = []
                    print("结束--跳出--存储")
                    break

    def read_db_data(self):
        """Claim due tasks from the collection table and return the shop tasks.

        Within one PostgreSQL transaction this method:
          1. reads all due collected-ASIN rows (``data_type = 1``), marks them
             ``state=2`` and forwards them through ``save_asin_syn``;
          2. reads up to ``read_size`` due collected-shop rows
             (``data_type = 2``) and marks them ``state=2``.

        Returns:
            List of ``"<data_id>|-|<product_url>|-|<end_time>"`` strings, or an
            empty list when no shop row is due (``stop_item_queue`` is then set
            to ``False``).

        Any failure is logged, the PostgreSQL engine is re-created and the whole
        read is retried after 5 seconds.
        """
        while not is_internet_available():
            time.sleep(10)
        while True:
            try:
                with self.engine_pg6.begin() as conn:
                    # Collected ASINs that are due.
                    sql_read_asin = f'SELECT id, data_id, end_time FROM {self.db_user_collection_syn} WHERE now() >= crawling_time  and  now() <= end_time and  state = 1 and data_type = 1 ORDER BY id FOR UPDATE'
                    print('查詢收藏asin:', sql_read_asin)
                    self.df_read_asin = self.engine_pg6.read_sql(sql_read_asin)
                    if self.df_read_asin.shape[0] != 0:
                        self.index_tuple_asin = tuple(self.df_read_asin['id'])
                        print('self.index_tuple_asin::', len(self.index_tuple_asin))
                        # One comma-joined id list covers the single- and multi-row case.
                        asin_id_list = ", ".join(str(row_id) for row_id in self.index_tuple_asin)
                        sql_update = f"""UPDATE {self.db_user_collection_syn} b set state=2 where b.id in ({asin_id_list})"""
                        conn.execute(sql_update)
                        asin_task_strings = list(
                            self.df_read_asin.data_id + '|-|' + '8' + '|-|' + '1' + '|-|' + self.site_name + '|-|' + self.df_read_asin.end_time.astype(
                                str))
                        self.asin_real_spider = list(self.df_read_asin['data_id'])
                        # Rebuilt on every attempt so a retry does not append the
                        # same rows a second time.
                        self.user_asin_list = []
                        for asin_task in asin_task_strings:
                            print(asin_task, 'user_asinuser_asin')
                            self.user_asin_list.append(asin_task.split('|-|'))
                        print(self.user_asin_list)
                        print('存储 收藏asin')
                        self.save_asin_syn()
                    # Collected shops that are due.
                    sql_read = f'SELECT id, product_url,data_id,end_time FROM {self.db_user_collection_syn} WHERE now() >= crawling_time  and  now() <= end_time and  state = 1 and data_type = 2 ORDER BY id FETCH FIRST {self.read_size} ROWS ONLY FOR UPDATE;'
                    print('查询收藏店铺:', sql_read)
                    self.df_read = self.engine_pg6.read_sql(sql_read)
                    if self.df_read.shape[0] == 0:
                        self.stop_item_queue = False
                        return []
                    self.index_tuple = tuple(self.df_read['id'])
                    shop_id_list = ", ".join(str(row_id) for row_id in self.index_tuple)
                    sql_update = f"""UPDATE {self.db_user_collection_syn} a set state=2 where a.id in ({shop_id_list})"""
                    conn.execute(sql_update)
                asin_list = list(self.df_read.data_id.astype(
                    "U") + '|-|' + self.df_read.product_url + '|-|' + self.df_read.end_time.astype(str))
                return asin_list
            except Exception as e:
                print("读取数据出bug并等待5s继续", e, f"\n{traceback.format_exc()}")
                # Back off as the message announces; retrying immediately would
                # spin against a database that is still unavailable.
                time.sleep(5)
                self.engine_pg6 = self.pg_connect_6()
                continue

    def save_data(self):
        """Store the crawled listing rows and write back task states.

        Listing rows from ``asin_detail_list`` replace the existing rows of the
        same ``account_id`` in ``<site>_user_seller_collections`` (delete, then
        append). Afterwards the collected id lists are written back as task
        states: 1 (retry), 3 (stored), 4, 6 and 5, in that order.

        A storage failure is logged, the PostgreSQL engine is re-created and the
        store is retried after a pause.
        """
        while not is_internet_available():
            time.sleep(10)
        table_name = f"{self.site_name}_user_seller_collections"
        # Listing rows of the crawled shops.
        while True:
            try:
                df_asin_variation = pd.DataFrame(data=self.asin_detail_list,
                                                 columns=['account_id', 'asin', 'row_num', 'page', 'brand_name_list',
                                                          'results_of_num', 'page_rank', 'buy_data', 'account_name',
                                                          'collect_end_time'])
                df_asin_variation.drop_duplicates(['account_id', 'asin'], inplace=True)  # de-duplicate
                self.account_name_list_update = list(df_asin_variation.account_id)  # shops that reach state 3
                if self.asin_detail_list:
                    # Account ids are strings, so each one is quoted (with quotes
                    # doubled). The previous single-id branch interpolated the id
                    # unquoted, which is not valid SQL for a string id.
                    unique_accounts = dict.fromkeys(self.account_name_list_update)
                    account_in = ", ".join(
                        "'" + str(account).replace("'", "''") + "'" for account in unique_accounts)
                    with self.engine_pg6.begin() as conn:
                        sql_DELETE = f"""DELETE FROM {table_name}  where account_id in ({account_in})"""
                        conn.execute(sql_DELETE)
                    self.engine_pg6.to_sql(df_asin_variation, table_name, if_exists='append')
                self.asin_detail_list = []
                break
            except Exception as e:
                print(f"存储信息 失败，等待5s继续", e, f"\n{traceback.format_exc()}")
                time.sleep(20)
                self.engine_pg6 = self.pg_connect_6()
                continue

        if self.requests_error_asin_list:
            self.db_change_state(state=1)
            self.requests_error_asin_list = []
        if self.account_name_list_update:
            self.db_change_state(state=3)
            self.account_name_list_update = []
        if self.asin_not_found_list:
            self.db_change_state(state=4)
            self.asin_not_found_list = []
        if self.asin_not_sure_list:
            self.db_change_state(state=6)
            self.asin_not_sure_list = []
        if self.asin_not_seller_id:
            self.db_change_state(state=5)
            self.asin_not_seller_id = []

    def save_mysql_syn(self):
        while True:
            if is_internet_available():
                break
            else:
                time.sleep(10)
        if self.asin_syn_list:
            self.engine = self.mysql_connect()
            self.engine_pg6 = self.pg_connect_6()
            df_asin = pd.DataFrame(data=self.asin_syn_list,
                                   columns=['asin', 'data_type', 'priority', 'site', 'end_time', 'account_id'])
            current_time = datetime.datetime.now()
            # three_hours_ago = current_time - datetime.timedelta(hours=3)
            _time = current_time.strftime('%Y-%m-%d %H:%M:%S')
            with self.engine.begin() as conn:
                print(df_asin.asin, 'df_asin.asindf_asin.asin')
                if len(set(df_asin.asin)) == 1:
                    update_sql = f"""
                        UPDATE {self.site_name}_self_real_spider a
                        JOIN (
                            SELECT asin, data_type, site
                            FROM {self.site_name}_self_real_spider
                            WHERE asin IN ({tuple(df_asin.asin)[0]})
                                AND updated_at < '{_time}'
                                AND state = 3
                        ) b ON a.asin = b.asin AND a.site = b.site
                        SET a.data_type = CONCAT_WS(',', b.data_type, 9),a.state=1,a.priority=1,
                        a.account_id= '{list(df_asin.account_id)[0]}'
                        WHERE a.asin = b.asin
                            AND a.site = b.site
                            AND FIND_IN_SET('9', a.data_type) = 0;
                        """
                else:
                    update_sql = f"""
                    UPDATE {self.site_name}_self_real_spider a
                        JOIN (
                            SELECT asin, data_type, site
                            FROM {self.site_name}_self_real_spider
                            WHERE asin in {tuple(set(df_asin.asin))}
                                AND updated_at < '{_time}'
                                AND state = 3
                        ) b ON a.asin = b.asin AND a.site = b.site
                        SET a.data_type = CONCAT_WS(',', b.data_type, 9),a.state=1,a.priority=1
                        WHERE a.asin = b.asin
                            AND a.site = b.site
                            AND FIND_IN_SET('9', a.data_type) = 0;
                    """
                conn.execute(update_sql)
                conn.execute(
                    f"insert into {self.site_name}_self_real_spider (asin, data_type, priority,site, end_time,account_id) values (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE asin = values(asin), site = values (site),end_time=values (end_time),account_id=values (account_id)",
                    self.asin_syn_list)

    def save_asin_syn(self):
        while True:
            if is_internet_available():
                break
            else:
                time.sleep(10)
        print(self.asin_real_spider)
        current_time = datetime.datetime.now()
        # three_hours_ago = current_time - datetime.timedelta(hours=3)
        _time = current_time.strftime('%Y-%m-%d %H:%M:%S')
        with self.engine.begin() as conn:
            if len(self.asin_real_spider) == 1:
                update_sql_asin = f"""
                    UPDATE {self.site_name}_self_real_spider a
                    JOIN (
                        SELECT asin, data_type, site
                        FROM {self.site_name}_self_real_spider
                        WHERE asin IN ('{self.asin_real_spider[0]}')
                            AND updated_at < '{_time}'
                           
                    ) b ON a.asin = b.asin AND a.site = b.site 
                    SET a.data_type = CONCAT_WS(',', b.data_type, 8),a.state=1,a.priority=1
                    WHERE a.asin = b.asin
                        AND a.site = b.site and a.site='us'
                        AND FIND_IN_SET('8', a.data_type) = 0;
                    """
            else:
                update_sql_asin = f"""
                                    UPDATE {self.site_name}_self_real_spider a
                                    JOIN (
                                        SELECT asin, data_type, site
                                        FROM {self.site_name}_self_real_spider
                                        WHERE asin IN {tuple(set(self.asin_real_spider))}
                                            AND updated_at < '{_time}'
                                           
                                    ) b ON a.asin = b.asin AND a.site = b.site 
                                    SET a.data_type = CONCAT_WS(',', b.data_type, 8),a.state=1,a.priority=1
                                    WHERE a.asin = b.asin
                                        AND a.site = b.site and a.site='us'
                                        AND FIND_IN_SET('8', a.data_type) = 0;
                                    """
            print(update_sql_asin)
            conn.execute(update_sql_asin)
            conn.execute(
                f"insert into {self.site_name}_self_real_spider (asin, data_type, priority,site,end_time) values (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE asin = values(asin),site= values (site),end_time=values (end_time)",
                self.user_asin_list)

    def db_change_state(self, state=2):
        if state == 1:
            self.db_change_state_common(state=state, account_name_list=self.requests_error_asin_list)
        if state == 3:
            self.db_change_state_common(state=state, account_name_list=self.account_name_list_update)
        elif state == 4:
            self.db_change_state_common(state=state, account_name_list=self.asin_not_found_list)
        elif state == 6:
            self.db_change_state_common(state=state, account_name_list=self.asin_not_sure_list)

    def db_change_state_common(self, state, account_name_list):
        account_name_list = list(set(account_name_list))
        print(f"==================== 存储状态 {state} 数据 ========== {len(account_name_list)} ========")
        df = self.df_read.loc[self.df_read.data_id.isin(account_name_list)]
        id_tuple = tuple(df.id)
        while True:
            if is_internet_available():
                break
            else:
                time.sleep(10)
        while True:
            try:
                with self.engine_pg6.begin() as conn:
                    # 1--回滚；3--成功
                    if id_tuple:
                        if len(id_tuple) == 1:
                            sql_update = f"update {self.db_user_collection_syn} set state ={state} where id in ({id_tuple[0]}) and state=2;"
                        else:
                            sql_update = f"update {self.db_user_collection_syn} set state={state} where id in {id_tuple} and state=2;"
                        conn.execute(sql_update)
                break
            except Exception as e:
                print(f"更改{self.db_user_collection_syn}表的state={state}出错", e, f"\n{traceback.format_exc()}")
                self.engine_pg6 = self.pg_connect_6()
                continue


if __name__ == '__main__':
    # Run one crawl pass for each configured marketplace.
    for site_code in ('us',):
        spider = async_account_name_products(site_code)
        spider.run()
