import pymysql
import requests
import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from amazon_params import py_ja3
from utils.requests_param import Requests_param_val
from utils.db_connect import BaseUtils
from amazon_params.params import DB_CONN_DICT
from amazon_spider.VPS_IP import pppoe_ip
import uuid
import random
import time
from lxml import etree
import re
import urllib3
from queue import Queue
import threading
import pandas as pd
import datetime
import json
from utils.secure_db_client import get_remote_engine

# Requests in this file are sent with verify=False, so silence urllib3's
# InsecureRequestWarning instead of printing it for every request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Module-level HTTP session used by the page-fetching methods below.
sess = requests.Session()
import traceback

"""抓取亚马逊 new_releases ，和 new_releases asin top100"""


class nsr_catgory(BaseUtils):
    def __init__(self, site_name):
        """Prepare crawl state for one marketplace.

        Captures the current calendar values, creates the work queues and
        result accumulators, opens the database connections through
        ``init_db`` and finally runs ``int_parse``.

        :param site_name: marketplace code handed to ``init_db`` (``"us"``, ``"uk"``, ...).
        """
        super().__init__()
        self.site_name = site_name

        # Calendar context, read once at construction time.
        self.week = int(time.strftime("%W"))
        self.year = int(time.strftime("%Y"))
        now = datetime.datetime.today()
        self.time_strftime_ = time.strftime("%Y-%m-%d", time.localtime())
        self.month = now.month

        # Work queues and accumulators filled while crawling.
        self.name_path_queue, self.cookies_queue = Queue(), Queue()
        self.headers_num_int = 0
        self.insert_list, self.insert_id_pid_list, self.id_and_en_name_list = [], [], []

        self.init_db(site_name)

        self.name_id_queue = Queue()
        self.item_list, self.delect_asin_list, self.catgory_next_path_list = [], [], []
        self.asin_dict_ = {}
        self.item_queue = Queue()
        # Column order of the tuples collected in ``item_list``.
        self.columns = [
            'cate_current_id', 'asin', 'bsr_rank', 'price', 'rating',
            'total_comments', "week", "year_month", 'category_id',
        ]

        self.int_parse()

    def db_engine_us(self, site_name, db_type):
        """Return the engine that ``get_remote_engine`` builds for the given site and alias.

        :param site_name: forwarded as ``site_name`` (original note: -> database "selection").
        :param db_type: forwarded as ``db_type`` (original note: server-side alias, e.g. "mysql").
        :return: whatever ``get_remote_engine`` returns.
        """
        return get_remote_engine(site_name=site_name, db_type=db_type)

    def db_cursor_connect_update(self, sql, site):
        for i in range(3):
            try:
                engine_us_mysql = self.db_engine_us(site, 'mysql')
                print('更新sql：', sql)
                with engine_us_mysql.begin() as conn:
                    conn.execute(sql)
                break
            except:
                print(site, 'db_cursor_connect 报错：', sql)

    def db_cursor_connect_msyql_read(self, site=None, select_state1_sql=None):
        for i in range(3):
            try:
                if site:
                    engine_us_mysql = self.db_engine_us(site, 'mysql')
                else:
                    engine_us_mysql = self.db_engine_us(self.site_name, 'mysql')

                print(select_state1_sql)
                df = engine_us_mysql.read_sql(select_state1_sql)
                return df
            except Exception as e:
                import traceback
                traceback.print_exc()  # ★ 打印完整栈到终端
                print(e, 'db_cursor_connect_msyql_read 报错：', select_state1_sql)

    def int_parse(self):
        """Create the request-parameter helper and load the current week value.

        Sets ``self.reuests_para_val``, ``self.year_month`` (``"<year>_<month>"``)
        and ``self.year_week``, the last ``week`` value returned from table
        ``week_20_to_30`` (read through the ``us`` engine) for that year_month.
        """
        self.reuests_para_val = Requests_param_val(site_name=self.site_name, spider="seller_account_product")
        year_month = f'{self.year}_{self.month}'
        self.year_month = year_month
        sele_sql = f"SELECT week FROM week_20_to_30 WHERE `year_month`='{year_month}'"
        print(sele_sql)
        df_year_week = self.db_cursor_connect_msyql_read(site='us', select_state1_sql=sele_sql)
        weeks = list(df_year_week['week'])
        self.year_week = weeks[-1]
        print(self.year_week, '====当前周===1232333')


    def init_db(self, site_name):
        self.engine = self.mysql_connect()
        self.engine_pg = self.pg_connect()
        if site_name == "us":
            self.site_url = 'https://www.amazon.com'
            self.host = 'www.amazon.com'
        elif site_name == 'uk':
            self.site_url = 'https://www.amazon.co.uk'  # 站点url
            self.host = 'www.amazon.co.uk'
        elif site_name == 'de':
            self.site_url = 'https://www.amazon.de'
            self.host = 'www.amazon.de'
        elif site_name == 'fr':
            self.site_url = 'https://www.amazon.fr'
            self.host = 'www.amazon.fr'
        elif site_name == 'es':
            self.site_url = 'https://www.amazon.es'
            self.host = 'www.amazon.es'
        elif site_name == 'it':
            self.site_url = 'https://www.amazon.it'
            self.host = 'www.amazon.it'

    def safeIndex(self, list: list, index: int, default: object = None):
        """Return ``list[index]`` when the index is valid, otherwise ``default``.

        Negative indices keep their usual Python meaning as long as they are
        within range; any out-of-range index (in either direction, including
        any index into an empty list) yields ``default`` instead of raising.

        :param list: the sequence to read from.
        :param index: position to read.
        :param default: value returned when ``index`` is out of range.
        :return: the element at ``index`` or ``default``.
        """
        size = len(list)
        if -size <= index <= size - 1:
            return list[index]
        return default

    def parse_url(self, nodes_num: int, url: str):
        """
                        统一解析链接获取 bsr 分类id等数据
                        :param nodes_num:
                        :param url:
                        :return:
                        """
        arr = url.split("/")
        if not "ref=" in url:
            ref_suffix = None
            category_id = self.safeIndex(arr, len(arr) - 1, None)
            category_first_id = self.safeIndex(arr, len(arr) - 2, None)
        else:
            ref_suffix = self.safeIndex(arr, len(arr) - 1, None)
            category_first_id = self.safeIndex(arr, len(arr) - 3, None)
            category_id = self.safeIndex(arr, len(arr) - 2, None)
        if nodes_num == 1:
            level = 1
        elif url.endswith("_0"):
            level = 2
        elif url.endswith(f"{category_first_id}_1"):
            level = 3
        else:
            level = 4

        # 获取 parent id
        if level == 1:
            # 根节点
            category_id = "0"
            category_first_id = None
            category_parent_id = None
        elif level == 2:
            # 一级节点
            category_id = category_id
            category_first_id = category_id
            category_parent_id = "0"
        elif level == 3:
            # 一级节点下的次级节点
            category_id = category_id
            category_first_id = category_first_id
            category_parent_id = category_first_id
        elif level == 4:
            category_id = category_id
            category_first_id = category_first_id
            if ref_suffix is not None:
                category_parent_id = ref_suffix[ref_suffix.rfind("_") + 1:]
            else:
                category_parent_id = None
        else:
            category_id = None
            category_first_id = None
            category_parent_id = None

        return {
            "category_id": category_id,
            "category_first_id": category_first_id,
            "category_parent_id": category_parent_id
        }

    def html_4(self, bum):
        while True:
            if self.name_path_queue.empty() == False:
                querys = self.name_path_queue.get()
                print('请求=====：', querys)
                bsr_url = querys[2]
                bsr_class_name = querys[1]
                bsr_id = querys[0]
                if self.cookies_queue.empty():
                    cookies_dict = self.reuests_para_val.get_cookie()
                    self.cookie_dict_delete_id = cookies_dict
                    for ck in cookies_dict.values():
                        self.cookies_queue.put(ck)
                while 1:
                    cookie_str = self.cookies_queue.get()
                    if len(cookie_str) > 50:
                        try:
                            cookie_lsit = json.loads(cookie_str)
                        except:
                            cookie_lsit = eval(cookie_str)
                        cookie_dic = {}
                        try:
                            for i in cookie_lsit:
                                if i:
                                    cookie_dic[i["name"]] = i["value"]
                                else:
                                    continue
                            cookie_str = ''
                            for k, v in cookie_dic.items():
                                cookie_str = cookie_str + str(k) + '=' + str(v) + ';'
                            break
                        except:
                            cookie_str = ''
                            for k, v in cookie_lsit.items():
                                cookie_str = cookie_str + str(k) + '=' + str(v) + ';'
                            break
                    else:
                        break
                n = random.randint(70, 114)
                ua = f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{n}.0.{random.randint(1000, 5000)}.{random.randint(1, 181)} Safari/537.36'
                headers = {
                    'connection': 'close',
                    'authority': self.host,
                    'accept': 'text/html,*/*',
                    'accept-language': 'zh-CN,zh;q=0.9',
                    'cache-control': 'no-cache',
                    'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
                    'origin': f'{bsr_url}',
                    'referer': f'{bsr_url}',
                    'sec-ch-ua-mobile': '?0',
                    'user-agent': ua
                }
                alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n']
                k = ""
                for i in (0, random.randint(0, 15)):
                    k += random.choice(alphabet)
                headers[k] = str(uuid.uuid4())
                headers["cookie"] = cookie_str
                while True:
                    try:
                        sess.mount(self.site_url, py_ja3.DESAdapter())
                        resp = sess.get(bsr_url, headers=headers,
                                        timeout=20, verify=False)
                        if ("Enter the characters you see below" in resp.text) or (
                                "Geben Sie die Zeichen unten ein" in resp.text) or (
                                "Introduce los caracteres que se muestran" in resp.text) or (
                                "Saisissez les caractères que vous voyez" in resp.text) or (
                                "Inserisci i caratteri visualizzati nello spazio" in resp.text):
                            time.sleep(random.uniform(1.5, 5.5))
                            continue
                        if resp.status_code == 200 or resp.status_code == 201:
                            response = etree.HTML(resp.text)
                            response.xpath("//span[@id='glow-ingress-line2']/text()")
                            break
                    except Exception as e:
                        print("报错 继续 请求", e)
                        time.sleep(random.uniform(1.5, 5.5))
                        continue

                xpath_herf_list = [
                    f'//div[@role="treeitem"]/span[contains(text(),"{bsr_class_name}")]/parent::div/following-sibling::div[@role="group"]/div[@role="treeitem"]/a'
                    f'//div[@role="treeitem"]/span[contains(text(),"{bsr_class_name}")]/parent::div/parent::div//a/@href',
                    f'//div[@role="treeitem"]/span[contains(text(),"{bsr_class_name}")]/parent::div/parent::div/parent::div//a/@href',
                    f'//div[@role="treeitem"]/span[contains(text(),"{bsr_class_name[0:5]}")]/parent::div/parent::div/parent::div//a/@href']
                for xpath_herf in xpath_herf_list:
                    a_2_list = response.xpath(xpath_herf)
                    break
                if self.site_name == 'us' or self.site_name == 'uk':
                    h1_bsr_name_lsit = response.xpath("//h1[contains(text(),'Best Sellers in')]/text()")
                    if h1_bsr_name_lsit:
                        h1_bsr_name = h1_bsr_name_lsit[0].replace("Best Sellers in", '').strip()
                    else:
                        h1_bsr_name = None
                elif self.site_name == 'de':
                    h1_bsr_name_lsit = response.xpath("//h1[contains(text(),'Bestseller in')]/text()")
                    if h1_bsr_name_lsit:
                        h1_bsr_name = h1_bsr_name_lsit[0].replace("Bestseller in", '').strip()
                    else:
                        h1_bsr_name = None
                elif self.site_name == 'fr':
                    h1_bsr_name_lsit = response.xpath("//h1[contains(text(),'Les meilleures ventes en')]/text()")
                    if h1_bsr_name_lsit:
                        h1_bsr_name = h1_bsr_name_lsit[0].replace("Les meilleures ventes en", '').strip()
                    else:
                        h1_bsr_name = None
                elif self.site_name == 'es':
                    h1_bsr_name_lsit = response.xpath("//h1[contains(text(),'Los más vendidos en')]/text()")
                    if h1_bsr_name_lsit:
                        h1_bsr_name = h1_bsr_name_lsit[0].replace("Los más vendidos en", '').strip()
                    else:
                        h1_bsr_name = None
                elif self.site_name == 'it':
                    h1_bsr_name_lsit = response.xpath("//h1[contains(text(),'Bestseller in')]/text()")
                    if h1_bsr_name_lsit:
                        h1_bsr_name = h1_bsr_name_lsit[0].replace("Bestseller in", '').strip()
                    else:
                        h1_bsr_name = None
                # self.id_and_en_name_list 1 代表还有 子分类 2代表没有子分类，为最终节点
                bsr_url_category_id_list = self.parse_url(bum - 1, bsr_url)
                if a_2_list:
                    self.id_and_en_name_list.append((bsr_id, h1_bsr_name, 1, bsr_url_category_id_list['category_id'],
                                                     bsr_url_category_id_list['category_parent_id']))
                    name_2_href_list = []
                    for a_2 in a_2_list:
                        name_2 = a_2.xpath('./text()')[0].strip()
                        name_2_href = a_2.xpath('./@href')[0].strip()
                        print('下一级类目链接：：', name_2_href)
                        href = self.site_url + name_2_href
                        id_pid_data_list = self.parse_url(bum, href)
                        print(id_pid_data_list)
                        self.insert_id_pid_list.append(
                            [name_2, href, id_pid_data_list['category_id'], id_pid_data_list['category_parent_id']])
                        # 父级类目自增id  类目名称 二级类目， 链接
                        self.insert_list.append((bsr_id, name_2, bum, href, id_pid_data_list['category_id'],
                                                 id_pid_data_list['category_parent_id']))
                        # 存储当前分类id。，和页面获取的下一级url
                        name_2_href_list.append(href)
                    self.catgory_next_path_list.append([bsr_id, '|-|'.join(name_2_href_list)])
                else:
                    self.id_and_en_name_list.append((bsr_id, h1_bsr_name, 2, bsr_url_category_id_list['category_id'],
                                                     bsr_url_category_id_list['category_parent_id']))

                print(self.year_week, '====当前周===', self.week)
                ele_next = response.xpath(
                    '//a[contains(text(), "Weiter")]/@href|//a[contains(text(), "Next")]/@href|//a[contains(text(), "Page suivante")]/@href|//a[contains(text(), "Suivant")]/@href|//a[contains(text(), "Avanti")]/@href|//a[contains(text(), "Siguiente")]/@href|//a[contains(text(), "Seite")]/@href|//a[contains(text(), "siguiente")]/@href|//a[contains(text(), "Pagina")]/@href')
                data_client_recs_list = response.xpath(
                    "//div[@class='p13n-desktop-grid']/@data-client-recs-list")
                div_list = response.xpath("//div[@id='gridItemRoot']")
                for div in div_list:
                    bsr_rank_list = div.xpath(".//span[@class='zg-bdg-text']/text()")
                    if bsr_rank_list:
                        bsr_rank = re.findall(r'#(\d+)', bsr_rank_list[0])[0]
                    else:
                        bsr_rank = None
                    href_list = div.xpath(".//a[@role='link']/@href")
                    if href_list:
                        try:
                            asin = re.findall('dp/(.*)/ref', href_list[0])[0]
                        except:
                            asin = None
                    else:
                        asin = None
                    price = None
                    rating = None
                    reviews = None
                    if asin:
                        self.asin_dict_[asin] = [bsr_id, asin, bsr_rank, price, rating, reviews, self.week,
                                                 self.year_month, bsr_url_category_id_list['category_id']]
                if data_client_recs_list:
                    data_list = eval(data_client_recs_list[0])
                    for data in data_list:
                        asin = data['id']
                        rank = data['metadataMap']['render.zg.rank']
                        if self.asin_dict_.get(asin):
                            tp_item = tuple(self.asin_dict_.get(asin))
                            self.item_list.append(tp_item)
                        else:
                            self.item_list.append(
                                (bsr_id, asin, int(rank), None, None, None, self.week, self.year_month,
                                 bsr_url_category_id_list['category_id']))
                if ele_next and data_client_recs_list:
                    self.get_bsr_asin_data(ele_next, bsr_id, bsr_url_category_id_list['category_id'], asin_dict_={})

            else:
                break

    def get_bsr_asin_data(self, ele_next, c_id, bsr_url_category_id, asin_dict_):
        next_url = self.site_url + ele_next[0]
        if self.cookies_queue.empty():
            cookies_dict = self.reuests_para_val.get_cookie()
            self.cookie_dict_delete_id = cookies_dict
            for ck in cookies_dict.values():
                self.cookies_queue.put(ck)
        while 1:
            cookie_str = self.cookies_queue.get()
            if len(cookie_str) > 50:
                try:
                    cookie_lsit = json.loads(cookie_str)
                except:
                    cookie_lsit = eval(cookie_str)
                cookie_dic = {}
                try:
                    for i in cookie_lsit:
                        if i:
                            cookie_dic[i["name"]] = i["value"]
                        else:
                            continue
                    cookie_str = ''
                    for k, v in cookie_dic.items():
                        cookie_str = cookie_str + str(k) + '=' + str(v) + ';'
                    break
                except:
                    cookie_str = ''
                    for k, v in cookie_lsit.items():
                        cookie_str = cookie_str + str(k) + '=' + str(v) + ';'
                    break
            else:
                break
        n = random.randint(70, 114)
        ua = f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{n}.0.{random.randint(1000, 5000)}.{random.randint(1, 181)} Safari/537.36'
        headers = {
            'connection': 'close',
            'authority': self.host,
            'accept': 'text/html,*/*',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'no-cache',
            'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
            'origin': f'{next_url}',
            'referer': f'{next_url}',
            'sec-ch-ua-mobile': '?0',
            'user-agent': ua
        }
        alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n']
        k = ""
        for i in (0, random.randint(0, 15)):
            k += random.choice(alphabet)
        headers[k] = str(uuid.uuid4())
        headers["cookie"] = cookie_str
        while True:
            try:
                sess.mount(self.site_url, py_ja3.DESAdapter())
                resp = sess.get(
                    next_url,
                    headers=headers,
                    timeout=15, verify=False)
                if ("Enter the characters you see below" in resp.text) or (
                        "Geben Sie die Zeichen unten ein" in resp.text) or (
                        "Introduce los caracteres que se muestran" in resp.text) or (
                        "Saisissez les caractères que vous voyez" in resp.text) or (
                        "Inserisci i caratteri visualizzati nello spazio" in resp.text):
                    print("出现验证码")
                    self.headers_num_int += 1
                    if self.headers_num_int > 150:
                        self.headers_num_int = 0
                        pppoe_ip()
                        time.sleep(5)
                    time.sleep(random.uniform(3.5, 5.5))
                    continue
                if resp.status_code == 200 or resp.status_code == 201:
                    response = etree.HTML(resp.text)
                    response.xpath("//span[@id='glow-ingress-line2']/text()")
                    break
            except Exception as e:
                print("报错 继续 请求", e)
                time.sleep(random.uniform(5.5, 15.5))
                continue

        data_client_recs_list = response.xpath("//div[@class='p13n-desktop-grid']/@data-client-recs-list")
        div_list = response.xpath("//div[@id='gridItemRoot']")
        for div in div_list:
            bsr_rank_list = div.xpath(".//span[@class='zg-bdg-text']/text()")
            if bsr_rank_list:
                bsr_rank = re.findall(r'#(\d+)', bsr_rank_list[0])[0]
            else:
                bsr_rank = None
            href_list = div.xpath(".//a[@role='link']/@href")
            if href_list:
                try:
                    asin = re.findall('dp/(.*)/ref', href_list[0])[0]
                except:
                    asin = None
            else:
                asin = None
            price = None
            rating = None
            reviews = None
            if asin:
                asin_dict_[asin] = [c_id, asin, bsr_rank, price, rating, reviews, self.week, self.year_month,
                                    bsr_url_category_id]
        if data_client_recs_list:
            data_list = eval(data_client_recs_list[0])
            for data in data_list:
                asin = data['id']
                rank = data['metadataMap']['render.zg.rank']
                if asin_dict_.get(asin):
                    tp_item = tuple(asin_dict_.get(asin))
                    self.item_list.append(tp_item)
                else:
                    self.item_list.append(
                        (c_id, asin, int(rank), None, None, None, self.week, self.year_month, bsr_url_category_id))

    def save_asin_data(self):
        while True:
            try:
                print('存储：====', len(self.item_list), f'{self.site_name}_new_releases_top100_asin')
                df = pd.DataFrame(data=self.item_list, columns=self.columns)
                df['date_info'] = self.time_strftime_
                print(df['date_info'])
                df.drop_duplicates(['asin', 'bsr_rank', 'cate_current_id'], inplace=True)  # 去重
                print(df.shape, '111111111111')
                self.engine.to_sql(df, f'{self.site_name}_new_releases_top100_asin', if_exists="append")
                self.item_list = []
            except Exception as e:
                print(e, '报错存储')
                self.mysql_reconnect(table_name=f'{self.site_name}_new_releases_top100_asin', e=e)
                time.sleep(10)
                continue
            try:
                self.engine_pg.to_sql(df, f'{self.site_name}_new_releases_top100_asin', if_exists="append")
            except Exception as e:
                print(e, '报错存储')
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
            break

    def init_list(self):
        self.insert_list = []
        self.name_path_queue = Queue()
        self.id_and_en_name_list = []
        self.name_id_queue = Queue()
        self.asin_dict_ = {}
        self.item_list = []
        self.catgory_next_path_list = []
        self.insert_id_pid_list = []
        self.cookies_queue = Queue()
        self.headers_num_int = 0

    def update_delete_time(self):
        try:
            if self.catgory_next_path_list:
                for category_path in self.catgory_next_path_list:
                    print('category_path::', category_path)
                    category_url_list = category_path[1].split('|-|')
                    category_id = category_path[0]
                    path_sql = f"""
                           select id, category_id, category_parent_id, en_name, `path`, delete_time
                       from {self.site_name}_new_releases
                       where (category_parent_id = "{category_id}")
                       order by {self.site_name}_new_releases.category_id, category_parent_id;
                       """
                    print('path_sql:', path_sql)
                    df_exist_rows = self.db_cursor_connect_msyql_read(site=None, select_state1_sql=path_sql)
                    exist_rows = df_exist_rows.values.tolist()
                    group1_id = []
                    group2_id = []
                    for row in exist_rows:
                        print('rowrow::', row)
                        if row[4] in category_url_list:
                            group1_id.append(row[0])
                        else:
                            group2_id.append(row[0])
                    _strftime_ = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    print(_strftime_)
                    if group1_id:
                        if len(group1_id) == 1:
                            sql1 = f'update {self.site_name}_new_releases set delete_time = null where id in ({group1_id[0]})'
                        else:
                            sql1 = f"update {self.site_name}_new_releases set delete_time = null where id in {tuple(group1_id)}"
                            print('sql1::', sql1)
                        self.db_cursor_connect_update(sql1, self.site_name)
                    if group2_id:
                        if len(group2_id) == 1:
                            sql2 = f"update {self.site_name}_new_releases set delete_time = '{_strftime_}' where id in ({group2_id[0]})"
                        else:
                            sql2 = f"update {self.site_name}_new_releases set delete_time = '{_strftime_}' where id in {tuple(group2_id)}"
                        print('sql2::', sql2)
                        self.db_cursor_connect_update(sql2, self.site_name)
        except Exception as e:
            print('更新 delete_time 报错', traceback.format_exc())

    def save_date(self):
        for name_num_path in self.insert_list:
            save_name_num_list = []
            while True:
                #  不存在就插入
                try:
                    select_sql_id = f'''SELECT id FROM {self.site_name}_new_releases  WHERE `path`="{name_num_path[3]}"'''
                    print('select_sql_id:', select_sql_id)
                    df_id = self.db_cursor_connect_msyql_read(site=None, select_state1_sql=select_sql_id)
                    if not df_id.empty:
                        save_name_num_list.append(name_num_path)
                    else:
                        select_sql_name = f'''SELECT en_name FROM {self.site_name}_new_releases  WHERE `path`="{name_num_path[3]}" order by id desc'''
                        print('select_sql_name:', select_sql_name)
                        df_en_name = self.db_cursor_connect_msyql_read(site=None, select_state1_sql=select_sql_name)
                        print(df_en_name['en_name'][0], '33333333333333333333333', name_num_path[1])
                        if df_en_name['en_name'][0] == name_num_path[1]:
                            pass
                        else:
                            _strftime_ = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                            update_name_sql = f'''update {self.site_name}_new_releases set delete_time = '2023-06-19 00:00:00' WHERE `path`="{name_num_path[3]}" and delete_time is null'''
                            print('更新 en_name：', update_name_sql)
                            self.db_cursor_connect_update(update_name_sql, self.site_name)
                            save_name_num_list.append(name_num_path)
                    break
                except Exception as e:
                    print(e)
                    time.sleep(10)
                    self.init_db(self.site_name)
                    continue

            # # 插入新的数据
            print('save_name_num_list::', save_name_num_list)
            with self.engine.begin() as conn:
                conn.execute(
                    f"insert into {self.site_name}_new_releases (p_id, en_name, nodes_num,path, category_id, category_parent_id) values (%s, %s,%s, %s,%s, %s)",
                    save_name_num_list)

    def run(self):
        print(" run 函数 是抓取 分类节点，只新增，不删除")
        num = 1
        while True:
            print(num)
            b_num = num
            name_1_list = self.db_read_data(num)
            print('name_1_list:::', len(name_1_list))
            if name_1_list:
                if self.cookies_queue.empty():
                    cookies_dict = self.reuests_para_val.get_cookie()
                    self.cookie_dict_delete_id = cookies_dict
                    for ck in cookies_dict.values():
                        self.cookies_queue.put(ck)
                if num < 15:
                    for name_1 in name_1_list:
                        self.name_path_queue.put(name_1)
                    html_thread = []
                    for i in range(50):
                        thread2 = threading.Thread(target=self.html_4, args=(b_num + 1,))
                        html_thread.append(thread2)
                    for ti in html_thread:
                        ti.start()
                        time.sleep(1)
                    for t2 in html_thread:
                        t2.join()
                    print(f"当前 {num} 级分类 全部请求完毕", "获取二级类目：", len(self.insert_list))
                else:
                    break
                self.save_date()
                self.update_delete_time()
                self.save_asin_data()
                self.init_list()
                pppoe_ip()
                time.sleep(5)
                self.init_db(self.site_name)
            elif num > 15:
                break
            num += 1

        self.select_id_1()

    def select_id_1(self):
        """Write each level-2 category id into ``one_category_id`` of its subtree.

        Reads the ids of all rows with ``nodes_num=2`` from
        ``{site_name}_new_releases``. For each of them a MySQL user-variable
        query walks the ``p_id`` links (a row is kept when its parent is
        already in the accumulated ``@pids`` list), and the rows it returns
        get ``one_category_id`` set to that level-2 id.

        The work is best-effort per level-2 category: an ordinary failure for
        one category is printed and the loop continues with the next one.
        ``KeyboardInterrupt`` / ``SystemExit`` are not intercepted.
        """
        # Look up the top-level parent id of the child nodes
        print('查询 子节点的顶级父类id')
        select_sql_1 = f'select id from {self.site_name}_new_releases where nodes_num=2'
        df_id = self.db_cursor_connect_msyql_read(site=None, select_state1_sql=select_sql_1)
        root_rows = df_id.values.tolist()
        print(root_rows)
        for root_row in root_rows:
            root_id = root_row[0]
            try:
                select_p_id = f"select t3.id,t4.en_name from (select t1.id,t1.parent_id,if(find_in_set(parent_id, @pids) > 0, @pids := concat(@pids, ',',id), 0) as ischild from (select id,p_id as parent_id from {self.site_name}_new_releases t  order by p_id,id) t1,(select @pids := {root_id}) t2) t3 LEFT JOIN {self.site_name}_new_releases t4 on t3.id = t4.id where ischild != 0;"
                print('select_p_id::', select_p_id)
                df_all_id = self.db_cursor_connect_msyql_read(site=None, select_state1_sql=select_p_id)
                if df_all_id.empty:
                    continue
                # First column of every returned row is the descendant id.
                child_ids = [row[0] for row in df_all_id.values.tolist()]
                print(len(child_ids))
                # Build the IN-list explicitly: formatting a tuple would emit
                # "(7,)" for a single id and depends on the elements' repr().
                id_list_sql = ', '.join(str(child_id) for child_id in child_ids)
                update_sql = (
                    f'update {self.site_name}_new_releases '
                    f'set one_category_id={root_id} where id in ({id_list_sql})'
                )
                self.db_cursor_connect_update(update_sql, self.site_name)
            except Exception as e:
                # Keep going with the remaining categories, but leave a trace
                # of what failed instead of discarding the error silently.
                print(f'select_id_1 failed for category id {root_id}:', e)

    def db_read_data(self, i):
        """Fetch the pending categories of one tree level and mark them as taken.

        Selects rows of ``{site_name}_new_releases`` with ``nodes_num = i``,
        ``category_state = 1`` and ``delete_time is NULL``, drops duplicate
        ``category_id`` rows, then issues an UPDATE inside an
        ``engine.begin()`` block that sets ``category_state = 2`` for the
        selected ``category_id`` values.

        Args:
            i: value compared against the ``nodes_num`` column.

        Returns:
            A list with one ``[id, en_name, path]`` list of strings per
            selected row, or ``[]`` when no row matches.

        Any exception is printed; after a 10 second pause and a call to
        ``init_db`` the whole sequence is retried, with no upper bound on the
        number of retries.
        """
        while True:
            try:
                sql_read = f'select id,en_name,`path`,category_id from {self.site_name}_new_releases where nodes_num={i} and category_state=1 and delete_time is NULL'
                print(sql_read)
                df_read = self.engine.read_sql(sql_read)
                df_read = df_read.drop_duplicates(subset='category_id')
                print('df_read::', df_read)
                if df_read.shape[0] == 0:
                    return []
                with self.engine.begin() as conn:
                    index_tuple = tuple(df_read['category_id'])
                    # A one-element tuple formats as "(x,)", which is not
                    # valid SQL, hence the separate single-value statement.
                    if len(index_tuple) == 1:
                        sql_update = f"""UPDATE {self.site_name}_new_releases a set category_state=2 where a.category_id in ("{index_tuple[0]}")"""
                    else:
                        sql_update = f"""UPDATE {self.site_name}_new_releases a set category_state=2 where a.category_id in {index_tuple}"""
                    print(sql_update)
                    conn.execute(sql_update)
                categorys_list = list(df_read.id.astype("U") + '|-|' + df_read.en_name + '|-|' + df_read.path)
                # The loop name must differ from the ``i`` argument: reusing
                # it would overwrite the level, and a retry triggered by a
                # failure here would then query with a garbage nodes_num.
                return [entry.split('|-|') for entry in categorys_list]
            except Exception as e:
                print(e)
                time.sleep(10)
                self.init_db(self.site_name)
                continue

    def send_ms(self):
        """Tell the workflow service that the new_releases crawl for this site is done.

        POSTs a JSON payload (report date, site name, status fields and the
        ``{site_name}_new_releases_top100_asin`` table name) to the workflow
        ``emit`` endpoint and prints the response body.

        Up to three attempts are made. After a failed attempt the error is
        printed, ``init_db`` is called and the method waits 10 seconds before
        trying again. If all three attempts fail the method returns silently.
        """
        url = 'http://selection.yswg.com.cn:8080/soundasia_selection/workflow/emit'
        data = {"dateType": "day", "reportDate": self.time_strftime_, "statusVal": 3,
                "siteName": self.site_name,
                "remark": "new_releases 榜单爬取完毕 ", "isEnd": "是",
                "tableName": f"{self.site_name}_new_releases_top100_asin",
                "status": "new_releases 榜单爬取完毕"}
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }
        for attempt in range(3):
            try:
                # Without a timeout a stalled connection would block the whole
                # crawl indefinitely; 15 s matches send_ms_count_data_num.
                t = requests.post(url, headers=headers, json=data, timeout=15)
                print(t.text)
                break
            except Exception as e:
                print(f'send_ms attempt {attempt + 1} failed:', e)
                self.init_db(self.site_name)
                time.sleep(10)
    def send_ms_count_data_num(self, site, num, day_time):
        """Send the "daily new-releases crawl finished" message with the row count.

        Args:
            site: site code; concatenated in front of the fixed title suffix.
            num: number of rows crawled for the day, embedded in the content.
            day_time: date text embedded at the start of the content.

        The message is form-POSTed with a 15 second timeout to a fixed list of
        accounts. The response is not inspected and request errors propagate
        to the caller.
        """
        payload = {
            'account': 'pengyanbing,chenjianyun,wujicang',
            'title': site + ' new 榜单',
            'content': f'{day_time} 新品 榜单 抓取完成 当日抓取总数 {num}',
        }
        requests.post(
            url='http://47.112.96.71:8082/selection/sendMessage',
            data=payload,
            timeout=15,
        )
    def updata_category_state(self):
        """Reset crawl state, prune old top-100 rows and report today's row count.

        One attempt consists of:
          1. setting ``category_state=1`` on every ``{site_name}_new_releases`` row;
          2. deleting ``{site_name}_new_releases_top100_asin`` rows whose
             ``date_info`` is before the date six days ago;
          3. counting the rows whose ``date_info`` is today and passing that
             count to ``send_ms_count_data_num``.

        A failing attempt is printed and followed by a 20 second pause; at
        most three attempts are made. Whether or not an attempt succeeded,
        rows whose ``and_en_name`` is the literal text ``'None'`` are finally
        set to NULL.
        """
        day_format = "%Y-%m-%d"
        for _attempt in range(3):
            try:
                reset_sql = f'UPDATE {self.site_name}_new_releases set category_state=1;'
                print(reset_sql)
                self.db_cursor_connect_update(reset_sql, self.site_name)

                cutoff_day = (datetime.datetime.now() - datetime.timedelta(days=6)).strftime(day_format)
                purge_sql = f"DELETE FROM {self.site_name}_new_releases_top100_asin WHERE date_info < '{cutoff_day}';"
                print(purge_sql)
                self.db_cursor_connect_update(purge_sql, self.site_name)

                today = datetime.datetime.now().strftime(day_format)
                count_sql = f"select count(id) FROM {self.site_name}_new_releases_top100_asin WHERE date_info = '{today}';"
                print(count_sql)
                count_frame = self.db_cursor_connect_msyql_read(site=None, select_state1_sql=count_sql)
                todays_total = count_frame['count(id)'][0]
                print('count_data_num::', todays_total)
                self.send_ms_count_data_num(self.site_name, todays_total, today)
            except Exception as err:
                print(err, '222222222')
                time.sleep(20)
            else:
                # Everything went through; no further attempts needed.
                break

        # Runs regardless of how the attempts above ended.
        self.db_cursor_connect_update(
            f"UPDATE {self.site_name}_new_releases set and_en_name=NULL WHERE and_en_name='None'",
            self.site_name,
        )


    def update_sql(self):
        """Move ``user_collection_syn`` rows from state 2 back to state 1.

        The statement is printed and executed on a connection obtained from
        ``self.engine_pg.begin()``.
        """
        statement = "update user_collection_syn set state=1 where state=2"
        with self.engine_pg.begin() as connection:
            print(statement)
            connection.execute(statement)


if __name__ == '__main__':
    # Sites are processed in this order in each of the three passes below.
    site_names = ['us', 'de', 'uk']

    pppoe_ip()

    # Pass 1: crawl new_releases per site and emit the workflow notification.
    for site in site_names:
        spider = nsr_catgory(site_name=site)
        spider.run()
        spider.send_ms()
        if site == 'us':
            spider.update_sql()

    # Pass 2: reset category state and send the daily count per site.
    for site in site_names:
        nsr_catgory(site_name=site).updata_category_state()

    # Pass 3: run the BSR category spider per site. The import is kept after
    # the first two passes so it still happens only once they have finished.
    from amazon_every_day_spider.get_BsrCatgory_asin_rank import bsr_catgory

    for site in site_names:
        spider = bsr_catgory(site_name=site)
        spider.run()
        spider.update_category_state()
        spider.run_update_redirect_flag()
        spider.updata_category_first_id()
