import copy
import time
import logging
import os, sys
import pandas as pd
from queue import Queue
from func_timeout import func_set_timeout
from sqlalchemy.exc import OperationalError
from func_timeout.exceptions import FunctionTimedOut
sys.path.append(os.path.dirname(os.path.dirname(sys.path[0])))  # 上级目录
# useful for handling different item types with a single interface
from amazon_spider.utils.utils import send_mg
from amazon_spider.utils.common import is_internet_available
from amazon_spider.db.mysql_db import df_to_sql, get_country_engine
from amazon_spider.db.pg_db import get_pg_country_engine, get_14pg_country_engine


class AmazonRealKeepaSpiderPipeline:
    def __init__(self, site):
        self.site = site
        self.q_dict = {
            "error_queue": Queue(),
            "inner_item_queue": Queue(),
            # "variat_item_queue": Queue(),
            "asin_img_queue": Queue(),
            # "self_variat_queue": Queue(),
        }
        self.num = 90
        self.img_num = 50
        # while True:
        #     if is_internet_available():
        #         self.producer = KafkaProducer(
        #             bootstrap_servers=['113.100.143.162:39092'],
        #             api_version=(2, 4, 1),
        #             value_serializer=lambda v: json.dumps(v).encode('utf-8')
        #         )
        #         break
        #     else:
        #         time.sleep(2)
        #         logging.info("Network connection failure")
        #         continue

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            site=crawler.spider.site
        )

    @func_set_timeout(300)
    def df_to_sql(self, table_name, df, site="us", db="mysql"):
        """Append ``df`` to ``table_name`` in the selected database.

        Args:
            table_name: Target table name.
            df: DataFrame whose rows are appended (the index is not written).
            site: Site code passed to the engine factory.
            db: Backend selector: ``"mysql"``, ``"pg"`` or ``"pg14"``.

        Returns:
            True when the rows were written. False when ``db`` is not one of
            the supported selectors, when the network check fails, or when
            SQLAlchemy raises ``OperationalError``.

        The call is bounded by the ``func_set_timeout(300)`` decorator.
        """
        engine_factories = {
            "mysql": get_country_engine,
            "pg": get_pg_country_engine,
            "pg14": get_14pg_country_engine,
        }
        get_engine = engine_factories.get(db)
        if get_engine is None:
            return False
        try:
            if not is_internet_available():
                return False
            engine = get_engine(site)
            try:
                df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
            finally:
                # Release pooled connections even when the write fails.
                engine.dispose()
            return True
        except OperationalError:
            return False

    @func_set_timeout(100)
    def df_read_sql(self, find_sql, site="us", db="mysql"):
        """Run ``find_sql`` and return the result as a DataFrame.

        Args:
            find_sql: SQL query handed to ``pandas.read_sql``.
            site: Site code passed to the engine factory.
            db: Backend selector: ``"mysql"``, ``"pg"`` or ``"pg14"``.

        Returns:
            The query result, or None when ``db`` is not a supported selector.

        Database errors are not caught here; they propagate to the caller.
        The call is bounded by the ``func_set_timeout(100)`` decorator.
        """
        engine_factories = {
            "mysql": get_country_engine,
            "pg": get_pg_country_engine,
            "pg14": get_14pg_country_engine,
        }
        get_engine = engine_factories.get(db)
        if get_engine is None:
            return None
        engine = get_engine(site)
        try:
            return pd.read_sql(find_sql, con=engine)
        finally:
            # Release pooled connections even when the query fails.
            engine.dispose()

    def save_db(self, table, df, site, db):
        """Append ``df`` to ``table``, retrying until the write succeeds.

        Args:
            table: Target table name.
            df: DataFrame to append.
            site: Site code passed to the database helper.
            db: Backend selector passed to the database helper.

        The loop never gives up: a falsy result, an ``OperationalError`` or a
        ``FunctionTimedOut`` triggers another attempt after a short pause.
        """
        # NOTE(review): this calls the module-level ``df_to_sql`` imported
        # from ``amazon_spider.db.mysql_db``, not ``self.df_to_sql`` -
        # confirm that this is intended.
        while True:
            try:
                if df_to_sql(table, df, site=site, db=db):
                    # Guard the sample row so an empty frame cannot raise
                    # IndexError after a successful write.
                    first_row = df.values[0] if len(df) else None
                    logging.info(
                        f"更新 {db} 数据库 {table} -----{df.shape}---------{df.head()} {first_row}")
                    break
                logging.info(f"更新 {db} 数据库 {table} -----失败")
            except OperationalError as e:
                logging.info(f"更新 {db} 数据库 {table} 失败  连接错误{e}")
            except FunctionTimedOut as e:
                logging.info(
                    f"更新 {db} 数据库 {table} -超时-{e}---{df.shape}---------{df.head()}")
            # Pause between attempts instead of hammering the database in a
            # tight loop (same delay as up_del_db).
            time.sleep(3)

    @func_set_timeout(300)
    def up_del_dis(self, sql, data=None, site="us", db="mysql"):
        """Execute one write statement (update / delete / insert).

        Args:
            sql: SQL statement to execute.
            data: Optional parameters passed as the second argument of
                ``conn.execute``. When ``data`` is not None but empty, nothing
                is executed and the call still reports success.
            site: Site code passed to the engine factory.
            db: Backend selector: ``"mysql"``, ``"pg"`` or ``"pg14"``.

        Returns:
            True when the statement ran (or there was nothing to run); False
            when the network check fails or ``OperationalError`` is raised.

        Raises:
            ValueError: If ``db`` is not a supported selector.

        The call is bounded by the ``func_set_timeout(300)`` decorator.
        """
        engine_factories = {
            "mysql": get_country_engine,
            "pg": get_pg_country_engine,
            "pg14": get_14pg_country_engine,
        }
        if db not in engine_factories:
            raise ValueError(f"unsupported db: {db!r}")
        # Check connectivity before creating an engine so nothing has to be
        # cleaned up on the offline path.
        if not is_internet_available():
            return False
        engine = engine_factories[db](site)
        try:
            with engine.connect() as conn:
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError:
            logging.info(f"error sql is {sql}")
            return False
        finally:
            # Dispose after the connection is closed, on every path.
            engine.dispose()

    def up_del_db(self, sql, data=None, site="us", db="mysql"):
        """Run a write statement through ``up_del_dis`` with retries.

        Args:
            sql: SQL statement; its text decides the label used in log lines
                ("delete", "insert", otherwise "update").
            data: Optional statement parameters forwarded to ``up_del_dis``.
            site: Site code forwarded to ``up_del_dis``.
            db: Backend selector forwarded to ``up_del_dis``.

        The statement is retried every 3 seconds until it succeeds. Deletes
        that target the current year's ``_self_asin_detail_<year>`` table are
        abandoned once their attempt counter reaches 5.
        """
        lowered_sql = sql.lower()
        if 'delete' in lowered_sql:
            sql_msg = "delete"
        elif 'insert' in lowered_sql:
            sql_msg = "insert"
        else:
            sql_msg = "update"

        def payload_summary():
            # Shared tail of every log line: payload size (or the SQL text
            # when there is no payload) plus a sample of the payload.
            return f"{len(data or []) or sql}---------{[] if data is None else data[0:5]}"

        count = 0
        while True:
            try:
                if not is_internet_available():
                    time.sleep(3)
                    logging.info(
                        f"{sql_msg} {db} data network error T_T --> {payload_summary()}")
                    continue
                # NOTE(review): the counter is bumped before the attempt, so
                # the "error count" line is logged even on the first try and
                # at most 4 attempts are made - confirm this is intended.
                if sql_msg == "delete" and f"_self_asin_detail_{time.gmtime().tm_year}" in sql:
                    count += 1
                    logging.info(f"delete us_self_asin_detail_{time.gmtime().tm_year} 错误次数 +1")
                if count >= 5:
                    logging.info("delete count >= 5")
                    break
                if self.up_del_dis(sql, data=data, site=site, db=db):
                    logging.info(f"{sql_msg} {db} data ok ^_^ -----{payload_summary()}")
                    break
                time.sleep(3)
                logging.info(f"{sql_msg} {db} data error T_T --> {payload_summary()}")
            except FunctionTimedOut as e:
                time.sleep(3)
                logging.info(f"{sql_msg} {db} data time out T_T --> {e}----{payload_summary()}")

    def list_exploded(self, df, key):
        exploded_list = df[key].explode()
        # 展开后转换为一个大列表
        item_list = [i for i in exploded_list.tolist() if not isinstance(i, float) and isinstance(i, dict) and i != None]
        # 详情数据
        df = pd.DataFrame(item_list)
        return df

    @staticmethod
    def asin_to_number(asin):
        """
        Convert a 10-character ASIN string to a unique number.
        This function assumes that ASIN consists of uppercase letters and digits.
        """

        def char_to_number(char):
            if char.isdigit():
                return int(char)
            else:
                return ord(char) - 55  # 'A' -> 10, 'B' -> 11, ..., 'Z' -> 35

        if len(asin) != 10:
            raise ValueError("ASIN must be 10 characters long")

        base = 36
        asin_number = 0
        for i, char in enumerate(reversed(asin)):
            asin_number += char_to_number(char) * (base ** i)

        # The final number is taken modulo 1 billion to fit the range 1-10 billion
        return asin_number % 1000000000

    def seller_datas(self, seller_detail=None, seller=None):
        """Persist detail rows for ``asin_type`` "7" items and their sellers.

        Steps, all against MySQL:
          1. per site, delete today's rows of the affected ASINs from
             ``<site>_self_asin_detail``;
          2. append the detail rows to that table (helper columns removed);
          3. set ``state`` to 3 for those ASINs in ``<site>_self_real_spider``;
          4. upsert the seller rows into ``self_asin_seller``.

        Args:
            seller_detail: DataFrame of detail rows; needs at least the
                ``asin`` and ``site`` columns. Nothing happens when it is None.
            seller: DataFrame of seller rows; None is treated as empty.
        """
        logging.info("data_type to 7 data")
        if seller_detail is None:
            logging.info("seller_datas called without detail rows")
            return
        inner_item_df = seller_detail
        self_asin_seller = seller if seller is not None else pd.DataFrame()

        now_date = time.strftime("%Y-%m-%d", time.localtime())
        # Group by the plain column name: grouping by the one-element list
        # ['site'] yields tuple keys on pandas >= 2, which would end up
        # inside the generated SQL.
        for name, item in inner_item_df.groupby('site'):
            logging.info(f"name: {name}")
            del_asin = list(item.asin)
            if del_asin:
                # A one-element tuple renders as "('x',)", which is not valid
                # SQL, hence the separate single-ASIN form.
                if len(del_asin) == 1:
                    sql_del = f"delete from `{self.site}_self_asin_detail` where `asin`= '{del_asin[0]}' and `site`='{name}' and updated_at>='{now_date}';"
                else:
                    sql_del = f"delete from `{self.site}_self_asin_detail` where `asin` in {tuple(del_asin)} and `site`='{name}' and updated_at>='{now_date}';"
                self.up_del_db(sql_del, site=self.site, db="mysql")

        # Drop helper columns on a new frame rather than mutating the
        # caller's (filtered) frame in place.
        inner_item_df = inner_item_df.drop(
            columns=["data_type", "best_sellers_rank", "all_best_sellers_href", "parent_asin"],
            errors="ignore",
        )
        self.save_db(f"{self.site}_self_asin_detail", inner_item_df, self.site, db="mysql")

        up_asin = [(3, asin, site) for asin, site in zip(inner_item_df['asin'], inner_item_df['site'])]
        sql_up = f"UPDATE `{self.site}_self_real_spider` set `state`=(%s)  where asin=(%s) and site=(%s);"
        self.up_del_db(sql_up, data=up_asin, site=self.site, db="mysql")

        inset_sql = f"insert into `self_asin_seller` (`asin`, `site`, `seller_id`, `seller_name`, `buy_boy_type`, `other_sellers_id`, `other_seller_name`, `other_seller_buy_boy_type`, `updated_at`) values (%s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE `asin` = values(`asin`), `site` = values(`site`), `seller_id` = values(`seller_id`), `seller_name` = values(`seller_name`), `buy_boy_type` = values(`buy_boy_type`), `other_sellers_id` = values(`other_sellers_id`), `other_seller_name` = values(`other_seller_name`), `other_seller_buy_boy_type` = values(`other_seller_buy_boy_type`), `updated_at` = values(`updated_at`);"
        if "asin" in self_asin_seller.keys() and self_asin_seller.shape[0]:
            seller_rows = list(zip(
                self_asin_seller['asin'], self_asin_seller['site'], self_asin_seller['seller_id'],
                self_asin_seller['seller_name'], self_asin_seller['buy_boy_type'],
                self_asin_seller['other_sellers_id'], self_asin_seller['other_seller_name'],
                self_asin_seller['other_seller_buy_boy_type'], self_asin_seller['updated_at'],
            ))
            # A single row is passed as one tuple, several rows as a list.
            payload = seller_rows[0] if len(seller_rows) == 1 else seller_rows
            # NOTE(review): the seller table is always written with
            # site="us" - presumably a shared table; confirm.
            self.up_del_db(inset_sql, data=payload, site="us", db="mysql")
        else:
            logging.info(f"kkkkkkkkkk {self_asin_seller.keys()}, {self_asin_seller.values}")

    def seed_collection_succeed_msg(self, data):
        """POST a "collection finished" notification to the selection service.

        Args:
            data: Form payload; its ``datas`` entry is used in the log lines.

        Up to 15 attempts are made, 3 seconds apart. Success is logged only
        when a request actually went through; otherwise an error is logged.
        The method never raises for request failures (best effort).
        """
        import requests
        self_url = "http://120.79.147.190:8080/soundasia_selection/userCollection/getDataFromPython"
        headers = {
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36',
        }
        max_attempts = 15
        for attempt in range(1, max_attempts + 1):
            try:
                context = requests.post(self_url, headers=headers, data=data, timeout=5).text
            except Exception as e:
                # Best effort: record the failure and retry after a pause.
                logging.info(f"collection notify attempt {attempt}/{max_attempts} failed: {e}")
                time.sleep(3)
                continue
            logging.info(
                f"{data.get('datas')} collection succeed response ： {context}")
            logging.info(f"spider account_id or asin succeed：{data.get('datas')}")
            return
        logging.error(
            f"collection notify gave up after {max_attempts} attempts: {data.get('datas')}")

    def syn_datas(self, datas):
        """Store collected-ASIN ("8") and collected-shop ("9") detail rows.

        Only runs when the pipeline site is ``"us"``. Steps:
          1. delete the affected ASINs from the yearly and base detail tables
             in PostgreSQL;
          2. append the de-duplicated rows to the base table and, without the
             helper columns, to the yearly table;
          3. set ``state`` to 3 for those ASINs in the MySQL
             ``<site>_self_real_spider`` table;
          4. notify the selection service about shops that have no rows left
             in state 1 or 2, and about the collected ASINs.

        Args:
            datas: DataFrame of detail rows; the ``asin``, ``asin_type``,
                ``account_id`` and ``site`` columns are read. The frame
                passed in is not modified.
        """
        if self.site != "us":
            return
        logging.info(f"collection_syn_queue队列 len --> {datas.shape}")
        inner_item_df = datas
        # na=False keeps rows without an asin_type from breaking the mask.
        is_asin_collect = inner_item_df['asin_type'].str.contains("8", na=False)
        is_shop_collect = inner_item_df['asin_type'].str.contains("9", na=False)
        asin_collection = list(set(inner_item_df[is_asin_collect]["asin"]))
        buy_collection = list(set(inner_item_df[is_shop_collect]["asin"]))
        date_info = time.strftime('%Y-%m-%d', time.localtime())

        if asin_collection:
            self._delete_synced_asins(asin_collection, date_info, "asin")
        if buy_collection:
            self._delete_synced_asins(buy_collection, date_info, "shop")

        if inner_item_df.shape[0]:
            # Work on new frames instead of mutating the caller's filtered
            # frame in place.
            inner_item_df = inner_item_df.drop_duplicates(subset=["asin", "account_id"])
            self.save_db(f"{self.site}_self_asin_detail_base", df=inner_item_df, site=self.site, db="pg")
            # The yearly table is written without these helper columns;
            # errors="ignore" tolerates batches where one of them is absent.
            inner_item_df = inner_item_df.drop(
                columns=["parent_asin", "best_sellers_rank", "all_best_sellers_href", "data_type"],
                errors="ignore",
            )
            self.save_db(f"{self.site}_self_asin_detail_{time.gmtime().tm_year}", df=inner_item_df, site=self.site, db="pg")

            up_asin = [(3, asin, site) for asin, site in zip(inner_item_df['asin'], inner_item_df['site'])]
            sql_up = f"UPDATE `{self.site}_self_real_spider` set `state`=(%s)  where asin=(%s) and site=(%s);"
            # A single row is passed as one tuple, several rows as a list.
            d = up_asin[0] if len(up_asin) == 1 else up_asin
            self.up_del_db(sql_up, data=d, site=self.site, db="mysql")

            shop_rows = inner_item_df[inner_item_df['asin_type'].str.contains("9", na=False)]
            # Rows without an account id cannot be settled; skip them.
            fetch_account_id = list(set(shop_rows["account_id"].dropna()))
            if fetch_account_id:
                self._report_finished_accounts(fetch_account_id)

        if asin_collection:
            data = {
                "site": self.site,
                "dataType": "1",
                "datas": ",".join(asin_collection)
            }
            self.seed_collection_succeed_msg(data)

    def _delete_synced_asins(self, asins, date_info, label):
        """Delete ``asins`` from today's yearly detail rows and from the base table (pg)."""
        year_table = f"{self.site}_self_asin_detail_{time.gmtime().tm_year}"
        base_table = f"{self.site}_self_asin_detail_base"
        # A one-element tuple renders as "('x',)", which is not valid SQL,
        # hence the separate single-ASIN form.
        if len(asins) == 1:
            asin_cond = f"asin= '{asins[0]}'"
        else:
            asin_cond = f"asin in {tuple(asins)}"

        sql_del = f"delete from {year_table} where date_info='{date_info}' and {asin_cond};"
        self.up_del_db(sql_del, site=self.site, db="pg")
        logging.info(f"delete {label} collect {year_table} table {asins[0:5]}")

        sql_base_del = f"delete from {base_table} where {asin_cond};"
        self.up_del_db(sql_base_del, site=self.site, db="pg")
        logging.info(f"delete {label} collect {base_table} table {asins[0:5]}")

    def _report_finished_accounts(self, fetch_account_id):
        """Notify the selection service about shops with no rows left in state 1 or 2."""
        logging.info(f"need to settle account id --> {fetch_account_id}")
        if len(fetch_account_id) == 1:
            sql_finds = f"select account_id from `{self.site}_self_real_spider` where account_id = '{fetch_account_id[0]}' and `state` in (1,2);"
        else:
            sql_finds = f"select account_id from `{self.site}_self_real_spider` where account_id in {tuple(fetch_account_id)} and `state` in (1,2);"

        # Retry the lookup until it succeeds.
        while True:
            try:
                if is_internet_available():
                    spider_complete = self.df_read_sql(sql_finds, self.site, "mysql")
                    break
                time.sleep(3)
                logging.info("request baidu.com failed  sleep 3m --> T_T")
            except OperationalError as e:
                time.sleep(2)
                logging.info(f"get data error --> T_T {e}")
            except FunctionTimedOut as e:
                time.sleep(2)
                logging.info(f"select data time out --> T_T {e}")

        if spider_complete.size:
            # Account ids that still have rows in state 1 or 2.
            not_spider_complete = list(set(str(i[0]) for i in spider_complete.values))
            # Compare as strings so ids that are not str still match, and so
            # the join below cannot fail on them.
            account_id_complete = [str(i) for i in fetch_account_id if str(i) not in not_spider_complete]
            if not account_id_complete:
                logging.info(f"the crawl is not complete ：{not_spider_complete}")
                return
        else:
            logging.info("All store ids have been crawled")
            account_id_complete = [str(i) for i in fetch_account_id]

        data = {
            "site": self.site,
            "dataType": "2",
            "datas": ",".join(account_id_complete)
        }
        self.seed_collection_succeed_msg(data)
        logging.info(f"spider succeed to account_id：{data.get('datas')}")

    def queue_consumer(self, q_size):
        """Drain the buffered queues and write their content to the databases.

        Args:
            q_size: ``"max"`` drains every queue completely. Any other value
                drains the error queue completely and takes exactly
                ``self.num`` items from each other queue, but only once that
                queue holds at least ``self.num`` items.
        """
        handlers = {
            "inner_item_queue": self._consume_inner_items,
            "error_queue": self._consume_errors,
            "asin_img_queue": self._consume_images,
            "variat_item_queue": self._consume_variat_items,
            "self_variat_queue": self._consume_self_variat_items,
        }
        for k, v in self.q_dict.items():
            if q_size == "max" or k == "error_queue":
                take = v.qsize()
            elif v.qsize() >= self.num:
                take = self.num
            else:
                take = 0
            dates = [v.get() for _ in range(take)]
            handler = handlers.get(k)
            if dates and handler is not None:
                handler(pd.DataFrame(dates))

    def _consume_inner_items(self, df):
        """Dispatch detail items by ``asin_type`` and upsert old-seller rows."""
        inner_item_df = self.list_exploded(df, 'inner_item')
        self_asin_seller = self.list_exploded(df, 'self_asin_seller')
        seller_old = self.list_exploded(df, 'seller_old')

        if 'asin_type' in inner_item_df.columns:
            # na=False keeps rows without an asin_type from breaking the mask.
            asin_type = inner_item_df['asin_type'].str
            self_asin_seller_datas = inner_item_df[asin_type.contains("7", na=False)]
            if self_asin_seller_datas.shape[0]:
                self.seller_datas(seller_detail=self_asin_seller_datas, seller=self_asin_seller)

            collection_syn_datas = inner_item_df[
                asin_type.contains("8", na=False) | asin_type.contains("9", na=False)]
            if collection_syn_datas.shape[0]:
                self.syn_datas(collection_syn_datas)

        if not seller_old.shape[0]:
            logging.info(f"seller_old_list is None --> {seller_old.shape}")
            return
        if "seller_name" not in seller_old.columns:
            return
        inset_sql = f"insert into `self_asin_seller_old` (`asin`, `site`, `seller_id`, `seller_name`, `buy_boy_type`) values (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE `asin` = values(`asin`), `site` = values(`site`), `seller_id` = values(`seller_id`), `seller_name` = values(`seller_name`), `buy_boy_type` = values(`buy_boy_type`);"
        self_asin_seller_old = list(
            zip(seller_old['asin'], seller_old['site'], seller_old['seller_id'],
                seller_old['seller_name'], seller_old['buy_boy_type']))
        # A single row is passed as one tuple, several rows as a list.
        payload = self_asin_seller_old[0] if len(self_asin_seller_old) == 1 else self_asin_seller_old
        self.up_del_db(inset_sql, data=payload, site="us", db="mysql")

    def _consume_errors(self, df):
        """Write the state of failed ASINs back to ``<site>_self_real_spider``."""
        # TODO: the table name needs to be changed (carried over from the
        # original comment).
        sql_up = f"UPDATE `{self.site}_self_real_spider` set `state`=(%s)  where asin=(%s) and site=(%s);"
        # Each row is used as-is as the (state, asin, site) parameters, so
        # the column order of the queued records matters.
        up_datas = [list(i) for i in df.values]
        d = up_datas[0] if len(up_datas) == 1 else up_datas
        self.up_del_db(sql_up, d, self.site, db="mysql")
        logging.info(f"{self.site}_self_real_spider   {df.shape}")

    def _consume_images(self, df):
        """Replace the stored image rows of every ASIN in the batch (pg14)."""
        # Group by the plain column name: grouping by the one-element list
        # ['site'] yields tuple keys on pandas >= 2, which would break the
        # table name and the ("ca", "mx") check below.
        for name, group in df.groupby('site'):
            logging.info(f"name: {name}")
            # explode() leaves NaN (a float) for rows without images.
            img_list = [i for i in group['asin_img'].explode().tolist() if not isinstance(i, float)]
            if not img_list:
                continue
            logging.info(f"img处理{img_list[0:5]}")
            df_img = pd.DataFrame(img_list)
            if name not in ("ca", "mx"):
                df_img['mapped_asin'] = df_img['asin'].apply(self.asin_to_number)
            df_img.drop_duplicates(subset=["asin", "img_order_by", "data_type"], inplace=True)
            dele_asin = list(set(df_img.asin))

            if len(dele_asin) == 1:
                sql_del = f"delete from {name}_asin_image where asin in ('{dele_asin[0]}');"
            else:
                sql_del = f"delete from {name}_asin_image where asin in {tuple(dele_asin)};"
            self.up_del_db(sql_del, site=self.site, db="pg14")
            logging.info(f"img delete {dele_asin[0:10]}")
            self.save_db(f"{name}_asin_image", df_img, self.site, db="pg14")
            logging.info(f"img save {df_img.head()}")

    def _consume_variat_items(self, df):
        """Replace variation rows in both ``<site>_variat`` and ``<site>_self_variat``."""
        targets = (
            (f"{self.site}_variat", "variat"),
            (f"{self.site}_self_variat", "_self_variat"),
        )
        self._replace_variats(df, 'variat_item', "variat", targets, dedupe=True)

    def _consume_self_variat_items(self, df):
        """Replace variation rows in ``<site>_self_variat`` only."""
        targets = ((f"{self.site}_self_variat", "_self_variat"),)
        self._replace_variats(df, 'self_variat_item', "self_variat_item", targets, dedupe=False)

    def _replace_variats(self, df, column, group_label, targets, dedupe):
        """Delete-then-insert the variation rows held in ``column`` for each target table (MySQL)."""
        for name, group in df.groupby('site'):
            logging.info(f"{group_label} name: {name}")
            variat_list = [i for i in group[column].explode().tolist() if not isinstance(i, float)]
            if not variat_list:
                continue
            logging.info(f"variat 处理{variat_list[0:5]}")
            df_variat = pd.DataFrame(variat_list)
            dele_asin = list(set(df_variat["parent_asin"]))
            if dele_asin:
                for table, label in targets:
                    if len(dele_asin) == 1:
                        sql_del = f"delete from `{table}` where parent_asin in ('{dele_asin[0]}');"
                    else:
                        sql_del = f"delete from `{table}` where parent_asin in {tuple(dele_asin)};"
                    self.up_del_db(sql_del, site=self.site, db="mysql")
                    logging.info(f"{label} delete {dele_asin[0:10]}")
            if dedupe:
                df_variat.drop_duplicates(subset=["asin"], inplace=True)
            for table, label in targets:
                self.save_db(table, df_variat, self.site, "mysql")
                logging.info(f"{label} save {df_variat.head()}")

    def process_item(self, item, spider):
        if item.get("finish_spider"):
            x = {k: v.qsize() for k, v in self.q_dict.items()}
            logging.info(f'sleep to queue data save {x}')
            self.queue_consumer(q_size="max")
        if item.get("inner_item"):
            self.q_dict.get("inner_item_queue").put(item)
        elif item.get("error_asin"):
            self.q_dict.get("error_queue").put(item.get("asin"))
        elif item.get("variat_item"):
            self.q_dict.get("variat_item_queue").put(item)
        elif item.get("asin_img"):
            self.q_dict.get("asin_img_queue").put(item)
        elif item.get("self_variat_item"):
            self.q_dict.get("self_variat_queue").put(item)
        self.queue_consumer(q_size="min")

    def close_spider(self, spider):
        """Flush everything still buffered, then send the process-exit notification."""
        pending = {}
        for queue_name, queue in self.q_dict.items():
            pending[queue_name] = queue.qsize()
        logging.info(f'sleep to queue data save {pending}')
        self.queue_consumer(q_size="max")
        send_mg("hezhe", "【实时爬取进程退出】", "实时爬取进程退出")

