import time
import copy
import logging
import pandas as pd
from queue import Queue
# func_set_timeout puts a time limit on the blocking database helpers below
from func_timeout import func_set_timeout
from sqlalchemy.exc import OperationalError
from func_timeout.exceptions import FunctionTimedOut
from amazon_spider.utils.common import is_internet_available
from amazon_spider.db.mysql_db import df_to_sql, get_country_engine
from amazon_spider.db.pg_db import get_pg_country_engine, get_14pg_country_engine


class ContactInfo1688SpiderPipeline:
    """Scrapy item pipeline that buffers 1688 company items and writes them in batches.

    Incoming items are put on in-memory queues (``self.q_dict``). A queue is
    flushed to the database once it holds at least ``self.num`` entries, and
    every queue is flushed completely when a ``finish_spider`` item arrives or
    the spider closes.
    """

    def __init__(self, site):
        """Store the spider's site and create the empty buffering queues."""
        self.site = site
        self.q_dict = {
            "inner_item_queue": Queue(),
            "error_queue": Queue(),
        }
        # Number of queued entries that triggers (and is taken by) a batch flush.
        self.num = 50

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from a crawler, using ``crawler.spider.site``."""
        return cls(
            site=crawler.spider.site
        )

    @staticmethod
    def _make_engine(site, db):
        """Return a new engine for backend ``db`` ("mysql", "pg", "pg14"), or None if unknown."""
        if db == "mysql":
            return get_country_engine(site)
        if db == "pg":
            return get_pg_country_engine(site)
        if db == "pg14":
            return get_14pg_country_engine(site)
        return None

    @func_set_timeout(300)
    def df_to_sql(self, table_name, df, site="us", db="mysql"):
        """Append ``df`` to ``table_name`` on the selected backend.

        Returns True when the rows were written. Returns False when the
        connectivity check fails, ``db`` is not a supported backend, or the
        write raises ``OperationalError``.
        """
        try:
            if not is_internet_available():
                return False
            engine = self._make_engine(site, db)
            if engine is None:
                logging.error("df_to_sql: unsupported db backend %r", db)
                return False
            try:
                df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
            finally:
                # Dispose even when to_sql raises so the engine is not leaked.
                engine.dispose()
            return True
        except OperationalError as exc:
            logging.warning("df_to_sql: insert into %s failed: %s", table_name, exc)
            return False

    @func_set_timeout(100)
    def df_read_sql(self, find_sql, site="us", db="mysql"):
        """Run ``find_sql`` on the selected backend and return the result as a DataFrame.

        Returns None when ``db`` is not a supported backend.
        """
        engine = self._make_engine(site, db)
        if engine is None:
            logging.error("df_read_sql: unsupported db backend %r", db)
            return None
        try:
            return pd.read_sql(find_sql, con=engine)
        finally:
            # Dispose even when the query raises so the engine is not leaked.
            engine.dispose()

    def save_db(self, table, df, site, db):
        """Insert ``df`` into ``table``, retrying until the insert reports success.

        NOTE(review): this calls the module-level ``df_to_sql`` imported from
        ``amazon_spider.db.mysql_db``, not ``self.df_to_sql`` -- confirm that
        is intended.
        """
        # Retry on insert failure, pausing between attempts so a persistent
        # failure does not spin in a hot loop.
        while True:
            try:
                if df_to_sql(table, df, site=site, db=db):
                    # Guard the sample row: an empty frame has no first row.
                    first_row = df.values[0] if len(df.index) else None
                    logging.info(
                        f"更新 {db} 数据库 {table} -----{df.shape}---------{df.head()} {first_row}")
                    break
                logging.info(f"更新 {db} 数据库 {table} -----失败")
            except OperationalError as e:
                logging.info(f"更新 {db} 数据库 {table} 失败  连接错误{e}")
            except FunctionTimedOut as e:
                logging.info(
                    f"更新 {db} 数据库 {table} -超时-{e}---{df.shape}---------{df.head()}")
            time.sleep(3)

    @func_set_timeout(300)
    def up_del_dis(self, sql, data=None, site="us", db="mysql"):
        """Execute one write statement (update/delete/insert) on the selected backend.

        ``data`` is passed to ``conn.execute`` as the statement parameters.
        When ``data`` is None the statement runs without parameters; when it
        is an empty container nothing is executed.

        Returns True on success, False when the connectivity check fails or
        the statement raises ``OperationalError``.

        Raises:
            ValueError: if ``db`` is not a supported backend.
        """
        engine = None
        try:
            if not is_internet_available():
                return False
            engine = self._make_engine(site, db)
            if engine is None:
                raise ValueError(f"unsupported db backend: {db!r}")
            with engine.connect() as conn:
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError:
            logging.info(f"error sql is {sql}")
            return False
        finally:
            # Dispose on every exit path, including errors and timeouts.
            if engine is not None:
                engine.dispose()

    def up_del_db(self, sql, data=None, site="us", db="mysql"):
        """Run ``sql`` through ``up_del_dis``, retrying every 3 seconds until it succeeds.

        Deletes that target the current year's ``_self_asin_detail_<year>``
        table are the only bounded case: a counter is incremented before each
        attempt and the loop gives up once it reaches 5. Every other
        statement is retried indefinitely.
        """
        lowered = sql.lower()
        if 'delete' in lowered:
            sql_msg = "delete"
        elif 'insert' in lowered:
            sql_msg = "insert"
        else:
            sql_msg = "update"

        # Shared tail of every log line: row count (or the SQL itself when
        # there is no data) followed by a sample of at most five entries.
        summary = f"{len(data or []) or sql}---------{[][0:5] if data is None else data[0:5]}"
        year = time.gmtime().tm_year
        bounded = sql_msg == "delete" and f"_self_asin_detail_{year}" in sql

        count = 0
        while True:
            try:
                if not is_internet_available():
                    time.sleep(3)
                    logging.info(f"{sql_msg} {db} data network error T_T --> {summary}")
                    continue
                if bounded:
                    count += 1
                    logging.info(f"delete us_self_asin_detail_{year} 错误次数 +1")
                if count >= 5:
                    logging.info("delete count >= 5")
                    break
                if self.up_del_dis(sql, data=data, site=site, db=db):
                    logging.info(f"{sql_msg} {db} data ok ^_^ -----{summary}")
                    break
                time.sleep(3)
                logging.info(f"{sql_msg} {db} data error T_T --> {summary}")
            except FunctionTimedOut as e:
                time.sleep(3)
                logging.info(f"{sql_msg} {db} data time out T_T --> {e}----{summary}")

    def list_exploded(self, df, key):
        """Explode column ``key`` and return a DataFrame built from its dict entries.

        Entries that are not dicts (for example NaN or None) are dropped.
        """
        exploded = df[key].explode()
        # Flatten the exploded column into one list of record dicts.
        records = [entry for entry in exploded.tolist() if isinstance(entry, dict)]
        return pd.DataFrame(records)

    @staticmethod
    def asin_to_number(asin):
        """Map a 10-character ASIN to an integer in the range 0..999,999,999.

        The ASIN is read as a base-36 number (digits 0-9, then 'A' -> 10 ...
        'Z' -> 35) and reduced modulo 1,000,000,000, so different ASINs can
        map to the same number. The conversion assumes digits and uppercase
        letters only.

        Raises:
            ValueError: if ``asin`` is not exactly 10 characters long.
        """

        def char_to_number(char):
            if char.isdigit():
                return int(char)
            return ord(char) - 55  # 'A' -> 10, 'B' -> 11, ..., 'Z' -> 35

        if len(asin) != 10:
            raise ValueError("ASIN must be 10 characters long")

        base = 36
        asin_number = 0
        for i, char in enumerate(reversed(asin)):
            asin_number += char_to_number(char) * (base ** i)

        # Reduce modulo one billion to keep the result below 1,000,000,000.
        return asin_number % 1000000000

    def _store_company_rows(self, df):
        """Replace the ``1688_company_info`` rows for a batch and update their search-term state."""
        print(df)
        member_ids = tuple(set(df['memberId']))
        # Bind the ids as parameters instead of formatting them into the SQL:
        # the values come from scraped pages, so a quote would otherwise break
        # (or alter) the statement.
        placeholders = ", ".join(["%s"] * len(member_ids))
        sql_del = f"delete from `1688_company_info` where `memberId` in ({placeholders});"
        self.up_del_db(sql_del, member_ids, site='us', db='mysql')

        df = df.drop_duplicates(subset=["memberId"])
        try:
            saved = self.df_to_sql('1688_company_info', df, site='us', db='mysql')
        except FunctionTimedOut as exc:
            logging.error("insert into 1688_company_info timed out: %s", exc)
            saved = False
        if not saved:
            # Skip the state update so a failed insert is not followed by the
            # update that normally accompanies a successful save.
            logging.error(
                "insert into 1688_company_info failed for %d rows; state not updated",
                len(df.index))
            return

        update_memberid = [(3, term) for term in set(df['search_term'])]
        sql_up = "UPDATE `1688_factory_name` set `state`=(%s)  where factory_name=(%s);"
        self.up_del_db(sql_up, update_memberid, site='us', db='mysql')

    def _store_error_rows(self, df):
        """Apply the ``1688_factory_name`` state update for a batch of error entries."""
        # TODO (from the original author): the table name needs to be changed.
        # NOTE(review): the statement takes two parameters per row (state,
        # factory_name); each row here is whatever ``search_term`` value was
        # queued in process_item -- confirm those values are such pairs.
        sql_up = "UPDATE `1688_factory_name` set `state`=(%s)  where factory_name=(%s);"
        up_datas = [list(row) for row in df.values]
        d = up_datas[0] if len(up_datas) == 1 else up_datas
        self.up_del_db(sql_up, d, site='us', db='mysql')

    def queue_consumer(self, q_size):
        """Flush the buffering queues to the database.

        With ``q_size == "max"`` every queue is drained completely. With any
        other value a queue is only touched once it holds at least
        ``self.num`` entries, and exactly ``self.num`` entries are taken.
        """
        for name, pending in self.q_dict.items():
            size = pending.qsize()
            if q_size == "max":
                take = size
            else:
                take = self.num if size >= self.num else 0
            rows = [pending.get() for _ in range(take)]
            if not rows:
                continue
            df = pd.DataFrame(rows)
            if name == "inner_item_queue":
                self._store_company_rows(df)
            elif name == "error_queue":
                self._store_error_rows(df)

    def process_item(self, item, spider):
        """Queue the item's payload, flush queues that are due, and return the item.

        A ``finish_spider`` item forces a full flush first. ``inner_item``
        payloads go to the data queue; for ``error_asin`` items the
        ``search_term`` value goes to the error queue.
        """
        if item.get("finish_spider"):
            print('等待时 将队列数据存储', {k: v.qsize() for k, v in self.q_dict.items()})
            self.queue_consumer('max')

        if item.get("inner_item"):
            self.q_dict.get("inner_item_queue").put(item.get('inner_item'))
        elif item.get("error_asin"):
            self.q_dict.get("error_queue").put(item.get("search_term"))

        self.queue_consumer("min")
        # Scrapy passes the return value on to later pipeline components.
        return item

    def close_spider(self, spider):
        """Flush everything still queued when the spider closes."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self.queue_consumer('max')
