import time
import logging
import pandas as pd
from queue import Queue
from func_timeout import func_set_timeout
from sqlalchemy.exc import OperationalError
from amazon_spider.utils.utils import send_mg
from func_timeout.exceptions import FunctionTimedOut
from amazon_spider.db.mysql_db import get_country_engine, df_to_sql
from amazon_spider.utils.common import is_internet_available
from amazon_spider.db.pg_db import get_pg_country_engine, get_14pg_country_engine, updatas_pg_asin
# useful for handling different item types with a single interface


class AmazonVariatSpiderPipeline:
    """Scrapy item pipeline that buffers scraped rows and flushes them in batches.

    Items are collected in in-memory queues (one per item kind).  Once a queue
    holds at least ``self.num`` entries (or when a full flush is requested) the
    entries are turned into a ``pandas.DataFrame``, written to the
    ``{site}_self_detail_returns`` table and the matching rows of
    ``{site}_self_asin_returns_new`` get their ``state`` column updated.
    """

    # Seconds to wait between two attempts of a failed database operation.
    RETRY_DELAY = 3

    def __init__(self, site):
        """Create the pipeline for one marketplace ``site`` (e.g. ``"us"``)."""
        self.site = site
        # One queue per kind of item handled by this pipeline.  Further queues
        # (variat_item_queue, asin_img_queue, self_variat_queue) are currently
        # disabled.
        self.q_dict = {
            "inner_item_queue": Queue(),
            "error_queue": Queue(),
        }
        # Batch size: a queue is flushed once it holds this many entries.
        self.num = 50
        # Not read anywhere in this class; kept for external users.
        self.save_num = 20

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from a crawler, taking ``site`` from its spider."""
        return cls(
            site=crawler.spider.site
        )

    @func_set_timeout(300)
    def up_del_dis(self, sql, data=None, site="us", db="mysql"):
        """Execute one SQL statement against the selected database.

        The call is aborted by ``func_set_timeout`` after 300 seconds.

        Args:
            sql: Statement to execute.
            data: Optional parameters.  ``None`` executes ``sql`` without
                parameters; an empty container executes nothing at all.
            site: Marketplace code used to pick the engine.
            db: Backend selector: ``"mysql"``, ``"pg"`` or ``"pg14"``.

        Returns:
            ``True`` when the statement ran (or there was nothing to run),
            ``False`` when the network is down or an ``OperationalError``
            occurred.

        Raises:
            ValueError: If ``db`` is not one of the supported backends.
        """
        engine_factories = {
            "mysql": get_country_engine,
            "pg": get_pg_country_engine,
            "pg14": get_14pg_country_engine,
        }
        try:
            make_engine = engine_factories[db]
        except KeyError:
            raise ValueError(f"unsupported db backend: {db!r}") from None

        # Check connectivity first so no engine is created (and leaked) when
        # the statement cannot be sent anyway.
        if not is_internet_available():
            return False

        engine = make_engine(site)
        try:
            with engine.connect() as conn:
                # NOTE(review): no explicit commit is issued here; this
                # presumably relies on the engine's autocommit behaviour --
                # confirm for the SQLAlchemy version in use.
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError as exc:
            logging.info(f"error sql is {sql} {str(exc)}")
            return False
        finally:
            # Release pooled connections on every exit path, not only on
            # success.
            engine.dispose()

    @staticmethod
    def _first_param(data):
        """Return the first value of the first parameter row, for log output.

        Accepts ``None``, a flat single row or a list of rows without raising.
        """
        if not data:
            return None
        head = data[0]
        if isinstance(head, (list, tuple)):
            return head[0] if head else None
        return head

    def up_del_db(self, sql, data=None, site="us", db="mysql"):
        """Run an UPDATE/DELETE statement, retrying until it succeeds.

        The statement is retried forever, sleeping ``RETRY_DELAY`` seconds
        after each failed attempt (network down, execution error or timeout).

        Args:
            sql: Statement to execute.
            data: Optional parameters, a single row or a list of rows.
            site: Marketplace code forwarded to ``up_del_dis``.
            db: Backend selector forwarded to ``up_del_dis``.
        """
        sql_msg = "delete" if "delete" in sql.lower() else "update"
        # Shared tail of the log lines: parameter count (or the SQL itself when
        # there are no parameters) followed by a preview of the parameters.
        summary = f"{len(data or []) or sql}---------{[] if data is None else data[0:5]}"

        while True:
            try:
                if not is_internet_available():
                    time.sleep(self.RETRY_DELAY)
                    logging.info(
                        f"{sql_msg} {db} asin state 3 network error T_T --> {summary}")
                    continue
                if self.up_del_dis(sql, data=data, site=site, db=db):
                    logging.info(f"{sql_msg} {db} asin state 3 ok ^_^ -----{summary}")
                    break
                time.sleep(self.RETRY_DELAY)
                logging.info(
                    f"{sql_msg} {db} asin state 3 error T_T --> {summary}")
            except FunctionTimedOut as e:
                if "pg" in db and 'asin_image' in sql:
                    # Try to terminate the backend still running the statement
                    # that timed out before retrying it.
                    # NOTE(review): ``{sql}`` is interpolated without quoting,
                    # so the generated statement does not look like valid SQL
                    # -- confirm the intended comparison before relying on it.
                    sql_backend = f"SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE query={sql} AND query not like '%pg_terminate_backend%';"
                    logging.info(f"sql_backend --> {sql_backend}")
                    first_key = self._first_param(data)
                    if is_internet_available() and updatas_pg_asin(sql_backend, site=site, db=db):
                        logging.info(f"{sql_msg} {db} pg_terminate_backend ok ^_^ -----{first_key}---------")
                        continue
                    # Terminating failed or the network is down: back off
                    # instead of retrying immediately.
                    time.sleep(self.RETRY_DELAY)
                    logging.info(
                        f"{sql_msg} {db} pg_terminate_backend asin T_T --> {first_key}---------")
                else:
                    time.sleep(self.RETRY_DELAY)
                    logging.info(
                        f"{sql_msg} {db} asin state 3 time out T_T --> {e}----{summary}")

    def asin_state_to_list(self, df):
        """Derive a ``[state, asin, site]`` row for every row of ``df``.

        The state depends on which columns are NaN:

        * ``9`` -- volume, weight, rank, launch_time, price, rating and
          total_comments are all NaN;
        * ``7`` -- volume, weight, rank and launch_time are all NaN but at
          least one of price, rating, total_comments is present;
        * ``3`` -- at least one of volume, weight, rank, launch_time is
          present.

        A metric column missing from ``df`` is treated as entirely NaN.

        Args:
            df: Frame with ``asin`` and ``site`` columns plus the metric
                columns listed above.

        Returns:
            List of ``[state, asin, site]`` lists, state-9 rows first, then
            state-7, then state-3.  Empty when ``df`` is empty.
        """
        if df.empty:
            return []

        detail_cols = ["volume", "weight", "rank", "launch_time"]
        listing_cols = ["price", "rating", "total_comments"]

        # Compute the NaN masks once; reindex supplies all-NaN columns for any
        # metric the frame does not carry.
        missing = df.reindex(columns=detail_cols + listing_cols).isna()
        no_detail = missing[detail_cols].all(axis=1)
        no_data = no_detail & missing[listing_cols].all(axis=1)

        asin_list = []
        state_masks = ((9, no_data), (7, no_detail & ~no_data), (3, ~no_detail))
        for state, mask in state_masks:
            subset = df.loc[mask, ["asin", "site"]]
            asin_list.extend(
                [state, asin, site]
                for asin, site in zip(subset["asin"], subset["site"])
            )
        return asin_list

    def save_db(self, table, df, site, db):
        """Write ``df`` to ``table``, retrying until the write succeeds.

        Failed attempts (falsy result from ``df_to_sql``, ``OperationalError``
        or timeout) are logged and retried after ``RETRY_DELAY`` seconds.
        """
        while True:
            try:
                if df_to_sql(table, df, site=site, db=db):
                    logging.info(
                        f"更新 {db} 数据库 {table} -----{df.shape}---------{df.head()}")
                    break
                logging.info(f"更新 {db} 数据库 {table} -----失败")
            except OperationalError as e:
                logging.info(f"更新 {db} 数据库 {table} 失败  连接错误{e}")
            except FunctionTimedOut as e:
                logging.info(
                    f"更新 {db} 数据库 {table} -超时-{e}---{df.shape}---------{df.head()}")
            # Back off before the next attempt instead of hammering the
            # database in a tight loop.
            time.sleep(self.RETRY_DELAY)

    def _push_states(self, rows):
        """Update ``state`` in ``{site}_self_asin_returns_new`` for ``rows``.

        ``rows`` is a list of ``[state, asin, site]`` parameter rows.  A single
        row is passed on its own rather than wrapped in a list.
        """
        # TODO: the table name may need to be changed.
        sql_up = f"UPDATE `{self.site}_self_asin_returns_new` set `state`=(%s)  where asin=(%s) and site=(%s);"
        params = rows[0] if len(rows) == 1 else rows
        self.up_del_db(sql_up, params, self.site, db="mysql")

    def queue_consumer(self, q_size):
        """Drain the queues and persist their contents.

        Args:
            q_size: ``"max"`` drains every queue completely; any other value
                drains exactly ``self.num`` entries from each queue that holds
                at least that many and leaves smaller queues untouched.
        """
        for name, q in self.q_dict.items():
            pending = q.qsize()
            if q_size == "max":
                take = pending
            else:
                take = self.num if pending >= self.num else 0
            if not take:
                continue

            dates = [q.get() for _ in range(take)]
            df = pd.DataFrame(dates)
            if name == "inner_item_queue":
                if df.shape[0]:
                    self.save_db(f"{self.site}_self_detail_returns", df, self.site, "mysql")
                    logging.info(f"{self.site}_self_detail_returns   {df.shape}")
                self._push_states(self.asin_state_to_list(df))
            elif name == "error_queue":
                # Queue entries are used directly as parameter rows.
                self._push_states([list(row) for row in df.values])
                logging.info(f"{self.site}_self_asin_returns_new   {df.shape}")

    def process_item(self, item, spider):
        """Queue ``item`` and flush any queue that reached the batch size.

        An item with a truthy ``finish_spider`` key triggers a full flush
        first.  ``inner_item`` payloads go to the inner-item queue; for items
        flagged ``error_asin`` the value of ``asin`` goes to the error queue.

        Returns:
            The unchanged ``item`` so later pipeline stages receive it.
        """
        if item.get("finish_spider"):
            print('等待时 将队列数据存储', {k: v.qsize() for k, v in self.q_dict.items()})
            self.queue_consumer(q_size="max")
        if item.get("inner_item"):
            self.q_dict["inner_item_queue"].put(item.get('inner_item'))
        elif item.get("error_asin"):
            self.q_dict["error_queue"].put(item.get("asin"))
        self.queue_consumer("min")
        return item

    def close_spider(self, spider):
        """Flush everything still queued and send the exit notification."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self.queue_consumer("max")
        send_mg("hezhe", "【内部asin爬取进程退出】", "内部asin爬取进程退出")