import time
import logging
import pandas as pd
from queue import Queue
from func_timeout import func_set_timeout
from sqlalchemy.exc import OperationalError
from func_timeout.exceptions import FunctionTimedOut
# useful for handling different item types with a single interface
from amazon_spider.utils.common import is_internet_available
from amazon_spider.db.mysql_db import df_to_sql, get_country_engine
from amazon_spider.db.pg_db import get_pg_country_engine, get_14pg_country_engine, updatas_pg_asin


class AmazonCommentSpiderPipeline:
    def __init__(self, site="us"):
        self.site = site

        # self.comment_table_name = selection_table_name.get(f"{self.site}_comment_table")
        # self.comment_table_name = f"{self.site}_asin_comment_copy"

        # self.asin_table_name = selection_table_name.get(f"{self.site}_asin_variat")
        self.asin_table_name = f"{self.site}_asin_spider_comment"

        # self.comment_count_table = selection_table_name.get(f"{self.site}_comment_num_table")
        # self.s = sql_connect(self.site)
        # self.conn = get_con(self.site)
        self.q_dict = {
            # "comment_count_queue": Queue(),
            "error_queue": Queue(),

            "comment_b00_queue": Queue(),
            "comment_b01_b06_queue": Queue(),
            "comment_b07_queue": Queue(),
            "comment_b08_queue": Queue(),
            "comment_b09_queue": Queue(),
            "comment_other_queue": Queue(),

            "comment_count_one_queue": Queue(),
            "comment_count_two_queue": Queue(),
            "comment_count_three_queue": Queue(),
            "comment_count_four_queue": Queue(),
            "comment_count_five_queue": Queue(),

        }
        self.num = 50
        self.cols_comment_num_list = [
            'parent_asin', 'comment_num'
        ]

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            site=crawler.spider.site
        )

    @func_set_timeout(300)
    def up_del_dis(self, sql, data=None, site="us", db="mysql"):
        """Execute one SQL statement on the engine selected by ``db``.

        The ``func_set_timeout(300)`` decorator bounds the call; callers in
        this class handle the resulting ``FunctionTimedOut``.

        Args:
            sql: Statement passed to ``conn.execute``.
            data: Optional parameters. ``None`` executes ``sql`` on its own;
                an empty container executes nothing and still reports success.
            site: Site code forwarded to the engine factory.
            db: Backend selector: ``"mysql"``, ``"pg"`` or ``"pg14"``.

        Returns:
            True when no ``OperationalError`` occurred, False otherwise.

        Raises:
            ValueError: If ``db`` is not one of the supported selectors.
        """
        engine_factories = {
            "mysql": get_country_engine,
            "pg": get_pg_country_engine,
            "pg14": get_14pg_country_engine,
        }
        if db not in engine_factories:
            # Previously this fell through and failed later with an
            # UnboundLocalError on the engine variable.
            raise ValueError(f"unsupported db backend: {db!r}")
        engine = engine_factories[db](site)
        try:
            with engine.connect() as conn:
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError:
            logging.info(f"error sql is {sql}")
            return False
        finally:
            # Release the engine on the failure path too, not only on success.
            engine.dispose()

    def up_del_db(self, sql, data=None, site="us", db="mysql"):
        """Run a write statement, retrying every 3 seconds until it succeeds.

        The statement kind ("delete", "insert" or "update") is guessed from
        keywords in ``sql`` and is used for log messages and for choosing the
        executor: inserts on a ``pg`` backend go through ``updatas_pg_asin``,
        everything else through ``up_del_dis``.

        When a call times out on a ``pg`` backend, an attempt is made to
        terminate the PostgreSQL backend still running the statement before
        retrying. That attempt is skipped for "delete" statements and when no
        identifying value can be read from ``data``.

        Args:
            sql: Statement to execute.
            data: Optional parameters (one row or a sequence of rows).
            site: Site code forwarded to the executor.
            db: Backend selector forwarded to the executor.
        """
        lowered_sql = sql.lower()
        if 'delete' in lowered_sql:
            sql_msg = "delete"
        elif "insert" in lowered_sql:
            sql_msg = "insert"
        else:
            sql_msg = "update"
        is_pg = "pg" in db
        runner = updatas_pg_asin if (sql_msg == "insert" and is_pg) else self.up_del_dis
        # Shared log suffix: row count (or the SQL itself when there are no
        # rows) followed by a preview of at most five rows.
        summary = f"{len(data or []) or sql}---------{[] if data is None else data[0:5]}"
        while True:
            try:
                if not is_internet_available():
                    time.sleep(3)
                    logging.info(f"{sql_msg} {db} network error T_T --> {summary}")
                    continue
                if runner(sql, data=data, site=site, db=db):
                    logging.info(f"{sql_msg} {db} ok ^_^ -----{summary}")
                    break
                time.sleep(3)
                logging.info(f"{sql_msg} {db} asin T_T --> {summary}")
            except FunctionTimedOut as e:
                time.sleep(3)
                logging.info(f"{sql_msg} {db} time out T_T --> {str(e)[0:100]}----{summary}")
                if is_pg:
                    self._terminate_stuck_pg_backend(sql_msg, data, site, db)

    @staticmethod
    def _first_row_value(data, index):
        """Return column ``index`` of the first row of ``data``, or None.

        ``data`` may be a sequence of rows or a single row (queue_consumer
        passes a bare tuple when only one state update is pending).
        """
        if not data:
            return None
        try:
            first = data[0]
            row = first if isinstance(first, (list, tuple)) else data
            return row[index]
        except (IndexError, KeyError, TypeError):
            return None

    def _terminate_stuck_pg_backend(self, sql_msg, data, site, db):
        """Ask PostgreSQL to terminate the backend running a timed-out statement."""
        if sql_msg == "insert":
            key = self._first_row_value(data, 0)
            query_pattern = "insert into%asin_comment"
        elif sql_msg == "update":
            key = self._first_row_value(data, 1)
            query_pattern = "_asin_spider_comment"
        else:
            # No pattern is defined for delete statements.
            return
        if key is None:
            # Nothing identifies the stuck query (e.g. a statement without
            # parameters); just let the caller retry.
            return
        # NOTE(review): the key is interpolated into the SQL text; quotes are
        # doubled so a stray apostrophe cannot break out of the literal.
        safe_key = str(key).replace("'", "''")
        sql_backend = f"SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE query LIKE '%{query_pattern}%{safe_key}%' AND query not like '%pg_terminate_backend%';"
        logging.info(f"sql_backend --> {sql_backend}")
        if not is_internet_available():
            time.sleep(3)
            logging.info(
                f"{sql_msg} {db} pg_terminate_backend network error T_T --> {key}---------")
            return
        if updatas_pg_asin(sql_backend, site=site, db=db):
            logging.info(f"{sql_msg} {db} pg_terminate_backend ok ^_^ -----{key}---------")
        else:
            time.sleep(3)
            logging.info(
                f"{sql_msg} {db} pg_terminate_backend asin T_T --> {key}---------")

    def save_db(self, table, df, site, db):
        """Write ``df`` to ``table`` through ``df_to_sql``, retrying until it succeeds.

        A falsy result, an ``OperationalError`` or a ``FunctionTimedOut`` all
        trigger another attempt after a short pause.

        Args:
            table: Target table name.
            df: DataFrame to store.
            site: Site code forwarded to ``df_to_sql``.
            db: Backend selector forwarded to ``df_to_sql``.
        """
        # Retry on storage errors.
        while True:
            try:
                if df_to_sql(table, df, site=site, db=db):
                    logging.info(
                        f"更新 {db} 数据库 {table} -----{df.shape}---------{df.head()}")
                    break
                logging.info(f"更新 {db} 数据库 {table} -----失败")
            except OperationalError as e:
                logging.info(f"更新 {db} 数据库 {table} 失败  连接错误{e}")
            except FunctionTimedOut as e:
                logging.info(
                    f"更新 {db} 数据库 {table} -超时-{e}---{df.shape}---------{df.head()}")
            # Pause before the next attempt (same 3 s delay up_del_db uses)
            # instead of spinning against a failing database.
            time.sleep(3)

    def queue_consumer(self, q_size):
        for k, v in self.q_dict.items():
            if q_size == "max":
                if v.qsize():
                    if ('comment_b0' in k) or ('comment_other' in k):
                        dates = []
                        for i in range(0, v.qsize()):
                            dates += v.get()
                    else:
                        dates = [v.get() for i in range(0, v.qsize())]
                else:
                    dates = []
            else:
                if v.qsize() >= self.num:
                    if ('comment_b0' in k) or ('comment_other' in k):
                        dates = []
                        for i in range(0, self.num):
                            dates += v.get()
                    else:
                        dates = [v.get() for i in range(0, self.num)]
                else:
                    dates = []
            if dates:
                if 'comment_count' in k:
                    star = k.split('_')[2]
                    inset_reviews_sql = f"insert into {self.site}_asin_comment_reviews (asin, {star}_reviews) values (%s, %s) ON CONFLICT (asin) DO UPDATE SET {star}_reviews = excluded.{star}_reviews;"
                    self.up_del_db(inset_reviews_sql, data=dates, site=self.site, db="pg")
                    logging.info(f"评论数更新 成功 {star}_reviews")
                elif k == 'error_queue':
                    # sql_up = f"UPDATE `{self.asin_table_name}` set `state`=(%s)  where `parent_asin`=(%s);"
                    dates = list(set(dates))
                    sql_up = f"UPDATE {self.asin_table_name} set state=(%s)  where parent_asin=(%s);"
                    d = dates[0] if len(dates) == 1 else dates
                    self.up_del_db(sql_up, data=d, site=self.site, db="pg")
                    logging.info(f"{self.asin_table_name} {d}")
                    sql_svg = "update us_asin_comment_reviews set reviews=coalesce(one_reviews, 0)+coalesce(two_reviews, 0)+coalesce(three_reviews, 0)+coalesce(four_reviews, 0)+coalesce(five_reviews, 0) where reviews is null and (one_reviews is not null or two_reviews is not null or three_reviews is not null or four_reviews is not null or five_reviews is not null);"
                    self.up_del_db(sql_svg, site=self.site, db="pg")
                    logging.info(f"修改 评论总数的值")
                elif ('comment_b0' in k) or ('comment_other' in k):
                    cols_list = ['asin', 'title', 'content', 'is_vp', 'model', 'rating', 'agree_num', 'img_num', 'img_url',
                                     'is_video', 'video_url', 'comment_url', 'user_name', 'user_img', 'country', 'user_page',
                                     'is_earns_commissions', 'comment_time', 'comment_time_format', 'vine_review_flag', 'comment_id']
                    df = pd.DataFrame(dates, columns=cols_list)
                    logging.info(f"去重前长度  {df.shape}")
                    df.drop_duplicates(subset=['comment_id'], inplace=True)
                    logging.info(f"去重后长度 {df.shape}")
                    # comment_b01_b06_queue
                    bxx = k.replace('comment_', '').replace('_queue', '')
                    table = f"{self.site}_asin_comment_{bxx}"
                    logging.info(f"评论表名：{table}")
                    # inset_sql = f"insert into {self.comment_table_name} (asin, parent_asin, title, content, is_vp, model, rating, agree_num, img_num, img_url, is_video, video_url, comment_url, user_name, user_img, country, user_page, is_earns_commissions, comment_time, page, md5_unique, star, syn_id) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) on conflict(md5_unique) do NOTHING;"
                    inset_sql = f"insert into {table} (asin, title, content, is_vp, model, rating, agree_num, img_num, img_url, is_video, video_url, comment_url, user_name, user_img, country, user_page, is_earns_commissions, comment_time, comment_time_format, vine_review_flag, comment_id) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) on conflict(asin, title, comment_time, user_name) do NOTHING;"
                    self.up_del_db(inset_sql, data=dates, site=self.site, db="pg")
                    logging.info(f"save ok --> 长度： {len(dates)} asin: {list(set(df.asin))[:5]} 表名 {table}")

    def process_item(self, item, spider):
        if item.get("finish_spider"):
            x = {k: v.qsize() for k, v in self.q_dict.items()}
            logging.info(f'sleep to queue data save {x}')
            self.queue_consumer(q_size="max")
        if q := item.get('queues_'):
            data = []
            for i in range(0, q.qsize()):
                data += q.get()
            asin_data = {}
            bxx = {
                "b00": "b00",
                "b01": "b01_b06",
                "b02": "b01_b06",
                "b03": "b01_b06",
                "b04": "b01_b06",
                "b05": "b01_b06",
                "b06": "b01_b06",
                "b07": "b07",
                "b08": "b08",
                "b09": "b09",
            }
            for i in data:
                asin_three = i[0].lower()[0:3]
                table = bxx.get(asin_three, "other")
                if asin_data.get(table):
                    asin_data[table].append(i)
                else:
                    asin_data[table] = [i]
            for name, d in asin_data.items():
                self.q_dict.get(f"comment_{name}_queue").put(d)
            asin = item.get('asin')
            star = item.get('star').split("_")[0]
            comment_count = item.get('comment_count')
            self.q_dict.get(f"comment_count_{star}_queue").put((asin, comment_count))
            self.q_dict.get("error_queue").put((5, asin))


            # cols_list = ['asin', 'title', 'content', 'is_vp', 'model', 'rating', 'agree_num', 'img_num', 'img_url',
            #                  'is_video', 'video_url', 'comment_url', 'user_name', 'user_img', 'country', 'user_page',
            #                  'is_earns_commissions', 'comment_time', 'comment_time_format']

            # df = pd.DataFrame(data, columns=cols_list)
            # logging.info(f"去重前长度  {df.shape} {item['star']}")
            # df.drop_duplicates(subset=['asin', 'title', 'comment_time', 'user_name'], inplace=True)
            # logging.info(f"去重后长度 {df.shape} {item['star']}")
            # # mysql
            # # inset_sql = f"insert into `{self.comment_table_name}` (`asin`, `parent_asin`, `title`, `content`, `is_vp`, `model`, `rating`, `agree_num`, `img_num`, `img_url`, `is_video`, `video_url`, `comment_url`, `user_name`, `user_img`, `country`, `user_page`, `is_earns_commissions`, `comment_time`, `page`, `md5_unique`, `star`) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE `asin` = values(`asin`), `parent_asin` = values(`parent_asin`), `title` = values(`title`), `content` = values(`content`), `is_vp` = values(`is_vp`), `is_vp` = values(`is_vp`), `rating` = values(`rating`), `agree_num` = values(`agree_num`), `img_num` = values(`img_num`), `img_url` = values(`img_url`), `is_video` = values(`is_video`), `video_url` = values(`video_url`), `comment_url` = values(`comment_url`), `user_name` = values(`user_name`), `user_img` = values(`user_img`), `country` = values(`country`), `user_page` = values(`user_page`), `is_earns_commissions` = values(`is_earns_commissions`), `comment_time` = values(`comment_time`), `page` = values(`page`), `md5_unique` = values(`md5_unique`), `star` = values(`star`);"
            # # pg
            # # inset_sql = f"insert into {self.comment_table_name} (asin, parent_asin, title, content, is_vp, model, rating, agree_num, img_num, img_url, is_video, video_url, comment_url, user_name, user_img, country, user_page, is_earns_commissions, comment_time, page, md5_unique, star, syn_id) values %s on conflict(md5_unique) do NOTHING;"
            # bxx = {
            #     "b00": "b00",
            #     "b01": "b01_b06",
            #     "b02": "b01_b06",
            #     "b03": "b01_b06",
            #     "b04": "b01_b06",
            #     "b05": "b01_b06",
            #     "b06": "b01_b06",
            #     "b07": "b07",
            #     "b08": "b08",
            #     "b09": "b09",
            # }
            # asin_three = item.get('asin').lower()[0:3]
            # table = f"{self.site}_asin_comment_{bxx.get(asin_three) or 'other'}"
            # logging.info(f"评论表名：{table}")
            # # inset_sql = f"insert into {self.comment_table_name} (asin, parent_asin, title, content, is_vp, model, rating, agree_num, img_num, img_url, is_video, video_url, comment_url, user_name, user_img, country, user_page, is_earns_commissions, comment_time, page, md5_unique, star, syn_id) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) on conflict(md5_unique) do NOTHING;"
            # inset_sql = f"insert into {table} (asin, title, content, is_vp, model, rating, agree_num, img_num, img_url, is_video, video_url, comment_url, user_name, user_img, country, user_page, is_earns_commissions, comment_time, comment_time_format) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) on conflict(asin, title, comment_time, user_name) do NOTHING;"
            # self.up_del_db(inset_sql, data=data, site=self.site, db="pg")
            # logging.info(f"save ok asin_id {item.get('id')} {item.get('asin')}")
            # self.q_dict.get("error_queue").put((5, item.get("asin")))
            #
            # asin = item.get('asin')
            # star = item.get('star').split("_")[0]
            # comment_count = item.get('comment_count')
            #
            # inset_reviews_sql = f"insert into {self.site}_asin_comment_reviews (asin, {star}_reviews) values (%s, %s) ON CONFLICT (asin) DO UPDATE SET {star}_reviews = excluded.{star}_reviews;"
            # self.up_del_db(inset_reviews_sql, data=[(asin, comment_count)], site=self.site, db="pg")
            # logging.info(f"评论数更新 成功 {star}_reviews")

            # insert_data(inset_sql, data=d, site=self.site, db="pg")
            # updatas_pg_asin(inset_sql, data=data, site=self.site, db="pg")
            # self.q_dict.get("comment_count_queue").put((item.get("asin"), item.get("comment_count"), item.get("star")))
        elif item.get("count_max") and (not item.get("error_asin")):
            # 将数据添加到队列
            # self.q_dict.get("comment_count_queue").put((item.get("asin"), item.get("comment_count"), item.get("star")))
            self.q_dict.get("error_queue").put(item.get("sql_data"))
        elif item.get("error_asin"):
            self.q_dict.get("error_queue").put(item.get("sql_data"))
        self.queue_consumer("min")

    def close_spider(self, spider):
        print(f'{self.site}  爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self.queue_consumer("max")

