import logging
import pandas as pd
from queue import Queue
from sqlalchemy.exc import IntegrityError
from amazon_spider.db.pg_db import get_pg_con
from amazon_spider.conf.db import selection_table_name
from amazon_spider.db.mysql_db import get_con, sql_update, sql_update_many, sql_connect, sql_delete, get_country_engine, \
    sql_insert_many, sql_insert


class AmazonSpiderPipeline:
    """Scrapy item pipeline that stores scraped review rows for one site.

    Review rows are appended to the site's comment table.  Review counts and
    processed/failed ASINs are buffered in in-memory queues and written in
    batches of ``get_count`` entries; whatever is left is written when the
    spider closes.

    State values written to the ASIN table: ``"3"`` for ASINs taken from
    ``asin_queue`` and ``1`` for ASINs taken from ``error_queue``.
    NOTE(review): these look like "done" and "retry" markers -- confirm
    against the scheduler that reads the table.
    """

    # Column order of one scraped review row, as written to the comment table.
    COMMENT_COLUMNS = [
        'asin', 'parent_asin', 'title', 'content', 'is_vp', 'model', 'rating',
        'agree_num', 'img_num', 'img_url', 'is_video', 'video_url',
        'comment_url', 'user_name', 'user_img', 'country', 'user_page',
        'is_earns_commissions', 'comment_time', 'page', 'md5_unique',
    ]
    # Row-by-row retries stop after this many duplicate review rows.
    MAX_DUPLICATE_ROWS = 4

    def __init__(self, site="us"):
        """Resolve table names and open the database connections for ``site``."""
        self.site = site
        self.comment_table_name = selection_table_name.get(f"{self.site}_comment_table")
        self.asin_table_name = selection_table_name.get(f"{self.site}_asin_variat")
        self.comment_count_table = selection_table_name.get(f"{self.site}_comment_num_table")
        self.s = sql_connect(self.site)
        self.conn = get_con(self.site)
        self.q_dict = {"asin_queue": Queue(), "comment_count_queue": Queue(), "error_queue": Queue()}
        # Number of queued entries that triggers a batch write.
        self.get_count = 1
        self.cols_comment_num_list = [
            'parent_asin', 'comment_num'
        ]

    def save_db(self, df, table_name, cols_list, asin):
        """Append ``df`` to ``table_name``.

        On an ``IntegrityError`` the frame is retried one row at a time.  On
        any other error the connections are re-created.

        Returns:
            ``True`` when the write failed for a reason other than an
            integrity violation (the caller should mark ``asin`` as failed);
            ``None`` otherwise.
        """
        try:
            df.to_sql(name=table_name, con=self.conn, if_exists='append', index=False)
            logging.info(f"单条asin 入库成功{asin}")
        except IntegrityError:
            self._save_rows_one_by_one(df, table_name, cols_list)
        except Exception:
            # Keep the traceback; a bare ``except`` would also have swallowed
            # KeyboardInterrupt / SystemExit.
            logging.exception(f'数据入库失败{asin}')
            self.conn = get_con(self.site)
            self.s = sql_connect(self.site)
            logging.info(f'添加到error_queue {asin}')
            return True

    def _save_rows_one_by_one(self, df, table_name, cols_list):
        """Insert each row of ``df`` separately after a bulk insert hit a duplicate."""
        duplicates = 0
        for v in df.values:
            if duplicates >= self.MAX_DUPLICATE_ROWS:
                break
            row_df = pd.DataFrame([v], columns=cols_list)
            try:
                row_df.to_sql(name=table_name, con=self.conn, if_exists='append', index=False)
                logging.info(f"入库成功--------------{row_df.values}")
            except IntegrityError:
                if table_name == self.comment_table_name:
                    duplicates += 1
                    logging.info(f"数据重复, 错误数据为 {v[0]}, {v[-2]}, {v[-1]} {v[1]}")
                elif table_name == self.comment_count_table:
                    # The count row already exists: overwrite it instead.
                    self.up_db(self.comment_count_table, row_df)
                    logging.info(f"评论数爬取完成 进行更新 {v[0]}, {v[1]}")
            except Exception:
                logging.exception(f'数据入库失败, 错误数据为{v[0]}, {v[-2]}, {v[-1]} {v[1]}')

    def get_q(self, q, size=50):
        """Take ``size`` entries from ``q`` and concatenate them into one list.

        Every queue entry is itself a list of rows.  Blocks if the queue holds
        fewer than ``size`` entries.
        """
        da = []
        for _ in range(size):
            da.extend(q.get())
        return da

    def put_q(self, asin, comment_count):
        """Queue ``asin`` for a state update and its review count for storage.

        Only the queues that exist in ``q_dict`` are fed, so subclasses with a
        reduced set of queues can reuse this method.
        """
        payloads = {
            "asin_queue": [[asin]],
            "comment_count_queue": [[asin, comment_count]],
        }
        for name, q in self.q_dict.items():
            if name in payloads:
                q.put(payloads[name])

    def up_db(self, table_name, asin_list):
        """Run the UPDATE that belongs to ``table_name``.

        Args:
            table_name: either the ASIN table (sets ``state``) or the review
                count table (sets ``comment_num``).
            asin_list: for the ASIN table a list of ``(state, parent_asin)``
                tuples; for the count table a DataFrame whose first two
                columns are ``parent_asin`` and ``comment_num``.

        Raises:
            ValueError: if ``table_name`` is neither of the two tables.
        """
        if table_name == self.asin_table_name:
            sql_up = f"UPDATE `{self.asin_table_name}` set `state`=(%s)  where `parent_asin`=(%s);"
        elif table_name == self.comment_count_table:
            sql_up = f"UPDATE `{self.comment_count_table}` set `comment_num`=(%s)  where `parent_asin`=(%s);"
            # Placeholders are (comment_num, parent_asin): swap the columns.
            asin_list = [(row[1], row[0]) for row in asin_list.values]
        else:
            raise ValueError(f"up_db cannot update unknown table {table_name!r}")
        if len(asin_list) == 0:
            return
        if len(asin_list) == 1:
            sql_update(sql_up, asin_list[0])
        else:
            sql_update_many(sql_up, asin_list)
        logging.debug(f"sql_update:: {sql_up}")
        logging.info(f"爬取完成 更新数据  {asin_list}")

    def q_count(self, cols_list, asin=None, close=False):
        """Write queued ASIN states and review counts.

        With ``close=False`` a queue is only processed once it holds at least
        ``get_count`` entries, and exactly ``get_count`` entries are taken.
        With ``close=True`` every queue is emptied.  ``error_queue`` is not
        handled here.
        """
        for name, q in self.q_dict.items():
            pending = q.qsize()
            if close:
                count = pending
            elif pending >= self.get_count:
                count = self.get_count
            else:
                count = 0
            if count == 0:
                continue
            if name == "asin_queue":
                states = [("3", entry[0]) for entry in self.get_q(q, size=count)]
                self.up_db(self.asin_table_name, states)
            elif name == "comment_count_queue":
                comment_data = self.get_q(q, size=count)
                df = pd.DataFrame(comment_data, columns=cols_list)
                self.save_db(df, self.comment_count_table, cols_list, asin)

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline for the site of the crawler's spider."""
        return cls(
            site=crawler.spider.site
        )

    def _set_asin_state(self, rows):
        """Apply ``(state, parent_asin)`` updates to the ASIN table; no-op when empty."""
        if not rows:
            return
        sql_up = f"UPDATE `{self.asin_table_name}` set `state`=(%s)  where `parent_asin`=(%s);"
        if len(rows) == 1:
            sql_update(sql_up, rows[0])
        else:
            sql_update_many(sql_up, rows)

    def process_item(self, item, spider):
        """Store the reviews carried by ``item`` and update the bookkeeping queues.

        Returns the item so that later pipelines keep receiving it.
        """
        if q := item.get('queues_'):
            data = self.get_q(q, size=q.qsize())
            logging.info(f"评论数{len(data)} {item.get('asin')}")
            df = pd.DataFrame(data, columns=self.COMMENT_COLUMNS)
            if self.save_db(df, self.comment_table_name, self.COMMENT_COLUMNS, item.get("asin")):
                item["error_asin"] = True
        if item.get("count_max") and not item.get("error_asin"):
            # Buffer the finished ASIN and its review count.
            self.put_q(item.get("asin"), item.get("comment_count"))
            sizes = {k: v.qsize() for k, v in self.q_dict.items()}
            logging.info(f"评论数 队列大小 {sizes}")
            self.q_count(self.cols_comment_num_list, item.get("asin"))
        if item.get("error_asin"):
            error_q = self.q_dict.get("error_queue")
            error_q.put(item.get("asin"))
            if error_q.qsize() >= self.get_count:
                dates = [(1, error_q.get()) for _ in range(self.get_count)]
                self._set_asin_state(dates)
                logging.info(f"修改asin状态1-----{len(dates)}---------{dates}")
        return item

    def close_spider(self, spider):
        """Write everything that is still queued when the spider finishes."""
        sizes = {k: v.qsize() for k, v in self.q_dict.items()}
        logging.info(f"{self.site}  爬虫结束，存储最后 数据 {sizes}")
        self.q_count(self.cols_comment_num_list, close=True)
        error_q = self.q_dict.get("error_queue")
        dates = [(1, error_q.get()) for _ in range(error_q.qsize())]
        # Skips the database round trip when nothing failed.
        self._set_asin_state(dates)
        logging.info(f"修改asin状态1-----{len(dates)}---------{dates}")


class DayAmazonSpiderPipeline(AmazonSpiderPipeline):
    """Variant of the review pipeline that only buffers review counts.

    Review rows arrive ready-made in ``item["data"]``.  Review counts are
    buffered and written once more than ``get_count`` of them are queued.
    There is no ASIN-state or error queue.
    """

    # Column order of one scraped review row, as written to the comment table.
    COMMENT_COLUMNS = [
        'asin', 'parent_asin', 'title', 'content', 'is_vp', 'model', 'rating',
        'agree_num', 'img_num', 'img_url', 'is_video', 'video_url',
        'comment_url', 'user_name', 'user_img', 'country', 'user_page',
        'is_earns_commissions', 'comment_time', 'page', 'md5_unique',
    ]

    def __init__(self, site):
        """Initialise for ``site`` and replace the queues with a single count queue."""
        # Pass the site through: without it the parent connects to its default
        # "us" site first and ``self.s`` stays bound to that connection.
        super().__init__(site=site)
        self.q_dict = {"comment_count_queue": Queue()}
        self.get_count = 50

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline for the site of the crawler's spider."""
        return cls(
            site=crawler.spider.site
        )

    def process_item(self, item, spider):
        """Store the review rows of ``item`` and buffer its review count.

        Returns the item so that later pipelines keep receiving it.
        """
        cols_list = self.COMMENT_COLUMNS
        df = pd.DataFrame(item.get("data"), columns=cols_list)
        self.save_db(df, self.comment_table_name, cols_list, item.get("asin"))
        if item.get("save_comment_num"):
            # Buffer the review count; write a batch once enough are queued.
            self.put_q(item.get("asin"), item.get("comment_count"))
            q_size = {k: v.qsize() for k, v in self.q_dict.items()}
            logging.info(f'评论数 队列大小{q_size}')
            count_q = self.q_dict.get("comment_count_queue")
            if count_q.qsize() > self.get_count:
                comment_data = self.get_q(count_q, size=self.get_count)
                df = pd.DataFrame(comment_data, columns=self.cols_comment_num_list)
                self.save_db(df, self.comment_count_table, self.cols_comment_num_list, item.get("asin"))
        return item

    def save_db(self, df, table_name, cols_list, asin):
        """Append ``df`` to ``table_name``.

        An ``IntegrityError`` triggers a row-by-row retry: duplicate review
        rows are skipped (giving up after four), duplicate count rows are
        turned into updates.  Any other error is logged and the connections
        are re-created.  Unlike the parent implementation this always returns
        ``None``.
        """
        try:
            df.to_sql(name=table_name, con=self.conn, if_exists='append', index=False)
            logging.info(f"单条asin 入库成功{asin}")
        except IntegrityError:
            error_count = 0
            for v in df.values:
                if error_count == 4:
                    break
                row_df = pd.DataFrame([v], columns=cols_list)
                try:
                    row_df.to_sql(name=table_name, con=self.conn, if_exists='append', index=False)
                    logging.info(f"入库成功--------------{row_df.values}")
                except IntegrityError:
                    if table_name == self.comment_table_name:
                        error_count += 1
                        logging.info(f"数据重复, 错误数据为 {v[0]}, {v[-2]}, {v[-1]} {v[1]}")
                    elif table_name == self.comment_count_table:
                        self.up_db(self.comment_count_table, row_df)
                        logging.info(f"评论数爬取完成 进行更新 {v[0]}, {v[1]}")
                except Exception:
                    logging.exception(f'数据入库失败, 错误数据为{v[0]}, {v[-2]}, {v[-1]} {v[1]}')
        except Exception:
            # Keep the traceback; a bare ``except`` would also have swallowed
            # KeyboardInterrupt / SystemExit.
            logging.exception(f'数据入库失败{asin}')
            self.conn = get_con(self.site)
            sql_connect(self.site)

    def close_spider(self, spider):
        """Write the review counts that are still queued when the spider finishes."""
        sizes = {k: v.qsize() for k, v in self.q_dict.items()}
        logging.info(f"爬虫结束，存储最后 数据 {sizes}")
        count_q = self.q_dict.get("comment_count_queue")
        pending = count_q.qsize()
        if not pending:
            # Nothing buffered: avoid writing an empty frame to the database.
            return
        comment_data = self.get_q(count_q, size=pending)
        df = pd.DataFrame(comment_data, columns=self.cols_comment_num_list)
        self.save_db(df, self.comment_count_table, self.cols_comment_num_list, asin=None)


class AmazonDetailSpiderPipeline:
    """Scrapy item pipeline for product-detail crawls.

    Each item carries exactly one kind of payload (detail row, images,
    best-seller category, variants, seller account, or an error marker).  The
    payload is put on the matching queue; a queue is written to the database
    once it holds ``num`` entries, and completely when the spider closes.

    NOTE(review): ``seller_account_queue`` is filled but never written by
    this class -- confirm whether a consumer is missing.
    NOTE(review): the DELETE statements are built by string formatting from
    scraped values; bound parameters would be safer if the SQL helpers
    support them.
    """

    # Year embedded in the weekly detail table name.
    DETAIL_TABLE_YEAR = 2023

    def __init__(self, site):
        """Open the MySQL/PostgreSQL connections for ``site`` and create the queues."""
        self.site = site
        self.conn = get_con(self.site)
        self.pg_conn = get_pg_con(self.site)
        self.s = sql_connect(self.site)
        self.q_dict = {
            "seller_account_item_queue": Queue(),
            "error_queue": Queue(),
            "seller_account_queue": Queue(),
            "asin_img_queue": Queue(),
            "bs_category_queue": Queue(),
            "detail_item_queue": Queue(),
            "variat_item_queue": Queue(),
        }
        # Number of queued entries that triggers a batch write.
        self.num = 1
        self.engine_pg = get_country_engine(self.site)

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline for the site of the crawler's spider."""
        return cls(
            site=crawler.spider.site
        )

    def process_item(self, item, spider):
        """Queue the payload of ``item`` and write every queue that is full.

        Returns the item so that later pipelines keep receiving it.
        """
        queues = self.q_dict
        if item.get("error_asin"):
            queues.get("error_queue").put(item.get("asin"))
        elif item.get("seller_account_item"):
            queues.get("seller_account_item_queue").put(list(item.get("seller_account_item").values()))
        elif item.get("seller_account"):
            queues.get("seller_account_queue").put(list(item.get("seller_account").values()))
        elif item.get("asin_img"):
            rows = [list(i.values()) for i in item.get("asin_img")]
            queues.get("asin_img_queue").put({item.get("site"): rows})
        elif item.get("bs_category"):
            queues.get("bs_category_queue").put(list(item.get("bs_category").values()))
        elif item.get("detail_item"):
            queues.get("detail_item_queue").put(item)
        elif item.get("variat_item"):
            queues.get("variat_item_queue").put(list(item.get("variat_item").values()))
        self._flush_queues(spider, drain=False)
        return item

    def close_spider(self, spider):
        """Write everything that is still queued when the spider finishes."""
        sizes = {k: v.qsize() for k, v in self.q_dict.items()}
        logging.info(f"爬虫结束，存储最后 数据 {sizes}")
        self._flush_queues(spider, drain=True)

    def _flush_queues(self, spider, drain):
        """Write queued entries; all of them when ``drain``, else one full batch."""
        handlers = {
            "detail_item_queue": self._store_detail_items,
            "error_queue": self._store_errors,
            "seller_account_item_queue": self._store_seller_accounts,
            "asin_img_queue": self._store_images,
            "bs_category_queue": self._store_bs_categories,
            "variat_item_queue": self._store_variants,
        }
        for name, q in self.q_dict.items():
            handler = handlers.get(name)
            if handler is None:
                continue
            pending = q.qsize()
            if drain:
                count = pending
            else:
                count = self.num if pending >= self.num else 0
            if not count:
                continue
            # Never ask for more than is queued: Queue.get() would block.
            batch = [q.get() for _ in range(count)]
            handler(batch, spider)

    @staticmethod
    def _in_clause(values):
        """Render ``values`` as a quoted SQL list such as ``('a', 'b')``."""
        quoted = ", ".join("'" + str(v).replace("'", "''") + "'" for v in values)
        return f"({quoted})"

    @staticmethod
    def _run_update(sql, rows):
        """Execute ``sql`` once per parameter tuple in ``rows``."""
        if len(rows) == 1:
            sql_update(sql, rows[0])
        else:
            sql_update_many(sql, rows)

    def _store_detail_items(self, batch, spider):
        """Replace the detail rows of each week and update the sync state."""
        week_group = {}
        for entry in batch:
            week_group.setdefault(entry.get("week"), []).append(entry.get("detail_item"))
        for week, details in week_group.items():
            rows = [list(d.values()) for d in details]
            df = pd.DataFrame(rows, columns=spider.col)
            # presumably row[0] is the asin and row[1] the asin_state -- verify
            # against spider.col.  States 9 and 7 are kept, all others become 3.
            asin_list = [(r[1], r[0]) if r[1] in (9, 7) else (3, r[0]) for r in rows]
            dele_asin = [state_asin[1] for state_asin in asin_list]
            table = f"{self.site}_asin_detail_{self.DETAIL_TABLE_YEAR}_{week}"
            # Delete first so that re-crawled ASINs are not stored twice.
            sql_delete(f"delete from {table} where asin in {self._in_clause(dele_asin)};")
            logging.info(f"{table} 列表数 {len(dele_asin)}  删除成功--------------{dele_asin}")

            del df["asin_state"]
            df.to_sql(name=table, con=self.conn, if_exists='append', index=False)
            logging.info(f"列表数 {len(rows)}  入库成功--------------{rows}")
            sql_up_week = f"UPDATE `{self.site}_all_syn_st` set `state`=(%s)  where asin=(%s) and week='{week}';"
            self._run_update(sql_up_week, asin_list)
            logging.info(f"修改asin状态3-----{len(asin_list)}---------{asin_list}")

    def _store_errors(self, batch, spider):
        """Write the state of failed ASINs back to the sync table."""
        # NOTE(review): the statement has three placeholders, so every queued
        # value must be a (state, asin, week) sequence -- confirm that the
        # spiders put such a sequence into item["asin"] for error items.
        sql_up = f"UPDATE `{self.site}_all_syn_st` set `state`=(%s)  where asin=(%s) and `week`=(%s);"
        self._run_update(sql_up, batch)
        logging.info(f"修改asin状态1-----{len(batch)}---------{batch}")

    def _store_seller_accounts(self, batch, spider):
        """Insert seller accounts, updating the name when the row already exists."""
        dates = list({tuple(entry) for entry in batch})
        inset_sql = f"insert into `{self.site}_seller_account_syn` (`account_name`, `url`) values (%s, %s) ON DUPLICATE KEY UPDATE `account_name` = values(`account_name`);"
        if len(dates) == 1:
            sql_insert(inset_sql, dates[0])
        else:
            sql_insert_many(inset_sql, dates)
        logging.info(f"更新seller_account_item_queue-----{len(dates)}---------{dates}")

    def _store_images(self, batch, spider):
        """Replace the image rows of every ASIN in ``batch`` (PostgreSQL)."""
        by_site = {}
        for entry in batch:
            for site, rows in entry.items():
                # Collect the rows of all entries; keeping only the first
                # entry per site would drop later images in a batch.
                by_site.setdefault(site, []).extend(rows)
        for site, rows in by_site.items():
            dele_asin = {row[0] for row in rows}
            sql_del = f"delete from {site}_asin_image where asin in {self._in_clause(dele_asin)};"
            self.pg_conn.execute(sql_del)
            logging.info(f"删除image-{site}----{len(dele_asin)}---------{dele_asin}")
            df = pd.DataFrame(rows, columns=["asin", "img_url", "img_order_by", "data_type"])
            df.drop_duplicates(subset=["asin", "img_order_by", "data_type"], inplace=True)
            df.to_sql(name=f"{site}_asin_image", con=self.pg_conn, if_exists='append', index=False)
            logging.info(f"image入库-{site}----{len(rows)}---------{rows}")

    def _store_bs_categories(self, batch, spider):
        """Replace the best-seller category rows of every ASIN in ``batch``."""
        dele_asin = [row[0] for row in batch]
        table = f"{self.site}_bs_category_asin_detail"
        sql_delete(f"delete from {table} where asin in {self._in_clause(dele_asin)};")
        logging.info(f"删除bs_category_queue-----{len(dele_asin)}---------{dele_asin}")

        df = pd.DataFrame(batch, columns=["asin", "week", "best_sellers_rank", "last_herf"])
        df.drop_duplicates(subset=["asin"], inplace=True)
        df.to_sql(name=table, con=self.conn, if_exists='append', index=False)
        logging.info(f"bs_category_queue入库-----{len(batch)}---------{batch}")

    def _store_variants(self, batch, spider):
        """Replace the variant rows of every parent ASIN in ``batch``."""
        dele_asin = {row[1] for row in batch}
        sql_delete(f"delete from `{self.site}_variat` where parent_asin in {self._in_clause(dele_asin)};")
        logging.info(f"删除variat_item-----{len(dele_asin)}---------{dele_asin}")

        df = pd.DataFrame(batch, columns=["asin", "parent_asin", "color", "size", "style", "column_2", "state"])
        df.drop_duplicates(subset=["asin"], inplace=True)
        df.to_sql(name=f"{self.site}_variat", con=self.conn, if_exists='append', index=False)
        logging.info(f"variat_item入库-----{len(batch)}---------{batch}")


class AmazonMxSpiderPipeline:
    """Scrapy item pipeline that stores ``mx_self_asin_detail`` rows.

    Scraped rows and failed ASINs are queued; a queue is processed once it
    holds ``num`` entries and completely when the spider closes.  After each
    batch the ``state`` column of ``mx_self_asin`` is set to 3 for stored
    ASINs and to 1 for ASINs that failed or could not be stored.
    """

    def __init__(self, site):
        """Open the database connections for ``site`` and create the queues."""
        self.site = site
        self.conn = get_con(self.site)
        self.s = sql_connect(self.site)
        self.q_dict = {"asin_queue": Queue(), "error_queue": Queue()}
        # Number of queued entries that triggers a batch write.
        self.num = 1

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline for the site of the crawler's spider."""
        return cls(
            site=crawler.spider.site
        )

    def process_item(self, item, spider):
        """Queue ``item`` (or its ASIN on error) and write every full queue.

        Returns the item so that later pipelines keep receiving it.
        """
        if item.get("error_asin"):
            self.q_dict.get("error_queue").put(item.get("asin"))
        else:
            self.q_dict.get("asin_queue").put(list(item.values()))
        self._flush_queues(spider, drain=False)
        return item

    def close_spider(self, spider):
        """Write everything that is still queued when the spider finishes."""
        sizes = {k: v.qsize() for k, v in self.q_dict.items()}
        logging.info(f"爬虫结束，存储最后 数据 {sizes}")
        self._flush_queues(spider, drain=True)

    def _flush_queues(self, spider, drain):
        """Process queued entries; all of them when ``drain``, else one full batch."""
        sql_up = "UPDATE `mx_self_asin` set state=(%s)  where asin=(%s);"
        for name, q in self.q_dict.items():
            pending = q.qsize()
            if drain:
                count = pending
            else:
                count = self.num if pending >= self.num else 0
            if not count:
                # Empty or not yet full: no database work at all.
                continue
            if name == "asin_queue":
                rows = [q.get() for _ in range(count)]
                state_num = self._store_rows(rows, spider)
                # presumably the first field of every row is the asin -- verify
                # against spider.col.
                updates = [(state_num, row[0]) for row in rows]
            elif name == "error_queue":
                state_num = 1
                updates = [(1, q.get()) for _ in range(count)]
            else:
                continue
            if len(updates) == 1:
                sql_update(sql_up, updates[0])
            else:
                sql_update_many(sql_up, updates)
            logging.info(f"修改asin状态{state_num}-----{len(updates)}---------{updates}")

    def _store_rows(self, rows, spider):
        """Append ``rows`` to ``mx_self_asin_detail``; return the resulting ASIN state.

        Returns 3 on success.  On failure the connections are re-created and
        1 is returned.
        """
        df = pd.DataFrame(rows, columns=spider.col)
        try:
            df.to_sql(name="mx_self_asin_detail", con=self.conn, if_exists='append', index=False)
        except Exception:
            logging.exception("入库失败重新创建数据库链接")
            self.conn = get_con(self.site)
            self.s = sql_connect(self.site)
            return 1
        # Only report success when the insert really went through.
        logging.info(f"列表数 {len(rows)}  入库成功--------------{rows}")
        return 3


class AmazonVariatSpiderPipeline:
    """Scrapy item pipeline that stores product-variant rows.

    Scraped rows and failed ASINs are queued; a queue is processed once it
    holds ``num`` entries and completely when the spider closes.  After each
    batch the ``state`` column of the site's ASIN table is set to 3 for
    stored ASINs and to 1 for ASINs that failed or could not be stored.
    """

    def __init__(self, site):
        """Open the database connections for ``site`` and resolve the table names."""
        self.site = site
        self.conn = get_con(self.site)
        self.s = sql_connect(self.site)
        self.q_dict = {"asin_queue": Queue(), "error_queue": Queue()}
        # Number of queued entries that triggers a batch write.
        self.num = 1
        self.asin_table_name = selection_table_name.get(f"{self.site}_asin_table")
        self.variat_table_name = selection_table_name.get(f"{self.site}_asin_variat")

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline for the site of the crawler's spider."""
        return cls(
            site=crawler.spider.site
        )

    def process_item(self, item, spider):
        """Queue ``item`` (or its ASIN on error) and write every full queue.

        Returns the item so that later pipelines keep receiving it.
        """
        if item.get("error_asin"):
            self.q_dict.get("error_queue").put(item.get("asin"))
        else:
            self.q_dict.get("asin_queue").put(list(item.values()))
        self._flush_queues(spider, drain=False)
        return item

    def close_spider(self, spider):
        """Write everything that is still queued when the spider finishes."""
        sizes = {k: v.qsize() for k, v in self.q_dict.items()}
        logging.info(f"爬虫结束，存储最后 数据 {sizes}")
        self._flush_queues(spider, drain=True)

    def _flush_queues(self, spider, drain):
        """Process queued entries; all of them when ``drain``, else one full batch."""
        sql_up = f"UPDATE `{self.asin_table_name}` set state=(%s)  where asin=(%s);"
        for name, q in self.q_dict.items():
            pending = q.qsize()
            if drain:
                count = pending
            else:
                count = self.num if pending >= self.num else 0
            if not count:
                # Empty or not yet full: no database work at all.
                continue
            if name == "asin_queue":
                rows = [q.get() for _ in range(count)]
                state_num = self._store_rows(rows, spider)
                # presumably the first field of every row is the asin -- verify
                # against spider.col.
                updates = [(state_num, row[0]) for row in rows]
            elif name == "error_queue":
                state_num = 1
                # The statement needs (state, asin) pairs, not bare ASINs.
                updates = [(1, q.get()) for _ in range(count)]
            else:
                continue
            if len(updates) == 1:
                sql_update(sql_up, updates[0])
            else:
                sql_update_many(sql_up, updates)
            logging.info(f"修改asin状态{state_num}-----{len(updates)}---------{updates}")

    def _store_rows(self, rows, spider):
        """Append ``rows`` to the variant table; return the resulting ASIN state.

        Returns 3 on success.  On failure the connections are re-created and
        1 is returned.
        """
        df = pd.DataFrame(rows, columns=spider.col)
        try:
            df.to_sql(name=self.variat_table_name, con=self.conn, if_exists='append', index=False)
        except Exception:
            logging.exception("入库失败重新创建数据库链接")
            self.conn = get_con(self.site)
            self.s = sql_connect(self.site)
            return 1
        logging.info(f"列表数 {len(rows)}  入库成功--------------{rows}")
        return 3


class AmazonFeedbackSpiderPipeline:
    """Scrapy item pipeline that buffers seller data and writes it in batches.

    Incoming items are sorted into four in-memory queues (feedback counts,
    per-account report numbers, product listings, failed accounts). A queue
    is flushed to the database once it holds ``self.num`` entries, and every
    non-empty queue is flushed one last time when the spider closes.
    """

    # Column order of the rows appended to ``{site}_seller_account_feedback``.
    _FEEDBACK_COLUMNS = ["site_name", "account_id", "count_30_day", "count_1_year", "count_lifetime", "created_at"]
    # Column order of the rows appended to ``{site}_asin_detail_product_2023``.
    _PRODUCT_COLUMNS = ["account_id", "asin", "title", "img_url", "price", "rating",
                        "total_comments", "week", "row_num", "month"]

    def __init__(self, site):
        """Open the database handles for ``site`` and create the empty queues.

        :param site: site code used as the prefix of every table name.
        """
        self.site = site
        self.conn = get_con(self.site)
        self.s = sql_connect(self.site)
        self.q_dict = {
            "seller_account_feedback_queue": Queue(),
            "seller_account_feedback_report_queue": Queue(),
            "asin_detail_product_2023_queue": Queue(),
            "error_queue": Queue()
        }
        # Number of queued entries that triggers a batch flush.
        self.num = 50
        self.engine_pg = get_country_engine(self.site)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline for the site of the crawler's spider."""
        return cls(
            site=crawler.spider.site
        )

    def process_item(self, item, spider):
        """Queue ``item`` and flush every queue that reached ``self.num``.

        The first truthy key among ``feedback_detail``, ``commodity_num``,
        ``seller`` and ``error_asin`` decides which queue receives the item.

        :param item: mapping-like scraped item.
        :param spider: running spider; ``spider.site`` and ``spider.week``
            are read when a flush happens.
        :return: ``item`` unchanged, so later pipelines still receive it.
        """
        if item.get("feedback_detail"):
            self.q_dict.get("seller_account_feedback_queue").put(list(item.get("feedback_detail").values()))
        elif item.get("commodity_num"):
            self.q_dict.get("seller_account_feedback_report_queue").put(list(item.get("commodity_num").values()))
        elif item.get("seller"):
            self.q_dict.get("asin_detail_product_2023_queue").put(item.get("seller"))
        elif item.get("error_asin"):
            self.q_dict.get("error_queue").put(item.get("account_name"))

        # Batched storage: only write once a queue holds a full batch.
        for queue_name, pending in self.q_dict.items():
            if pending.qsize() >= self.num:
                self._flush(queue_name, pending, self.num, spider)
        return item

    def close_spider(self, spider):
        """Flush everything still queued when the spider shuts down."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        for queue_name, pending in self.q_dict.items():
            self._flush(queue_name, pending, pending.qsize(), spider)

    def _flush(self, queue_name, pending, count, spider):
        """Take ``count`` entries off ``pending`` and hand them to its writer."""
        if count <= 0:
            # Empty queue: skip the database round trip entirely.
            return
        writers = {
            "seller_account_feedback_queue": self._flush_feedback,
            "seller_account_feedback_report_queue": self._flush_report,
            "asin_detail_product_2023_queue": self._flush_products,
            "error_queue": self._flush_errors,
        }
        writer = writers.get(queue_name)
        if writer is None:
            return
        writer([pending.get() for _ in range(count)], spider)

    @staticmethod
    def _run_update(sql, params):
        """Run ``sql`` once for a single parameter tuple, batched otherwise."""
        if len(params) == 1:
            sql_update(sql, params[0])
        else:
            sql_update_many(sql, params)

    def _flush_feedback(self, rows, spider):
        """Append feedback rows, then mark their accounts with state 3."""
        df = pd.DataFrame(rows, columns=self._FEEDBACK_COLUMNS)
        logging.debug(rows)
        df.to_sql(name=f"{self.site}_seller_account_feedback", con=self.conn, if_exists='append', index=False)
        logging.info(f"列表数 {len(rows)}  入库成功--------------{rows}")

        # Index 1 is the account_id column; it is matched against `id` below.
        up_dates = [(3, row[1]) for row in rows]
        sql_up = f"UPDATE `{spider.site}_seller_account_syn` set state=(%s)  where id=(%s);"
        self._run_update(sql_up, up_dates)
        logging.info(f"修改asin状态3-----{len(up_dates)}---------{up_dates}")

    def _flush_report(self, rows, spider):
        """Update ``num`` in the feedback report table for the spider's month."""
        dates = [tuple(row) for row in rows]
        logging.debug(dates)
        # NOTE(review): "-0" is only replaced for months 01-09 ("2023-05" ->
        # "2023_5"); months 10-12 keep the hyphen ("2023-11"). Confirm this
        # matches the `ym` values stored in the table.
        ym = str(spider.week["created_time"])[:7].replace("-0", "_")
        # presumably each queued row is (num, account_id) to match the
        # placeholders - verify against the spider that builds commodity_num.
        sql_up = f"UPDATE `{self.site}_seller_account_feedback_report` set `num`=(%s)  where account_id=(%s) and ym='{ym}';"
        logging.debug(sql_up)
        self._run_update(sql_up, dates)
        logging.info(f"更新seller_account_feedback_report num数据-----{len(dates)}---------{dates}")

    def _flush_products(self, batches, spider):
        """Replace the month's product rows of the accounts found in ``batches``.

        Insert-or-update is done as delete-then-append: existing rows of the
        affected accounts for the month are deleted first, then the fresh
        rows are appended.

        :param batches: queued entries, each an iterable of product dicts.
        """
        rows = [list(product.values()) for batch in batches for product in batch]
        if not rows:
            logging.info("空白页")
            return
        m = str(int(str(spider.week["created_time"]).split("-")[1]))
        dele_id = {row[0] for row in rows}
        try:
            with self.engine_pg.begin() as conn:
                # NOTE(review): the statement is built by string interpolation
                # from scraped account ids; consider bound parameters.
                if len(dele_id) == 1:
                    # A one-element tuple would render as "('x',)", which is
                    # not valid SQL, so that case is formatted by hand.
                    sql_delete = f"delete from {self.site}_asin_detail_product_2023 where account_id in ('{tuple(dele_id)[0]}') and `month`={m};"
                else:
                    sql_delete = f"delete from {self.site}_asin_detail_product_2023 where account_id in {tuple(dele_id)} and `month`={m};"
                conn.execute(sql_delete)
                logging.debug(sql_delete)
                logging.info(f"列表数 {len(dele_id)}  删除成功--------------{dele_id}")
        except Exception:
            # Best effort: record the failure, rebuild the engine, and still
            # append the new rows below.
            logging.exception("入库失败")
            self.engine_pg = get_country_engine(self.site)
        df = pd.DataFrame(rows, columns=self._PRODUCT_COLUMNS)
        df.to_sql(name=f"{self.site}_asin_detail_product_2023", con=self.conn, if_exists='append', index=False)

    def _flush_errors(self, entries, spider):
        """Mark failed accounts with state 1."""
        dates = [self._state_params(entry) for entry in entries]
        sql_up = f"UPDATE `{spider.site}_seller_account_syn` set state=(%s)  where account_name=(%s);"
        self._run_update(sql_up, dates)
        logging.info(f"修改asin状态1-----{len(dates)}---------{dates}")

    @staticmethod
    def _state_params(entry):
        """Return the ``(state, account_name)`` parameters for a queued entry.

        The UPDATE has two placeholders, so a bare account name is paired
        with state 1. An entry that is already a tuple or list is passed
        through unchanged.
        """
        if isinstance(entry, (tuple, list)):
            return entry
        return (1, entry)

