import time
import logging
import pandas as pd
from queue import Queue
from sqlalchemy.exc import OperationalError
from func_timeout.exceptions import FunctionTimedOut
# useful for handling different item types with a single interface
from amazon_spider.db.mysql_db import del_mysql_asin, df_to_sql


class AmazonSearchSpiderPipeline:
    """Scrapy item pipeline that stores competitor search rows in MySQL.

    For every item carrying a non-empty ``items`` list the pipeline:

    1. deletes the rows of ``us_self_asin_compet_amazon`` that match the
       item's site and the ASINs found in the first column of ``items``;
    2. inserts the fresh rows into the same table;
    3. queues a ``(3, asin, site)`` tuple that is later bound to the
       ``state`` / ``asin`` / ``site`` placeholders of an UPDATE on
       ``us_self_asin_top``.

    Queued updates are flushed as soon as ``self.num`` of them are pending,
    and whatever is left is flushed when the spider closes.

    Every database call is retried forever with blocking ``time.sleep``
    pauses, so a permanently failing statement stalls the pipeline.
    """

    # Column order expected for each row of ``item["items"]``.
    COMPET_COLUMNS = ["asin", "asin_compet", "img_url", "page", "page_row", "state", "site"]
    # Seconds to wait before retrying after a failed connectivity probe.
    NETWORK_RETRY_DELAY = 3
    # Seconds to wait before retrying after a failed or timed-out DB call.
    DB_RETRY_DELAY = 10

    def __init__(self, site):
        """Create the pipeline for the spider's ``site``.

        Args:
            site: Site identifier taken from the spider; stored on the
                instance but not read by the methods of this class.
        """
        self.site = site
        # Named queues of pending work; only the state-update queue is in use.
        self.q_dict = {
            "error_queue": Queue()
        }
        # Number of queued state updates that triggers a flush.
        self.num = 1

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from a crawler, reading ``crawler.spider.site``."""
        return cls(
            site=crawler.spider.site
        )

    def is_internet_available(self):
        """Probe connectivity with a GET to http://www.baidu.com (1 s timeout).

        Returns:
            True when the request completes, False when ``requests`` reports
            a network-level failure.
        """
        import requests
        try:
            requests.get("http://www.baidu.com", timeout=1)
        except requests.RequestException:
            # Only network failures count as "offline"; KeyboardInterrupt and
            # SystemExit must propagate so the retry loops can be interrupted.
            return False
        return True

    def _run_sql_until_done(self, sql, ok_msg, fail_msg, timeout_prefix, data=None):
        """Run ``del_mysql_asin`` against the "us" database until it succeeds.

        Args:
            sql: Statement handed to ``del_mysql_asin``.
            ok_msg: Message logged once the helper returns a truthy value.
            fail_msg: Message logged when the helper returns a falsy value.
            timeout_prefix: Text logged in front of a ``FunctionTimedOut``.
            data: Optional parameters forwarded as ``data=``; the keyword is
                omitted entirely when this is None.
        """
        call_kwargs = {"site": "us"}
        if data is not None:
            call_kwargs["data"] = data
        while True:
            try:
                if not self.is_internet_available():
                    logging.info("网络链接 超时")
                    time.sleep(self.NETWORK_RETRY_DELAY)
                    continue
                if del_mysql_asin(sql, **call_kwargs):
                    logging.info(ok_msg)
                    return
                logging.info(fail_msg)
            except FunctionTimedOut as e:
                logging.info(f"{timeout_prefix}{e}")
            # Reached after a falsy result or a timeout: back off, then retry.
            time.sleep(self.DB_RETRY_DELAY)

    def _delete_existing_rows(self, asins, site):
        """Delete the ``us_self_asin_compet_amazon`` rows for ``asins`` on ``site``."""
        # NOTE(review): the statement is built by string interpolation from
        # scraped values. Switching to bound parameters needs confirmation of
        # how del_mysql_asin treats ``data`` for a single statement.
        if len(asins) == 1:
            sql_del = f"delete from `us_self_asin_compet_amazon` where `asin`= '{asins[0]}' and `site`='{site}';"
        else:
            # A tuple's repr doubles as the SQL ``IN (...)`` list; the
            # one-element case is split off because of its trailing comma.
            sql_del = f"delete from `us_self_asin_compet_amazon` where `asin` in {tuple(asins)} and `site`='{site}';"
        self._run_sql_until_done(
            sql_del,
            ok_msg=f"清理 us_self_asin_compet_amazon表内 关键词数据 {asins}",
            fail_msg=f"清理 us_self_asin_compet_amazon表内 关键词数据 失败 {asins}",
            timeout_prefix="清理 us_self_asin_compet_amazon表内 关键词数据 超时 ",
        )

    def _store_rows(self, df):
        """Insert ``df`` into ``us_self_asin_compet_amazon``, retrying on failure."""
        while True:
            try:
                df_to_sql("us_self_asin_compet_amazon", df, site="us", db="mysql")
            except OperationalError as e:
                logging.info(f"存储到us_self_asin_compet_amazon表失败 {e}")
            except FunctionTimedOut as e:
                logging.info(f"存储到us_self_asin_compet_amazon表超时 {e}")
            else:
                logging.info("存储到us_self_asin_compet_amazon表成功 ")
                return
            time.sleep(self.DB_RETRY_DELAY)

    def _drain_error_queue(self, count):
        """Pop ``count`` entries from the state-update queue and return them."""
        pending = self.q_dict["error_queue"]
        return [pending.get() for _ in range(count)]

    def _apply_state_updates(self, dates):
        """Apply queued ``(state, asin, site)`` entries to ``us_self_asin_top``."""
        sql_up = "UPDATE `us_self_asin_top` set `state`=(%s)  where `asin`=(%s) and `site`=(%s);"
        self._run_sql_until_done(
            sql_up,
            ok_msg=f"修改asin状态1-----{len(dates)}---------{dates}",
            fail_msg=f"修改asin状态1-失败----{len(dates)}---------{dates}",
            timeout_prefix="修改asin状态1-超时----",
            data=dates,
        )

    def process_item(self, item, spider):
        """Persist one scraped item and flush queued state updates when due.

        Args:
            item: Mapping read through ``.get``; the keys used are ``items``,
                ``site``, ``asin``, ``error_asin`` and ``search_term``.
            spider: The running spider (unused).

        Returns:
            The same ``item``, so that later pipelines keep receiving it.
        """
        error_queue = self.q_dict["error_queue"]

        # NOTE(review): the original code had an unreachable ``else`` here that
        # queued (3, asin, site) for items without rows; confirm whether such
        # items were meant to be marked as well.
        if rows := item.get('items'):
            site = item.get("site")
            logging.info(f"当前站点为 {site}")
            df = pd.DataFrame(rows, columns=self.COMPET_COLUMNS)
            # First column of every row is the ASIN the row belongs to.
            asins = list({row[0] for row in rows})
            if asins:
                # Replace rather than append: clear the old rows first.
                self._delete_existing_rows(asins, site)
                self._store_rows(df)
            error_queue.put((3, item.get("asin"), site))

        if item.get("error_asin"):
            # NOTE(review): this queues a bare search term, whereas the UPDATE
            # issued on flush binds three parameters (state, asin, site).
            # Confirm the intended payload before relying on this path.
            error_queue.put(item.get("search_term"))

        if error_queue.qsize() >= self.num:
            self._apply_state_updates(self._drain_error_queue(self.num))

        # Scrapy expects process_item to hand the item on to the next pipeline.
        return item

    def close_spider(self, spider):
        """Flush every state update still queued when the spider shuts down."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        remaining = self.q_dict["error_queue"].qsize()
        if remaining:
            dates = self._drain_error_queue(remaining)
            print(dates)
            self._apply_state_updates(dates)




# conn = get_con("us")
# s = sql_connect("us")
# dates = [('B09GM8Y8BN', 'amazon.com', '16707', '4.5', '167', 'Visit the AROEVE Store', 'Home & Kitchen', 'Heating, Cooling & Air Quality', 'Air Purifiers', 'HEPA Air Purifiers', '', '', '', '510192', '{"6474046": 804920, "6474270": 816090, "6476596": 576284, "6476866": 623361, "6478072": 712591}', '{"7141123011": 4860126}', '')]
#
# print(dates)
# # dates = list(set([tuple(v.get()) for i in range(0, self.num)]))
# # ['asin', 'salesChannel', 'reviews', 'rating', 'current_rank', 'brand_name', 'root_category', 'second_category', 'three_category', 'four_category', 'five_category', 'six_category', 'seven_category', 'cat_id', 'history_rank', 'history_category', 'current_rank_avg']
# # inset_sql = f"insert into `product_publish_keepa_text` (`asin`, `salesChannel`, `reviews`, `rating`, `current_rank`, `brand_name`, `root_category`, `second_category`, `three_category`, `four_category`, `five_category`, `six_category`, `seven_category`, `cat_id`, `history_rank`, `history_category`, `current_rank_avg`) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE `history_rank` = values(`history_rank`)"
# inset_sql = f"REPLACE into `product_publish_keepa_text` (`asin`, `salesChannel`, `reviews`, `rating`, `current_rank`, `brand_name`, `root_category`, `second_category`, `three_category`, `four_category`, `five_category`, `six_category`, `seven_category`, `cat_id`, `history_rank`, `history_category`, `current_rank_avg`) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
# if len(dates) == 1:
#     sql_insert(inset_sql, dates[0])
# else:
#     sql_insert_many(inset_sql, dates)
# logging.info(f"更新product_publish_keepa_text-----{len(dates)}---------{dates}")