import time
import logging
import pandas as pd
from queue import Queue
from sqlalchemy.exc import OperationalError
from func_timeout import func_set_timeout, FunctionTimedOut
# project-local factories that return a database engine for a given site
from amazon_spider.db.mysql_db import get_country_engine
from amazon_spider.db.pg_db import get_pg_country_engine


# class TemuSearchSpiderPipeline:
#     def __init__(self, site):
#         self.site = site
#         # self.pg_engine = get_pg_country_engine(self.site)
#         # self.pg_conn = get_pg_con(self.site)
#         # self.us_conn = get_country_engine("us")
#         self.q_dict = {
#             # "inner_item_queue": Queue(),
#             # "variat_item_queue": Queue(),
#             # "collection_syn_queue": Queue(),
#             # "asin_detail_product_2023_queue": Queue(),
#             "error_queue": Queue()
#         }
#         self.num = 1
#
#     @classmethod
#     def from_crawler(cls, crawler):
#         return cls(
#             site=crawler.spider.site
#         )
#
#     def process_item(self, item, spider):
#         if item.get('datas'):
#             self.us_conn = get_con("us")
#             df = pd.DataFrame(item.get('datas'), columns=["asin", "asin_compet", "img_url", "page", "page_row", "state"])
#             if item.get('asin'):
#                 sql_del = f"delete from `us_self_asin_compet_temu` where `asin`= '{item.get('asin')}';"
#                 sql_connect("us")
#                 sql_delete(sql_del)
#                 logging.info(f"清理 us_self_asin_compet_amazon表内 关键词数据 {item.get('asin')}")
#             df.to_sql(name=f"us_self_asin_compet_temu", con=self.us_conn, if_exists='append', index=False)
#             logging.info(f"入库成功-----{len(item.get('datas'))}---------{item.get('datas')}")
#             self.q_dict.get('error_queue').put((3, item.get("asin"), item.get("site")))
#
#         if item.get("error_asin"):
#             self.q_dict.get('error_queue').put(item.get("status"))
#         if self.q_dict.get("error_queue").qsize() >= self.num:
#             sql_connect("us")
#             dates = [self.q_dict.get("error_queue").get() for i in range(0, self.num)]
#             sql_up = f"UPDATE `us_self_asin_top` set `state`=(%s)  where `asin`=(%s) and `site`=(%s);"
#             if len(dates) == 1:
#                 sql_update(sql_up, dates[0])
#             else:
#                 sql_update_many(sql_up, dates)
#             logging.info(f"修改asin状态1-----{len(dates)}---------{dates}")
#
#     def close_spider(self, spider):
#         print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
#         for k, v in self.q_dict.items():
#             if k == "error_queue":
#                 if v.qsize():
#                     sql_connect("us")
#
#                     dates = [self.q_dict.get("error_queue").get() for i in range(0, v.qsize())]
#                     print(dates)
#                     sql_up = f"UPDATE `us_self_asin_top` set `state`=(%s)  where `asin`=(%s) and `site`=(%s);"
#                     if len(dates) == 1:
#                         sql_update(sql_up, dates[0])
#                     else:
#                         sql_update_many(sql_up, dates)
#                     logging.info(f"修改asin状态1-----{len(dates)}---------{dates}")


class TemuSearchSpiderTextPipeline:
    """Scrapy item pipeline that stores Temu search results in MySQL.

    For every item that carries ``datas`` the pipeline:

    1. deletes the rows already stored in ``temu_good_id`` for the item's
       ``search_term`` (when the item has one),
    2. appends the de-duplicated rows to ``temu_good_id``,
    3. queues a ``(3, search_term)`` tuple and, once ``self.num`` tuples are
       queued, writes them to ``temu_search_term.state``.

    Every database step is retried until it succeeds; the retry loops only
    give up on exceptions other than ``OperationalError`` and
    ``FunctionTimedOut``.
    """

    def __init__(self, site):
        """Remember the target site and create the pending-state queue.

        :param site: site code passed to ``get_country_engine``.
        """
        self.site = site
        # Pending ``(state, search_term)`` tuples waiting to be written to
        # ``temu_search_term``.
        self.q_dict = {
            "error_queue": Queue()
        }
        # Number of queued tuples that triggers a flush in ``process_item``.
        self.num = 1

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the running crawler's ``spider.site``."""
        return cls(
            site=crawler.spider.site
        )

    def is_internet_available(self):
        """Return True when an HTTP GET to baidu.com succeeds within 1 second."""
        import requests

        try:
            requests.get("http://www.baidu.com", timeout=1)
        except requests.RequestException:
            # Any transport-level failure counts as "offline".
            return False
        return True

    @func_set_timeout(100)
    def del_mysql_asin(self, sql, data=None, site="us"):
        """Execute a write statement against the MySQL engine of ``site``.

        :param sql: statement text, optionally containing ``%s`` placeholders.
        :param data: parameters for ``sql``. ``None`` runs the statement
            without parameters; an empty container skips execution entirely.
        :param site: site code passed to ``get_country_engine``.
        :return: True when no ``OperationalError`` occurred, otherwise False.
        :raises FunctionTimedOut: when the call takes longer than 100 seconds.
        """
        engine = None
        try:
            engine = get_country_engine(site)
            with engine.connect() as conn:
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError as err:
            logging.warning(f"del_mysql_asin OperationalError -{err}")
            return False
        finally:
            # Dispose only after the connection has been released, and also
            # on failure, so no pooled connection is left behind.
            if engine is not None:
                engine.dispose()

    @func_set_timeout(300)
    def df_to_sql(self, table_name, df, site="us", db="mysql"):
        """Append ``df`` to ``table_name`` in the MySQL database of ``site``.

        :param table_name: destination table.
        :param df: DataFrame whose rows are appended.
        :param site: site code passed to ``get_country_engine``.
        :param db: backend name; only ``"mysql"`` is supported.
        :return: True on success; False when the backend is unsupported, the
            network check fails, or an ``OperationalError`` is raised.
        :raises FunctionTimedOut: when the call takes longer than 300 seconds.
        """
        if db != "mysql":
            return False
        if not self.is_internet_available():
            return False
        engine = None
        try:
            engine = get_country_engine(site)
            df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
            return True
        except OperationalError as err:
            logging.warning(f"df_to_sql OperationalError -{err}")
            return False
        finally:
            if engine is not None:
                engine.dispose()

    @func_set_timeout(100)
    def df_read_sql(self, find_sql, site="us", db="mysql", params=None):
        """Run ``find_sql`` and return the result as a DataFrame.

        :param find_sql: query text, optionally containing ``%s`` placeholders.
        :param site: site code passed to the engine factory.
        :param db: ``"mysql"`` or ``"pg"``; any other value returns None.
        :param params: optional parameters forwarded to ``pandas.read_sql``.
        :return: the query result, or None for an unsupported ``db``.
        :raises FunctionTimedOut: when the call takes longer than 100 seconds.
        """
        if db == "mysql":
            engine = get_country_engine(site)
        elif db == "pg":
            engine = get_pg_country_engine(site)
        else:
            return None
        try:
            return pd.read_sql(find_sql, con=engine, params=params)
        finally:
            engine.dispose()

    def _clear_search_term(self, item):
        """Delete the rows stored for the item's search term, retrying until done."""
        search_term = item.get('search_term')
        # Parameterized so that terms containing quotes cannot break the statement.
        sql_del = "delete from `temu_good_id` where `search_term`=%s;"
        while True:
            try:
                if not self.is_internet_available():
                    time.sleep(3)
                    logging.info("网络链接失败------")
                    continue
                if self.del_mysql_asin(sql_del, data=(search_term,), site=self.site):
                    logging.info(f"清理 temu_good_id 表内 关键词数据 {search_term}")
                    return
                # The statement hit an OperationalError: back off and retry.
                time.sleep(3)
                logging.info(f"清理 temu_good_id 失败, retrying {search_term}")
            except FunctionTimedOut as er:
                time.sleep(3)
                logging.info(
                    f"更新 temu_good_id-超时-{er}---{len(item.get('datas'))}---------{search_term}")

    def _search_term_is_stored(self, search_term):
        """Return True when ``temu_good_id`` holds at least one row for the term."""
        find_sql = "select * from temu_good_id where search_term=%s limit 1;"
        while True:
            try:
                if self.is_internet_available():
                    found = self.df_read_sql(find_sql, self.site, "mysql", params=(search_term,))
                    return bool(found.size)
                time.sleep(2)
                logging.info(f"请求百度失败  等待3秒 -")
            except OperationalError as err:
                time.sleep(2)
                logging.info(f"获取数据失败 -{err}")
            except FunctionTimedOut as err:
                time.sleep(2)
                logging.info(f"查询数据超时 -{err}")

    def _insert_rows(self, df, item):
        """Append ``df`` to ``temu_good_id``, retrying until the rows are stored."""
        rows = item.get('datas')
        search_term = item.get('search_term')
        while True:
            try:
                if self.df_to_sql("temu_good_id", df=df, site=self.site, db="mysql"):
                    logging.info(f"更新 temu_good_id-----{len(rows)}---------{search_term}")
                    return
                logging.info(f"更新 temu_good_id----失败")
                # Back off instead of retrying immediately.
                time.sleep(3)
            except OperationalError as err:
                time.sleep(3)
                logging.info(
                    f"更新 temu_good_id-失败-{err}---{len(rows)}---------{search_term}")
            except FunctionTimedOut as er:
                # A timeout may fire after the insert already went through;
                # retrying blindly would store the rows twice, so check the
                # table before trying again.
                logging.info("数据超时处理,判断是否入库")
                if self._search_term_is_stored(search_term):
                    logging.info("获取数据成功")
                    return
                time.sleep(3)
                logging.info(
                    f"更新 temu_good_id-超时-{er}---{len(rows)}---------{rows}")

    def _flush_state_updates(self, count):
        """Pop ``count`` queued tuples and write them to ``temu_search_term``."""
        pending = self.q_dict.get("error_queue")
        dates = [pending.get() for _ in range(count)]
        if not dates:
            return
        sql_up = "UPDATE `temu_search_term` set `state`=(%s)  where `search_term`=(%s);"
        # One tuple runs as a single statement, a list of tuples as a batch.
        payload = dates[0] if len(dates) == 1 else dates
        while True:
            try:
                if not self.is_internet_available():
                    time.sleep(3)
                    logging.info("修改asin状态1 网络链接失败------")
                    continue
                if self.del_mysql_asin(sql_up, data=payload, site=self.site):
                    logging.info(f"修改asin状态1-----{len(dates)}---------{dates}")
                    return
                # The update hit an OperationalError: retry instead of
                # silently dropping the dequeued state changes.
                time.sleep(3)
                logging.info(f"修改asin状态1-失败, retrying---{len(dates)}---------{dates}")
            except FunctionTimedOut as er:
                time.sleep(3)
                logging.info(f"修改asin状态1-超时-{er}---{len(dates)}---------{dates}")

    def process_item(self, item, spider):
        """Store the item's rows and update the search-term state.

        :param item: mapping-like item; ``datas`` holds the result rows and
            ``search_term`` the term they belong to.
        :param spider: the running spider (unused).
        :return: the unchanged item, so later pipelines keep receiving it.
        """
        rows = item.get('datas')
        if rows:
            # NOTE(review): "img_url" appears twice in this column list; the
            # last entry is probably meant to be a different column - confirm
            # against the temu_good_id schema.
            df = pd.DataFrame(rows, columns=["search_term", "title", "goodid", "img_url", "page", "img_url"])
            df.drop_duplicates(['goodid', 'search_term'], inplace=True)
            logging.info(f"去重后数据 量为{df.shape}")
            if item.get('search_term'):
                self._clear_search_term(item)

            self._insert_rows(df, item)

            logging.info(f"入库成功-----{len(rows)}---------{rows}")
            self.q_dict.get('error_queue').put((3, item.get("search_term")))

        if self.q_dict.get("error_queue").qsize() >= self.num:
            self._flush_state_updates(self.num)
        return item

    def close_spider(self, spider):
        """Write every state update still queued when the spider closes."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        remaining = self.q_dict.get("error_queue").qsize()
        if remaining:
            self._flush_state_updates(remaining)
