import logging
import os
import time
from queue import Queue

import pandas as pd
# useful for handling different item types with a single interface


class TemuDetailSpiderTextPipeline:
    """Item pipeline that buffers scraped rows and dumps them to CSV in batches.

    Items carrying a truthy ``inner_item`` are queued as detail rows; items
    carrying a truthy ``error_asin`` have their ``status`` value queued as an
    error row. Whenever a queue holds at least ``self.num`` entries, one batch
    of ``self.num`` entries is written to disk. Whatever is left in the queues
    is written when the spider closes.

    Output files (relative to the current working directory):

    * detail rows -> ``<unix-timestamp>.csv``, one file per batch;
    * error rows  -> ``error_goodid.csv``, appended to across batches.

    NOTE: an earlier MySQL persistence path (delete matching rows from
    ``us_temu_detail``, append the batch, then set ``state`` = 3 in
    ``temu_good_id``) was disabled in favour of CSV output and has been
    removed from this class; recover it from version control if needed.
    """

    # Single file that accumulates every error batch.
    ERROR_CSV = "error_goodid.csv"

    def __init__(self, site):
        """Create empty queues for detail rows and error rows.

        Args:
            site: Site identifier taken from the spider; stored on the
                instance but not used by the CSV output path.
        """
        self.site = site
        self.q_dict = {
            "inner_item_queue": Queue(),
            "error_queue": Queue(),
        }
        # Number of queued entries that triggers (and makes up) one batch.
        self.num = 200

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from a crawler, reading ``crawler.spider.site``."""
        return cls(site=crawler.spider.site)

    def is_internet_available(self):
        """Return True if an HTTP GET to baidu.com succeeds within one second.

        Only request-level failures (connection errors, timeouts, ...) are
        treated as "offline"; KeyboardInterrupt / SystemExit are no longer
        swallowed.
        """
        import requests

        try:
            requests.get("http://www.baidu.com", timeout=1)
        except requests.RequestException:
            return False
        return True

    def process_item(self, item, spider):
        """Queue ``item`` and flush any queue that reached the batch size.

        Args:
            item: Mapping-like item. A truthy ``inner_item`` is queued as a
                detail row; otherwise a truthy ``error_asin`` queues
                ``item["status"]`` as an error row.
            spider: Running spider; ``spider.col`` supplies the detail CSV
                columns.

        Returns:
            The same ``item``, so that later pipelines keep receiving it.
        """
        if item.get("inner_item"):
            self.q_dict["inner_item_queue"].put(item)
        elif item.get("error_asin"):
            self.q_dict["error_queue"].put(item.get("status"))

        if self.q_dict["inner_item_queue"].qsize() >= self.num:
            self._flush_items(spider, self.num)
        if self.q_dict["error_queue"].qsize() >= self.num:
            self._flush_errors(self.num)
        return item

    def close_spider(self, spider):
        """Write everything still queued when the spider shuts down."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self._flush_items(spider, self.q_dict["inner_item_queue"].qsize())
        self._flush_errors(self.q_dict["error_queue"].qsize())

    @staticmethod
    def _batch_csv_path():
        """Return a not-yet-existing ``<unix-timestamp>.csv`` path.

        Two batches flushed within the same second would otherwise share a
        name and the second would overwrite the first, so a numeric suffix
        (``_1``, ``_2``, ...) is added on collision.
        """
        stem = str(int(time.time()))
        path = f"{stem}.csv"
        suffix = 0
        while os.path.exists(path):
            suffix += 1
            path = f"{stem}_{suffix}.csv"
        return path

    def _flush_items(self, spider, count):
        """Pop ``count`` detail items and write them to a new batch CSV."""
        if count <= 0:
            return
        pending = self.q_dict["inner_item_queue"]
        # get_nowait() raises instead of blocking forever if the queue is
        # unexpectedly shorter than ``count``.
        rows = [pending.get_nowait().get("inner_item") for _ in range(count)]
        df = pd.DataFrame(rows, columns=spider.col)
        if df.shape[0]:
            logging.info("detail save csv")
            df.to_csv(self._batch_csv_path(), mode='w')

    def _flush_errors(self, count):
        """Pop ``count`` error entries and append them to ``ERROR_CSV``.

        Each queued entry is presumably a ``(goodid, status)`` pair (it must
        fit the two columns below) -- TODO confirm against the spider.
        """
        if count <= 0:
            return
        pending = self.q_dict["error_queue"]
        rows = [pending.get_nowait() for _ in range(count)]
        df = pd.DataFrame(rows, columns=["goodid", "status"])
        path = self.ERROR_CSV
        # Append so earlier batches survive; emit the header only once.
        needs_header = not os.path.exists(path) or os.path.getsize(path) == 0
        df.to_csv(path, mode='a', header=needs_header)
