import time
import logging
import pandas as pd
from queue import Queue
from sqlalchemy.exc import OperationalError
from func_timeout import func_set_timeout
from func_timeout.exceptions import FunctionTimedOut
# get_country_engine(site) supplies the per-site database engine used by the pipeline below
from amazon_spider.db.mysql_db import get_country_engine


class UpsSpiderPipeline:
    """Scrapy item pipeline that buffers UPS items and stores them in batches.

    Items carrying ``ups_data`` are queued in ``q_dict["ups_queue"]``. A batch
    is flushed when the queue reaches ``self.num`` items, when an item with a
    truthy ``finish_spider`` key arrives, and when the spider closes.

    Flushing a batch does three things in order:

    1. deletes rows of ``ups_orders_detail`` whose ``transport_sn`` equals the
       first element of any queued ``ups_data`` row (retried until it works);
    2. appends the queued rows to ``ups_orders_detail``;
    3. writes ``state`` on ``ups_orders_syn`` for every queued item ``id``:
       3 when the insert succeeded, 1 otherwise (retried until it works).
    """

    # Statement that records the outcome of a batch on the source rows.
    _SQL_UPDATE_STATE = "UPDATE `ups_orders_syn` set state=(%s)  where id=(%s);"
    # Seconds to wait between two attempts of a failing database operation.
    _RETRY_DELAY = 3

    def __init__(self):
        self.q_dict = {"ups_queue": Queue(), "error_queue": Queue()}
        # Queue size at which process_item flushes a batch.
        self.num = 50
        self.save_num = 20

    def is_internet_available(self):
        """Return True when a 1-second HTTP GET to baidu.com succeeds."""
        import requests
        try:
            requests.get("http://www.baidu.com", timeout=1)
            return True
        except Exception:
            # Best-effort probe: any failure means "offline". Exception (not a
            # bare except) lets KeyboardInterrupt and FunctionTimedOut through.
            return False

    @func_set_timeout(100)
    def del_mysql_asin(self, sql, data=None, site="us"):
        """Execute one SQL statement on the engine for ``site``.

        Despite its name the method runs whatever statement it is given.

        Args:
            sql: statement text, using ``%s`` placeholders when ``data`` is given.
            data: ``None`` to run ``sql`` without parameters; otherwise the
                parameters passed to ``conn.execute``. An empty ``data`` means
                there is nothing to do and no statement is executed.
            site: key handed to ``get_country_engine``.

        Returns:
            True when no ``OperationalError`` occurred, False otherwise.

        Raises:
            FunctionTimedOut: when the call exceeds 100 seconds.
        """
        engine = None
        try:
            engine = get_country_engine(site)
            with engine.connect() as conn:
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError as exc:
            logging.warning(f"del_mysql_asin OperationalError: {exc}")
            return False
        finally:
            # Release the pool on failures too, not only on the success path.
            if engine is not None:
                engine.dispose()

    @func_set_timeout(300)
    def df_to_sql(self, table_name, df, site="us", db="mysql"):
        """Append ``df`` to ``table_name``.

        Args:
            table_name: destination table.
            df: DataFrame whose rows are appended (index is not written).
            site: key handed to ``get_country_engine``.
            db: ``"mysql"`` or ``"pg"``.

        Returns:
            True on success; False when the connectivity probe fails, an
            ``OperationalError`` occurs, or ``db`` is not recognised.

        Raises:
            FunctionTimedOut: when the call exceeds 300 seconds.
        """
        try:
            if db == "mysql":
                if not self.is_internet_available():
                    return False
                engine = get_country_engine(site)
                try:
                    df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
                finally:
                    engine.dispose()
                return True
            if db == "pg":
                # NOTE(review): the PostgreSQL write is not implemented; this
                # branch only reports the connectivity probe and stores nothing.
                return self.is_internet_available()
            return False
        except OperationalError as exc:
            logging.warning(f"df_to_sql OperationalError: {exc}")
            return False

    def _delete_existing_rows(self, del_datas):
        """Delete ``ups_orders_detail`` rows for ``del_datas``; retry until done."""
        # Bound parameters instead of interpolating scraped values into the SQL.
        placeholders = ", ".join(["%s"] * len(del_datas))
        sql_delete_bsr = f"delete from ups_orders_detail where transport_sn in ({placeholders});"
        params = tuple(del_datas)
        while True:
            try:
                if not self.is_internet_available():
                    logging.info(f"清理ups_orders_detail表 网络链接失败 {del_datas}")
                elif self.del_mysql_asin(sql_delete_bsr, data=params, site="us"):
                    logging.info(f"清理ups_orders_detail表数据 {del_datas}")
                    return
                else:
                    logging.info(f"清理ups_orders_detail表数据失败 {del_datas}")
            except FunctionTimedOut as e:
                logging.info(f"清理ups_orders_detail表 超时 {e}")
            time.sleep(self._RETRY_DELAY)

    def _insert_rows(self, save_data, spider):
        """Append ``save_data`` to ``ups_orders_detail``; return True on success."""
        df = pd.DataFrame(save_data, columns=spider.col)
        try:
            if self.df_to_sql("ups_orders_detail", df, site="us", db="mysql"):
                logging.info(f"入库成功-----{len(save_data)}---------{save_data}")
                return True
            logging.info("入库失败  连接错误")
        except OperationalError as e:
            logging.info(f"入库失败  连接错误{e}")
        except FunctionTimedOut as e:
            logging.info(f"入库-超时{e}----{len(save_data)}---------{save_data}")
        return False

    def _update_states(self, up_data):
        """Write the ``(state, id)`` pairs to ``ups_orders_syn``; retry until done."""
        # A single pair is passed as one parameter tuple, several as a list.
        d = up_data[0] if len(up_data) == 1 else up_data
        while True:
            try:
                if self.del_mysql_asin(self._SQL_UPDATE_STATE, data=d, site="us"):
                    logging.info(f"修改asin状态3-----{len(d)}---------{d}")
                    return
                logging.info(f"修改asin状态3-失败----{len(d)}---------{d}")
            except FunctionTimedOut as e:
                logging.info(f"修改asin状态3-超时{e}----{len(d)}---------{d}")
            # Pause between attempts instead of spinning against the database.
            time.sleep(self._RETRY_DELAY)

    def _flush_ups_batch(self, spider, limit=None):
        """Take up to ``limit`` queued items (all when None) and store them."""
        pending = self.q_dict["ups_queue"]
        available = pending.qsize()
        count = available if limit is None else min(limit, available)
        if count <= 0:
            # Nothing queued: skip, otherwise the delete would be "... in ()".
            return
        datas = [pending.get() for _ in range(count)]
        save_data = [entry.get("ups_data") for entry in datas]
        del_datas = [row[0] for row in save_data]

        self._delete_existing_rows(del_datas)
        state_num = 3 if self._insert_rows(save_data, spider) else 1
        self._update_states([(state_num, entry.get("id")) for entry in datas])

    def process_item(self, item, spider):
        """Queue ``item`` and flush the queue when a batch is due.

        An item with a truthy ``finish_spider`` key flushes everything queued
        so far before the item itself is considered. An item with a truthy
        ``ups_data`` key is queued. When the queue then holds at least
        ``self.num`` items, ``self.num`` of them are flushed.

        Returns:
            The unchanged ``item``, so later pipelines keep receiving it.
        """
        if item.get("finish_spider"):
            self._flush_ups_batch(spider)
        if item.get("ups_data"):
            # Buffer the item so rows are written in batches.
            self.q_dict["ups_queue"].put(item)
        if self.q_dict["ups_queue"].qsize() >= self.num:
            self._flush_ups_batch(spider, limit=self.num)
        return item

    def close_spider(self, spider):
        """Flush whatever is still queued when the spider shuts down."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self._flush_ups_batch(spider)

