import time
import logging
import pandas as pd
from queue import Queue
# project helpers: connectivity check and MySQL access wrappers
from amazon_spider.utils.common import is_internet_available
from amazon_spider.db.mysql_db import sql_update, sql_update_many, sql_connect, sql_insert_many, sql_insert


class ContactInfoPipeline:
    """Scrapy item pipeline that buffers rows in queues and writes them to MySQL.

    Two queues are kept in ``self.q_dict``:

    * ``inner_item_queue`` -- rows upserted into ``company_info_1688``.
    * ``error_queue`` -- parameters for a ``state`` update on
      ``company_info_1688``.

    A queue is flushed once it holds at least ``self.num`` entries, and both
    queues are flushed completely when an item carries ``finish_spider`` or
    when the spider closes.
    """

    # Upsert statement for scraped contact rows (12 positional parameters).
    _INSERT_SQL = (
        "insert into `company_info_1688` (`company_name`, `mobileNo`, `phoneNum`, `fax`, "
        "`contact_name`, `position`, `memberId`, `card_url`, `home_url`, `address`, `state`, "
        "`updated_at`) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE `company_name` = values(`company_name`), "
        "`mobileNo` = values(`mobileNo`), `phoneNum` = values(`phoneNum`), "
        "`fax` = values(`fax`), `contact_name` = values(`contact_name`),  "
        "`position` = values(`position`), `card_url` = values(`card_url`), "
        "`home_url` = values(`home_url`), `address` = values(`address`), "
        "`state` = values(`state`), `updated_at` = values(`updated_at`);"
    )

    # State update for failed entries (2 positional parameters: state, memberId).
    # TODO: the table name needs to be changed (note carried over from the original).
    _UPDATE_SQL = "UPDATE `company_info_1688` set `state`=(%s)  where memberId=(%s);"

    def __init__(self, site, batch_size=1):
        """Create the queues and call ``sql_connect`` for *site*.

        Args:
            site: Site identifier, stored on the instance and passed to
                ``sql_connect``.
            batch_size: Minimum number of queued entries that triggers a
                regular ("min") flush. Defaults to 1, i.e. write on every item.
        """
        self.site = site
        self.q_dict = {
            "inner_item_queue": Queue(),
            "error_queue": Queue(),
        }
        self.num = batch_size
        sql_connect(self.site)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from a crawler, using ``crawler.spider.site``."""
        return cls(
            site=crawler.spider.site
        )

    def sql_error_retry(self, f, sql, d):
        """Call ``f(sql, d)`` once ``is_internet_available()`` reports true.

        While the connectivity check fails, a message is logged and the call
        is retried every 3 seconds, indefinitely. Exceptions raised by *f*
        itself are not caught here.

        Args:
            f: Callable taking ``(sql, d)``, e.g. one of the ``sql_*`` helpers.
            sql: SQL statement with ``%s`` placeholders.
            d: Parameters for the statement (one row, or a list of rows).

        Returns:
            Whatever ``f(sql, d)`` returns.
        """
        while not is_internet_available():
            logging.info("requests baidu error --> T_T")
            time.sleep(3)
        return f(sql, d)

    def _drain(self, queue, q_size):
        """Pop and return the entries of *queue* that are due for writing.

        With ``q_size == "max"`` everything currently queued is taken.
        Otherwise exactly ``self.num`` entries are taken, and only if at
        least that many are queued; if not, an empty list is returned.
        """
        if q_size == "max":
            count = queue.qsize()
        elif queue.qsize() >= self.num:
            count = self.num
        else:
            count = 0
        return [queue.get() for _ in range(count)]

    def _execute(self, rows, sql, single, many, verb):
        """Run *sql* for *rows* and log the outcome.

        A lone row goes through *single* with that row as parameters; several
        rows go through *many* with the full list.
        """
        if len(rows) == 1:
            sql_id = self.sql_error_retry(single, sql, rows[0])
        else:
            sql_id = self.sql_error_retry(many, sql, rows)
        logging.info(f"sql_id {sql_id} {verb} company_info_1688 succeed {rows[0:3]}")

    def queue_consumer(self, q_size):
        """Flush the queues to the database.

        Args:
            q_size: ``"max"`` to write everything that is queued; any other
                value writes a batch of ``self.num`` entries per queue, and
                only when a queue holds at least that many.
        """
        for name, queue in self.q_dict.items():
            batch = self._drain(queue, q_size)
            if not batch:
                continue
            # The DataFrame round-trip normalises the batch (lists, tuples or
            # dicts) into a rectangular list of row lists.
            rows = [list(row) for row in pd.DataFrame(batch).values]
            if name == "inner_item_queue":
                self._execute(rows, self._INSERT_SQL, sql_insert, sql_insert_many, "save")
            elif name == "error_queue":
                # NOTE(review): the UPDATE takes two parameters (state,
                # memberId), so each queued entry must supply both -- confirm
                # what the spider puts in item["memberId"].
                self._execute(rows, self._UPDATE_SQL, sql_update, sql_update_many, "update")

    def process_item(self, item, spider):
        """Queue the payload of *item*, flush due batches and return *item*.

        * ``finish_spider`` truthy: everything already queued is written first.
        * ``inner_item`` truthy: its value is queued for the upsert.
        * otherwise, ``error_asin`` truthy: ``item["memberId"]`` is queued for
          the state update.

        Returns:
            The unchanged *item*, as Scrapy requires so that later pipelines
            receive it instead of ``None``.
        """
        if item.get("finish_spider"):
            print('等待时 将队列数据存储', {k: v.qsize() for k, v in self.q_dict.items()})
            self.queue_consumer(q_size="max")
        if item.get("inner_item"):
            self.q_dict["inner_item_queue"].put(item.get('inner_item'))
        elif item.get("error_asin"):
            self.q_dict["error_queue"].put(item.get("memberId"))

        self.queue_consumer("min")
        return item

    def close_spider(self, spider):
        """Write whatever is still queued when the spider shuts down."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self.queue_consumer(q_size="max")