import time
import logging
import pandas as pd
from queue import Queue
# useful for handling different item types with a single interface
from func_timeout import func_set_timeout
from sqlalchemy.exc import OperationalError
from func_timeout.exceptions import FunctionTimedOut
from amazon_spider.db.pg_db import get_pg_country_engine
from amazon_spider.db.mysql_db import get_country_engine


class Detail1688SpiderPipeline:
    """Scrapy item pipeline that batches company-detail rows into MySQL.

    Items carrying ``inner_item`` are queued; once ``self.num`` of them are
    waiting (or a ``finish_spider`` item arrives, or the spider closes) the
    batch is written in three steps:

    1. delete rows with the same ``memberId`` from ``detail_table``;
    2. append the new rows to ``detail_table``;
    3. set ``state`` to ``done_state`` in ``1688_company_id`` for every
       ``(memberId, search_term)`` of the batch.

    Items carrying ``error_asin`` have their ``memberId`` value queued and are
    flushed as parameters of the same ``UPDATE`` statement.

    Every database step is retried until it succeeds, so a flush blocks the
    caller while the database or the network is unavailable.
    """

    # Table that receives the scraped rows.
    detail_table = "1688_company_detail"
    # Value written to `1688_company_id`.`state` after a batch is stored.
    done_state = 3
    # Seconds to wait between two attempts of a failed database step, so an
    # outage does not turn the retry loops into a busy loop.
    retry_delay = 1

    def __init__(self, site):
        """Create the pipeline for the database identified by *site*."""
        self.site = site
        self.q_dict = {
            "inner_item_queue": Queue(),
            "error_queue": Queue(),
        }
        # Number of queued entries that triggers a batch write.
        self.num = 150

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the running spider's ``site`` attribute."""
        return cls(
            site=crawler.spider.site
        )

    def is_internet_available(self):
        """Return True when a 1-second GET to www.baidu.com succeeds."""
        import requests
        try:
            requests.get("http://www.baidu.com", timeout=1)
            return True
        except requests.RequestException:
            # Only network-level failures mean "offline"; a bare ``except``
            # would also swallow KeyboardInterrupt/SystemExit.
            return False

    @func_set_timeout(300)
    def del_pg_asin(self, sql, site="us"):
        """Execute *sql* on the PostgreSQL engine of *site*.

        Returns:
            True when the statement ran, False when the connectivity check
            failed or SQLAlchemy raised ``OperationalError``.
        """
        if not self.is_internet_available():
            return False
        engine = None
        try:
            engine = get_pg_country_engine(site)
            with engine.connect() as conn:
                conn.execute(sql)
            return True
        except OperationalError:
            logging.info(f"失败sql为 {sql}")
            return False
        finally:
            # Dispose after the connection is closed, and on failure too, so
            # the per-call engine never leaks its pool.
            if engine is not None:
                engine.dispose()

    @func_set_timeout(30)
    def del_mysql_asin(self, sql, data=None, site="us"):
        """Execute *sql* on the MySQL engine of *site*.

        Args:
            sql: Statement text; may contain ``%s`` placeholders.
            data: ``None`` to run *sql* as is, otherwise the parameters
                passed to ``conn.execute``. An empty container skips the
                statement and still counts as success.
            site: Identifier handed to ``get_country_engine``.

        Returns:
            True on success, False when the connectivity check failed or
            SQLAlchemy raised ``OperationalError``.
        """
        if not self.is_internet_available():
            return False
        engine = None
        try:
            engine = get_country_engine(site)
            with engine.connect() as conn:
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError as err:
            logging.warning("MySQL statement failed: %s", err)
            return False
        finally:
            if engine is not None:
                engine.dispose()

    @func_set_timeout(300)
    def df_to_sql(self, table_name, df, site="us", db="mysql"):
        """Append *df* to *table_name* in the ``mysql`` or ``pg`` database.

        Returns:
            True on success, False when the connectivity check failed or
            SQLAlchemy raised ``OperationalError``.

        Raises:
            ValueError: *db* is neither ``"mysql"`` nor ``"pg"``. (The value
                used to fall through and return ``None``, which made the
                callers' retry loops spin forever.)
        """
        if db == "mysql":
            make_engine = get_country_engine
        elif db == "pg":
            make_engine = get_pg_country_engine
        else:
            raise ValueError(f"unsupported db: {db!r}")
        if not self.is_internet_available():
            return False
        engine = None
        try:
            engine = make_engine(site)
            df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
            return True
        except OperationalError as err:
            logging.warning("Writing to %s failed: %s", table_name, err)
            return False
        finally:
            if engine is not None:
                engine.dispose()

    def process_item(self, item, spider):
        """Queue *item* and write a batch when a queue is full.

        A ``finish_spider`` item first flushes both queues completely.
        The item is returned so that later pipeline components receive it.
        """
        if item.get("finish_spider"):
            print('等待时 将队列数据存储', {k: v.qsize() for k, v in self.q_dict.items()})
            self._flush_inner_items(spider)
            self._flush_errors()

        if item.get("inner_item"):
            self.q_dict["inner_item_queue"].put(item)
        elif item.get("error_asin"):
            self.q_dict["error_queue"].put(item.get("memberId"))

        if self.q_dict["inner_item_queue"].qsize() >= self.num:
            self._flush_inner_items(spider, limit=self.num)
        if self.q_dict["error_queue"].qsize() >= self.num:
            self._flush_errors(limit=self.num)
        return item

    def close_spider(self, spider):
        """Write whatever is still queued when the spider closes."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self._flush_inner_items(spider)
        self._flush_errors()

    def _drain(self, queue_name, limit=None):
        """Pop up to *limit* entries (all when None) from the named queue."""
        pending = self.q_dict[queue_name]
        count = pending.qsize() if limit is None else min(limit, pending.qsize())
        return [pending.get() for _ in range(count)]

    def _flush_inner_items(self, spider, limit=None):
        """Store up to *limit* queued detail rows and mark them as done."""
        rows = [entry.get("inner_item") for entry in self._drain("inner_item_queue", limit)]
        if not rows:
            return
        member_ids = [row.get("memberId") for row in rows if row.get("memberId")]
        if not member_ids:
            # Same outcome as before (nothing is written), but no longer silent.
            logging.warning("Dropping %d rows without memberId", len(rows))
            return
        self._delete_existing(member_ids)
        self._insert_rows(rows, spider)
        self._update_states(
            [(self.done_state, row.get("memberId"), row.get("search_term")) for row in rows],
            f"修改asin状态{self.done_state}",
        )

    def _flush_errors(self, limit=None):
        """Send up to *limit* queued error entries to the state UPDATE."""
        # Entries are whatever the spider stored under item["memberId"];
        # presumably (state, memberId, search_term) tuples matching the
        # UPDATE placeholders -- TODO confirm against the spider.
        entries = [entry for entry in self._drain("error_queue", limit) if entry]
        if entries:
            self._update_states(entries, "修改memberId状态3")

    def _delete_existing(self, member_ids):
        """Delete the rows of *member_ids* from ``detail_table``, retrying until done."""
        # Scraped ids are bound as parameters instead of being interpolated
        # into the statement text.
        placeholders = ", ".join(["%s"] * len(member_ids))
        sql_del = f"delete from `{self.detail_table}` where `memberId` in ({placeholders});"
        params = tuple(member_ids)
        while True:
            try:
                if self.del_mysql_asin(sql_del, data=params, site=self.site):
                    logging.info(f"删除 {self.detail_table}-----{len(member_ids)}---------{member_ids}")
                    return
                logging.info(f"删除 {self.detail_table}-失败----{len(member_ids)}---------{member_ids}")
            except FunctionTimedOut as e:
                logging.info(f"删除 {self.detail_table}-超时{e}----{len(member_ids)}---------{member_ids}")
            time.sleep(self.retry_delay)

    def _insert_rows(self, rows, spider):
        """Append *rows* (one per ``memberId``) to ``detail_table``, retrying until done."""
        # The frame does not change between attempts, so build it once.
        frame = pd.DataFrame(rows, columns=spider.col)
        del frame["search_term"]
        frame.drop_duplicates(['memberId'], inplace=True)
        while True:
            try:
                # NOTE(review): the insert targets site "us" while the delete
                # and the state update use self.site -- confirm this is intended.
                if self.df_to_sql(self.detail_table, frame, site="us", db="mysql"):
                    logging.info(
                        f"更新mysql 数据库 {self.detail_table}-----{len(rows)}---------{rows}")
                    return
                logging.info(f"更新mysql 数据库 {self.detail_table}-----失败")
            except OperationalError as e:
                logging.info(f"更新mysql 数据库 {self.detail_table}失败  连接错误{e}")
            except FunctionTimedOut as e:
                logging.info(
                    f"更新mysql 数据库 {self.detail_table}失败-超时-{e}---{len(rows)}---------{rows}")
            time.sleep(self.retry_delay)

    def _update_states(self, entries, label):
        """Run the ``1688_company_id`` state UPDATE for *entries*, retrying until done."""
        sql_up = "UPDATE `1688_company_id` set `state`=(%s)  where memberId=(%s) and search_term=(%s);"
        # A lone entry is passed bare (one execution); several are passed as
        # a list (one execution per entry).
        d = entries[0] if len(entries) == 1 else entries
        while True:
            try:
                if self.del_mysql_asin(sql_up, data=d, site=self.site):
                    logging.info(f"{label}-----{len(d)}---------{d}")
                    return
                logging.info(f"{label}-失败----{len(d)}---------{d}")
            except FunctionTimedOut as e:
                logging.info(f"{label}-超时{e}----{len(d)}---------{d}")
            time.sleep(self.retry_delay)


class ContactInfo1688SpiderPipeline:
    """Scrapy item pipeline that batches company contact rows into MySQL.

    Items carrying ``inner_item`` are queued; once ``self.num`` of them are
    waiting (or a ``finish_spider`` item arrives, or the spider closes) the
    batch is written in three steps:

    1. delete rows with the same ``memberId`` from ``contact_table``;
    2. append the new rows to ``contact_table``;
    3. set ``state`` to ``done_state`` in ``1688_company_id`` for every
       ``(memberId, search_term)`` of the batch.

    Items carrying ``error_asin`` have their ``memberId`` value queued and are
    flushed as parameters of the same ``UPDATE`` statement.

    Every database step is retried until it succeeds, so a flush blocks the
    caller while the database or the network is unavailable.
    """

    # Table that receives the scraped rows.
    contact_table = "1688_company_contact_info"
    # Value written to `1688_company_id`.`state` after a batch is stored.
    done_state = 5
    # Seconds to wait between two attempts of a failed database step, so an
    # outage does not turn the retry loops into a busy loop.
    retry_delay = 1

    def __init__(self, site):
        """Create the pipeline for the database identified by *site*."""
        self.site = site
        self.q_dict = {
            "inner_item_queue": Queue(),
            "error_queue": Queue(),
        }
        # Number of queued entries that triggers a batch write.
        self.num = 150

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the running spider's ``site`` attribute."""
        return cls(
            site=crawler.spider.site
        )

    def is_internet_available(self):
        """Return True when a 1-second GET to www.baidu.com succeeds."""
        import requests
        try:
            requests.get("http://www.baidu.com", timeout=1)
            return True
        except requests.RequestException:
            # Only network-level failures mean "offline"; a bare ``except``
            # would also swallow KeyboardInterrupt/SystemExit.
            return False

    @func_set_timeout(300)
    def del_pg_asin(self, sql, site="us"):
        """Execute *sql* on the PostgreSQL engine of *site*.

        Returns:
            True when the statement ran, False when the connectivity check
            failed or SQLAlchemy raised ``OperationalError``.
        """
        if not self.is_internet_available():
            return False
        engine = None
        try:
            engine = get_pg_country_engine(site)
            with engine.connect() as conn:
                conn.execute(sql)
            return True
        except OperationalError:
            logging.info(f"失败sql为 {sql}")
            return False
        finally:
            # Dispose after the connection is closed, and on failure too, so
            # the per-call engine never leaks its pool.
            if engine is not None:
                engine.dispose()

    @func_set_timeout(30)
    def del_mysql_asin(self, sql, data=None, site="us"):
        """Execute *sql* on the MySQL engine of *site*.

        Args:
            sql: Statement text; may contain ``%s`` placeholders.
            data: ``None`` to run *sql* as is, otherwise the parameters
                passed to ``conn.execute``. An empty container skips the
                statement and still counts as success.
            site: Identifier handed to ``get_country_engine``.

        Returns:
            True on success, False when the connectivity check failed or
            SQLAlchemy raised ``OperationalError``.
        """
        if not self.is_internet_available():
            return False
        engine = None
        try:
            engine = get_country_engine(site)
            with engine.connect() as conn:
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError as err:
            logging.warning("MySQL statement failed: %s", err)
            return False
        finally:
            if engine is not None:
                engine.dispose()

    @func_set_timeout(300)
    def df_to_sql(self, table_name, df, site="us", db="mysql"):
        """Append *df* to *table_name* in the ``mysql`` or ``pg`` database.

        Returns:
            True on success, False when the connectivity check failed or
            SQLAlchemy raised ``OperationalError``.

        Raises:
            ValueError: *db* is neither ``"mysql"`` nor ``"pg"``. (The value
                used to fall through and return ``None``, which made the
                callers' retry loops spin forever.)
        """
        if db == "mysql":
            make_engine = get_country_engine
        elif db == "pg":
            make_engine = get_pg_country_engine
        else:
            raise ValueError(f"unsupported db: {db!r}")
        if not self.is_internet_available():
            return False
        engine = None
        try:
            engine = make_engine(site)
            df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
            return True
        except OperationalError as err:
            logging.warning("Writing to %s failed: %s", table_name, err)
            return False
        finally:
            if engine is not None:
                engine.dispose()

    def process_item(self, item, spider):
        """Queue *item* and write a batch when a queue is full.

        A ``finish_spider`` item first flushes both queues completely.
        The item is returned so that later pipeline components receive it.
        """
        if item.get("finish_spider"):
            print('等待时 将队列数据存储', {k: v.qsize() for k, v in self.q_dict.items()})
            self._flush_inner_items(spider)
            self._flush_errors()

        if item.get("inner_item"):
            self.q_dict["inner_item_queue"].put(item)
        elif item.get("error_asin"):
            self.q_dict["error_queue"].put(item.get("memberId"))

        if self.q_dict["inner_item_queue"].qsize() >= self.num:
            self._flush_inner_items(spider, limit=self.num)
        if self.q_dict["error_queue"].qsize() >= self.num:
            self._flush_errors(limit=self.num)
        return item

    def close_spider(self, spider):
        """Write whatever is still queued when the spider closes."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self._flush_inner_items(spider)
        self._flush_errors()

    def _drain(self, queue_name, limit=None):
        """Pop up to *limit* entries (all when None) from the named queue."""
        pending = self.q_dict[queue_name]
        count = pending.qsize() if limit is None else min(limit, pending.qsize())
        return [pending.get() for _ in range(count)]

    def _flush_inner_items(self, spider, limit=None):
        """Store up to *limit* queued contact rows and mark them as done."""
        rows = [entry.get("inner_item") for entry in self._drain("inner_item_queue", limit)]
        if not rows:
            return
        # Missing ids are filtered out, as Detail1688SpiderPipeline does; an
        # unfiltered None used to end up as the bare word None in the SQL.
        member_ids = [row.get("memberId") for row in rows if row.get("memberId")]
        if not member_ids:
            logging.warning("Dropping %d rows without memberId", len(rows))
            return
        self._delete_existing(member_ids)
        self._insert_rows(rows, spider)
        # The label now reports the state actually written; it used to say
        # "状态3" although this pipeline writes 5.
        self._update_states(
            [(self.done_state, row.get("memberId"), row.get("search_term")) for row in rows],
            f"修改memberId状态{self.done_state}",
        )

    def _flush_errors(self, limit=None):
        """Send up to *limit* queued error entries to the state UPDATE."""
        # Entries are whatever the spider stored under item["memberId"];
        # presumably (state, memberId, search_term) tuples matching the
        # UPDATE placeholders -- TODO confirm against the spider.
        entries = [entry for entry in self._drain("error_queue", limit) if entry]
        if entries:
            self._update_states(entries, "修改memberId状态3")

    def _delete_existing(self, member_ids):
        """Delete the rows of *member_ids* from ``contact_table``, retrying until done."""
        # Scraped ids are bound as parameters instead of being interpolated
        # into the statement text.
        placeholders = ", ".join(["%s"] * len(member_ids))
        sql_del = f"delete from `{self.contact_table}` where `memberId` in ({placeholders});"
        params = tuple(member_ids)
        while True:
            try:
                if self.del_mysql_asin(sql_del, data=params, site=self.site):
                    logging.info(
                        f"删除 {self.contact_table}-----{len(member_ids)}---------{member_ids}")
                    return
                logging.info(
                    f"删除 {self.contact_table}-失败----{len(member_ids)}---------{member_ids}")
            except FunctionTimedOut as e:
                logging.info(
                    f"删除 {self.contact_table}-超时{e}----{len(member_ids)}---------{member_ids}")
            time.sleep(self.retry_delay)

    def _insert_rows(self, rows, spider):
        """Append *rows* (one per ``memberId``) to ``contact_table``, retrying until done."""
        # The frame does not change between attempts, so build it once.
        frame = pd.DataFrame(rows, columns=spider.col)
        del frame["search_term"]
        frame.drop_duplicates(['memberId'], inplace=True)
        while True:
            try:
                # NOTE(review): the insert targets site "us" while the delete
                # and the state update use self.site -- confirm this is intended.
                if self.df_to_sql(self.contact_table, frame, site="us", db="mysql"):
                    logging.info(
                        f"更新mysql 数据库 {self.contact_table}-----{len(rows)}---------{rows}")
                    return
                logging.info(f"更新mysql 数据库 {self.contact_table}-----失败")
            except OperationalError as e:
                logging.info(f"更新mysql 数据库 {self.contact_table}  连接错误{e}")
            except FunctionTimedOut as e:
                logging.info(
                    f"更新mysql 数据库 {self.contact_table}-超时-{e}---{len(rows)}---------{rows}")
            time.sleep(self.retry_delay)

    def _update_states(self, entries, label):
        """Run the ``1688_company_id`` state UPDATE for *entries*, retrying until done."""
        sql_up = "UPDATE `1688_company_id` set `state`=(%s)  where memberId=(%s) and search_term=(%s);"
        # A lone entry is passed bare (one execution); several are passed as
        # a list (one execution per entry).
        d = entries[0] if len(entries) == 1 else entries
        while True:
            try:
                if self.del_mysql_asin(sql_up, data=d, site=self.site):
                    logging.info(f"{label}-----{len(d)}---------{d}")
                    return
                logging.info(f"{label}-失败----{len(d)}---------{d}")
            except FunctionTimedOut as e:
                logging.info(f"{label}-超时{e}----{len(d)}---------{d}")
            time.sleep(self.retry_delay)


class Search1688SpiderPipeline:
    """Scrapy item pipeline that batches 1688 search results into MySQL.

    Incoming items are buffered in two in-memory queues:

    * ``inner_item_queue`` holds items that carry an ``inner_item`` list of
      row dicts destined for the ``1688_company_id`` table;
    * ``error_queue`` holds the ``search_term`` value of items flagged with
      ``error_asin``.

    A queue is written out once it holds ``self.num`` entries; whatever is
    left is written when an item flagged ``finish_spider`` arrives and again
    when the spider closes.
    """

    def __init__(self, site):
        """Create the pipeline with empty buffers.

        Args:
            site: Site code forwarded to the database engine factories.
        """
        self.site = site
        self.q_dict = {
            "inner_item_queue": Queue(),
            "error_queue": Queue(),
        }
        # Number of buffered entries that triggers a batch write.
        self.num = 10

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline using the ``site`` attribute of the crawler's spider."""
        return cls(site=crawler.spider.site)

    def is_internet_available(self):
        """Return True when an HTTP GET to baidu.com completes (1 s timeout).

        Any response counts as success; only request-level failures
        (connection errors, timeouts, ...) yield False.
        """
        import requests
        try:
            requests.get("http://www.baidu.com", timeout=1)
        except requests.RequestException:
            # Narrower than a bare ``except`` so KeyboardInterrupt, SystemExit
            # and other BaseException-derived signals are not swallowed.
            return False
        return True

    @func_set_timeout(300)
    def del_pg_asin(self, sql, site="us"):
        """Execute one raw SQL statement on the PostgreSQL engine for ``site``.

        Args:
            sql: Statement to execute.
            site: Site code passed to ``get_pg_country_engine``.

        Returns:
            True when the statement ran; False when the connectivity check
            failed or SQLAlchemy raised ``OperationalError``.

        Raises:
            FunctionTimedOut: raised by the ``func_set_timeout`` decorator
                when the call exceeds 300 seconds.
        """
        engine = None
        try:
            if not self.is_internet_available():
                return False
            engine = get_pg_country_engine(site)
            with engine.connect() as conn:
                conn.execute(sql)
            return True
        except OperationalError:
            logging.info(f"失败sql为 {sql}")
            return False
        finally:
            # Dispose only after the connection has been released, and also
            # on failure, so the pool never lingers.
            if engine is not None:
                engine.dispose()

    @func_set_timeout(30)
    def del_mysql_asin(self, sql, data=None, site="us"):
        """Execute one raw SQL statement on the MySQL engine for ``site``.

        Args:
            sql: Statement to execute, optionally with ``%s`` placeholders.
            data: Parameters for ``sql``. ``None`` runs the statement without
                parameters; an empty container runs nothing at all.
            site: Site code passed to ``get_country_engine``.

        Returns:
            True when the call completed (including the "nothing to run"
            case); False when the connectivity check failed or SQLAlchemy
            raised ``OperationalError``.

        Raises:
            FunctionTimedOut: raised by the ``func_set_timeout`` decorator
                when the call exceeds 30 seconds.
        """
        engine = None
        try:
            if not self.is_internet_available():
                return False
            engine = get_country_engine(site)
            with engine.connect() as conn:
                if data is None:
                    conn.execute(sql)
                elif data:
                    conn.execute(sql, data)
            return True
        except OperationalError:
            return False
        finally:
            # Dispose only after the connection has been released, and also
            # on failure, so the pool never lingers.
            if engine is not None:
                engine.dispose()

    @func_set_timeout(300)
    def df_to_sql(self, table_name, df, site="us", db="mysql"):
        """Append ``df`` to ``table_name`` via ``DataFrame.to_sql``.

        Args:
            table_name: Destination table.
            df: DataFrame whose rows are appended (index is not written).
            site: Site code passed to the engine factory.
            db: ``"mysql"`` or ``"pg"``; selects the engine factory.

        Returns:
            True when the rows were written; False when ``db`` is not
            recognised, the connectivity check failed, or SQLAlchemy raised
            ``OperationalError``.

        Raises:
            FunctionTimedOut: raised by the ``func_set_timeout`` decorator
                when the call exceeds 300 seconds.
        """
        factories = {"mysql": get_country_engine, "pg": get_pg_country_engine}
        engine = None
        try:
            factory = factories.get(db)
            if factory is None or not self.is_internet_available():
                return False
            engine = factory(site)
            df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
            return True
        except OperationalError:
            return False
        finally:
            if engine is not None:
                engine.dispose()

    def _run_mysql_until_ok(self, sql, label, payload, data=None):
        """Retry ``del_mysql_asin`` until it reports success, logging each attempt.

        ``label`` prefixes every log line and ``payload`` is what gets logged;
        ``data`` is what is actually bound to the statement.
        """
        summary = f"{len(payload)}---------{payload}"
        while True:
            try:
                if self.del_mysql_asin(sql, data=data, site=self.site):
                    logging.info(f"{label}-----{summary}")
                    return
                logging.info(f"{label}-失败----{summary}")
            except FunctionTimedOut as exc:
                logging.info(f"{label}-超时{exc}----{summary}")

    def _store_inner_items(self, count, spider, show_count=False):
        """Take ``count`` buffered items and replace their rows in MySQL.

        Rows in ``1688_company_id`` for the affected search terms are
        deleted, the new rows are appended, and each search term is then set
        to state 3 in ``1688_search_term``.
        """
        queue = self.q_dict["inner_item_queue"]
        datas = []
        for _ in range(count):
            datas.extend(queue.get().get('inner_item'))
        print(datas)
        if show_count:
            print(len(datas))
        if not datas:
            return

        # Build the frame once, before anything is deleted, so a malformed
        # batch cannot remove rows that it then fails to re-insert.
        df = pd.DataFrame(datas, columns=spider.col)
        df.drop_duplicates(['memberId', 'search_term'], inplace=True)
        logging.info(f"去重后数据为{df.shape}")

        # Unique search terms, first-seen order.
        terms = list(dict.fromkeys(row.get("search_term") for row in datas))

        # Bind the terms as parameters: interpolating them into the statement
        # breaks as soon as a term contains a quote character.
        placeholders = ", ".join(["%s"] * len(terms))
        sql_del = f"delete from `1688_company_id` where `search_term` in ({placeholders});"
        self._run_mysql_until_ok(sql_del, "删除 1688_company_id", terms, data=tuple(terms))

        while True:
            try:
                # NOTE(review): the insert targets site "us" while the delete
                # and update above/below use self.site — confirm this is intended.
                if self.df_to_sql("1688_company_id", df, site="us", db="mysql"):
                    logging.info(
                        f"更新mysql 数据库 1688_company_id-----{len(datas)}---------{datas}")
                    break
                logging.info("更新mysql 数据库 1688_company_id-----失败")
            except OperationalError as exc:
                logging.info(f"更新mysql 数据库 1688_company_id  连接错误{exc}")
            except FunctionTimedOut as exc:
                logging.info(
                    f"更新mysql 数据库 1688_company_id-超时-{exc}---{len(datas)}---------{datas}")

        updates = [(3, term) for term in terms]
        # A lone tuple is bound as a single execution; a list of tuples is
        # passed through as a batch.
        d = updates[0] if len(updates) == 1 else updates
        sql_up = "UPDATE `1688_search_term` set `state`=(%s)  where search_term=(%s);"
        self._run_mysql_until_ok(sql_up, "修改 search_term 状态3", d, data=d)

    def _mark_error_terms(self, count):
        """Take ``count`` buffered error entries and apply the state update for them."""
        queue = self.q_dict["error_queue"]
        dates = [queue.get() for _ in range(count)]
        if not dates:
            # Nothing buffered: skip the database round trip entirely.
            return
        d = dates[0] if len(dates) == 1 else dates
        # NOTE(review): the statement has two placeholders, so each queued
        # entry presumably has to be a (state, search_term) pair — confirm
        # against what the spider puts in ``search_term`` for error items.
        sql_up = "UPDATE `1688_search_term` set `state`=(%s)  where search_term=(%s);"
        self._run_mysql_until_ok(sql_up, "修改 search_term 状态3", d, data=d)

    def _flush_all(self, spider, show_count=False):
        """Write out everything currently buffered in both queues."""
        self._store_inner_items(
            self.q_dict["inner_item_queue"].qsize(), spider, show_count=show_count)
        self._mark_error_terms(self.q_dict["error_queue"].qsize())

    def process_item(self, item, spider):
        """Buffer ``item`` and write a batch when a queue is full.

        An item flagged ``finish_spider`` first forces both queues to be
        written out. An item with ``inner_item`` is buffered whole; otherwise
        an item flagged ``error_asin`` has its ``search_term`` buffered.
        Afterwards, any queue holding at least ``self.num`` entries has
        exactly ``self.num`` of them written.

        Args:
            item: Scraped item (dict-like).
            spider: Running spider; its ``col`` attribute supplies the
                DataFrame column names.

        Returns:
            The item, so that later pipelines and Scrapy's item signals
            receive it.
        """
        if item.get("finish_spider"):
            print('等待时 将队列数据存储', {k: v.qsize() for k, v in self.q_dict.items()})
            self._flush_all(spider)

        if item.get("inner_item"):
            self.q_dict["inner_item_queue"].put(item)
        elif item.get("error_asin"):
            self.q_dict["error_queue"].put(item.get("search_term"))

        if self.q_dict["inner_item_queue"].qsize() >= self.num:
            self._store_inner_items(self.num, spider)
        if self.q_dict["error_queue"].qsize() >= self.num:
            self._mark_error_terms(self.num)
        return item

    def close_spider(self, spider):
        """Write out whatever is still buffered when the spider closes."""
        print('爬虫结束，存储最后 数据', {k: v.qsize() for k, v in self.q_dict.items()})
        self._flush_all(spider, show_count=True)