import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from func_timeout import func_set_timeout
import pandas as pd
from utils.db_connect import BaseUtils
from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
from utils.requests_param import Requests_param_val
import traceback
import time
import random
from amazon_spider.VPS_IP import is_internet_available

# Import-time banner: announces that this module stores ASIN data in the pg (PostgreSQL) database.
print('存储 asin 到pg数据库')


class Save_asin_detail(BaseUtils):
    def __init__(self, site_name=None, proxy_name=None, week=None, month=None, spider_int=None):
        """Set up per-site state, the request-parameter helper and the database handles.

        Args:
            site_name: Site identifier; it prefixes the table names built by
                ``init_db_names`` and is passed to ``Requests_param_val``.
            proxy_name: Forwarded unchanged to ``Requests_param_val``.
            week: Accepted for call compatibility; this constructor does not read it.
            month: Stored on the instance as ``self.month``.
            spider_int: Stored on the instance as ``self.spider_int``.
        """
        super().__init__()

        # Plain per-instance state.
        self.site_name = site_name  # site
        self.spider_int = spider_int
        self.month = month
        self.asin_detail_list = []  # rows buffered by process_item until the next save

        # Helper object first, then the DB engines / table names, then the column list.
        request_params = Requests_param_val(proxy_name=proxy_name, site_name=self.site_name)
        self.reuests_para_val = request_params
        self.init_db_names()
        # Column order used for every buffered row and for the DataFrame built from them.
        self.cols = request_params.db_column(site_name)

    def init_db_names(self):
        """Open both database engines and derive the site-specific table names.

        Sets ``self.engine`` (MySQL), ``self.engine_pg`` (PostgreSQL) and the
        three table-name attributes ``db_syn``, ``db_seller_account_syn`` and
        ``db_seller_asin_account``.
        """
        self.engine = self.mysql_connect()
        # Original note: when variants are changed, the variant table is stored via self.engine.
        self.engine_pg = self.pg_connect()

        site = self.site_name
        self.db_syn = site + '_all_syn_st_month_2024'

        # The configured names lose their first two characters before the site is
        # prepended (presumably a two-letter site prefix -- confirm in amazon_params.params).
        account_syn_suffix = DB_REQUESTS_ASIN_PARAMS['db_seller_account_syn'][2:]
        self.db_seller_account_syn = site + account_syn_suffix + '_distinct'

        asin_account_suffix = DB_REQUESTS_ASIN_PARAMS['db_seller_asin_account'][2:]
        self.db_seller_asin_account = site + asin_account_suffix


    @func_set_timeout(350)
    def process_item(self, item_queue, requests_error_asin_list, asin_list_update, asin_not_found_list,
                     asin_not_sure_list, asin_not_foot_list, asin_not_foot2_list, asin_not_buyBox_list,
                     asin_not_response_list, asin_not_redirect_list, asin_not_div_id_dp_list,
                     star_list, add_cart_asin_list, bs_category_asin_list, week_):
        """Drain ``item_queue`` into ``self.asin_detail_list`` and then persist the batch.

        Each queued item is a mapping. Placeholder strings are normalised to
        ``None``, an over-long ``volume`` is dropped, and the values are laid
        out in ``self.cols`` order. Once the queue reports empty, ``save_data``
        is called, the buffer is reset and the method returns.

        Args:
            item_queue: Queue-like object providing ``empty()`` and ``get()``.
            week_: Printed here and forwarded to ``save_data``.
            (all other arguments): Only forwarded to ``save_data``.
        """
        print('week_::', week_)
        print("=================开始存储数据======================")

        # Textual stand-ins for "no value" that are stored as NULL instead.
        blank_markers = ('null', 'None', 'none', '')

        while True:
            if not item_queue.empty():
                record = item_queue.get()
                for key in record:
                    if record.get(key) in blank_markers:
                        record[key] = None

                # A volume string longer than 38 characters is discarded.
                volume = record['volume']
                if volume and len(volume) > 38:
                    record['volume'] = None

                # Only the fields that are stored in the database, in column order.
                self.asin_detail_list.append([record[column] for column in self.cols])
                continue

            # Re-check before flushing: something may have been queued in the meantime.
            if not item_queue.empty():
                continue

            self.save_data(requests_error_asin_list, asin_list_update, asin_not_found_list,
                           asin_not_sure_list, asin_not_foot_list, asin_not_foot2_list,
                           asin_not_buyBox_list, asin_not_response_list, asin_not_redirect_list,
                           asin_not_div_id_dp_list, star_list, add_cart_asin_list,
                           bs_category_asin_list, week_)
            self.asin_detail_list = []
            print("结束--跳出--存储")
            break


    def save_data(self, requests_error_asin_list, asin_list_update, asin_not_found_list, asin_not_sure_list,
                  asin_not_foot_list, asin_not_foot2_list, asin_not_buyBox_list, asin_not_response_list,
                  asin_not_redirect_list, asin_not_div_id_dp_list, star_list,
                  add_cart_asin_list, bs_category_asin_list, week_):
        """Append the buffered ASIN detail rows to PostgreSQL.

        Builds a DataFrame from ``self.asin_detail_list`` (columns
        ``self.cols``), cuts the free-text columns down to fixed maximum
        lengths and appends the result to the detail table via
        ``self.engine_pg``.

        All parameters are accepted so the call in ``process_item`` keeps
        working; none of them is read by this method.

        Raises:
            KeyError: If one of the truncated text columns is not in ``self.cols``.
            Exception: Anything raised by ``DataFrame.to_sql`` propagates;
                there is no retry in this method.
        """
        # Maximum stored length for each free-text column.
        text_limits = {
            'ac_name': 100,
            'brand': 100,
            'title': 400,
            'category': 400,
            'img_url': 400,
            'material': 150,
            'volume': 50,
            'package_quantity': 50,
            'pattern_name': 50,
            'weight_str': 250,
        }

        def clip(value, limit):
            """Return ``str(value)`` cut to ``limit`` characters; missing values stay None."""
            # pandas represents a missing value in a numeric column as NaN, and
            # NaN != NaN, so this keeps it from being stored as the text 'nan'.
            if value is None or (isinstance(value, float) and value != value):
                return None
            return str(value)[:limit]

        print('233333333333333333333333333333')
        print('self.cols::', self.cols)

        df_asin_detail = pd.DataFrame(data=self.asin_detail_list, columns=self.cols)

        # Both engines are re-created on every call; only the PostgreSQL one is used below.
        self.engine = self.mysql_connect()
        self.engine_pg = self.pg_connect()
        print(f"===============存储pg詳情數據数据=={self.site_name}_asin_detail_month_=====")

        df = df_asin_detail.loc[:, self.cols]
        for column, limit in text_limits.items():
            df[column] = df[column].apply(clip, args=(limit,))

        # NOTE(review): the target table is hard-coded and ignores self.site_name and
        # the month, unlike the name printed above -- confirm this is intended.
        df.to_sql("us_asin_detail_month_2024_07_copy", con=self.engine_pg,
                  if_exists='append',
                  index=False)

    @func_set_timeout(240)
    def save_bs_category_asin_detail(self, bs_category_asin_list_pg):
        """Append best-sellers-rank text rows for ASINs to the monthly PostgreSQL table.

        Rows are de-duplicated by ``asin``, tagged with ``self.spider_int`` and
        appended to ``<site>_bs_category_asin_detail_month_<date>``, where
        ``<date>`` is the first row's ``date_info`` with ``-`` replaced by
        ``_``. Any exception triggers a reconnect, a random 10-20.5 s pause
        and another attempt.

        Args:
            bs_category_asin_list_pg: Rows of five values matching the columns
                asin, date_info, best_sellers_rank, last_herf,
                all_best_sellers_href.
        """
        bsr_columns = ['asin', 'date_info', 'best_sellers_rank', 'last_herf',
                       'all_best_sellers_href']
        while True:
            try:
                # Engines are rebuilt only when the connectivity probe fails.
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                print("存储 asin bsr 文本 存储pg", len(bs_category_asin_list_pg))

                frame = pd.DataFrame(data=bs_category_asin_list_pg, columns=bsr_columns)
                frame.drop_duplicates(['asin'], inplace=True)  # one row per asin
                frame['spider_int'] = self.spider_int

                if len(frame) > 0:
                    date_info_ = frame['date_info'].iloc[0].replace('-', '_')
                    table_name = f'{self.site_name}_bs_category_asin_detail_month_{date_info_}'
                    print(table_name)
                    frame.to_sql(table_name, con=self.engine_pg, if_exists='append', index=False)

                # Rebinds the local name only; the caller's list is left untouched.
                bs_category_asin_list_pg = []
                break
            except Exception as exc:
                print("存储 存储 asin bsr 文本 数据错误 mysql", exc)
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))

    @func_set_timeout(240)
    def save_buyBoxname_url(self, buyBox_list):
        """Upsert seller id / account name / url rows into the seller-account MySQL table.

        Does nothing for an empty ``buyBox_list``. Otherwise the rows are
        de-duplicated by ``seller_id`` and inserted with
        ``ON DUPLICATE KEY UPDATE`` in one transaction. Any exception triggers
        a reconnect, a pause and another attempt.

        Args:
            buyBox_list: Rows of three values: seller_id, account_name, url.
        """
        while True:
            try:
                if not buyBox_list:
                    break

                print('存储店铺 syn表：', self.db_seller_account_syn, len(buyBox_list))
                # Engines are rebuilt only when the connectivity probe fails.
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()

                sellers = pd.DataFrame(data=buyBox_list, columns=['seller_id', 'account_name', 'url'])
                sellers.drop_duplicates(['seller_id'], inplace=True)  # one row per seller
                seller_rows = sellers.values.tolist()
                print(len(seller_rows))

                insert_sql = (
                    f"insert into {self.db_seller_account_syn} (seller_id, account_name, url) "
                    f"values (%s, %s, %s) ON DUPLICATE KEY UPDATE seller_id = values(seller_id)"
                )
                with self.engine.begin() as conn:
                    conn.execute(insert_sql, seller_rows)

                # Rebinds the local name only; the caller's list is left untouched.
                buyBox_list = []
                break
            except Exception as exc:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))
                print(f"存储'{self.db_seller_account_syn}' 存储卖家 name, url失败，等待5s继续", exc,
                      f"\n{traceback.format_exc()}")
                time.sleep(5)

    @func_set_timeout(240)
    def save_buyBoxname_asin(self, buyBoxname_asin_list):
        """Replace the seller/ASIN rows for the given ASINs in the MySQL table.

        Rows are de-duplicated by ``(seller_id, asin)``. Every existing row
        whose ``asin`` appears in the batch is deleted from
        ``self.db_seller_asin_account`` and the batch is then appended. Any
        exception triggers a reconnect, a random 10-20.5 s pause and another
        attempt. The caller's list is not modified.

        Args:
            buyBoxname_asin_list: Rows of three values: account_name, asin,
                seller_id. An empty list is a no-op.
        """
        while True:
            try:
                if buyBoxname_asin_list:
                    # Engines are rebuilt only when the connectivity probe fails.
                    if not is_internet_available():
                        self.engine = self.mysql_connect()
                        self.engine_pg = self.pg_connect()
                    print(f"存储 save_buyBoxname_asin ========={len(buyBoxname_asin_list)}")
                    df_seller_asin_account = pd.DataFrame(data=buyBoxname_asin_list,
                                                          columns=['account_name', 'asin', 'seller_id'])
                    df_seller_asin_account.drop_duplicates(['seller_id', 'asin'], inplace=True)
                    if df_seller_asin_account.shape[0] > 0:
                        # Distinct ASINs, first-seen order.
                        asins = list(dict.fromkeys(df_seller_asin_account.asin))
                        # The ASIN values are bound as parameters, not formatted into
                        # the statement, so quoting in scraped values cannot alter the
                        # SQL and a single ASIN needs no special case.
                        placeholders = ', '.join(['%s'] * len(asins))
                        sql_delete = (
                            f"delete from {self.db_seller_asin_account} "
                            f"where asin in ({placeholders})"
                        )
                        with self.engine.begin() as conn:
                            conn.execute(sql_delete, tuple(asins))
                        # NOTE(review): the delete above is committed before this insert
                        # runs, so the two steps are not atomic.
                        df_seller_asin_account.to_sql(self.db_seller_asin_account, con=self.engine,
                                                      if_exists='append',
                                                      index=False)
                break
            except Exception as e:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))
                print(f"存储'{self.db_seller_asin_account}'存储卖家信息, asin 失败，等待5s继续", e, f"\n{traceback.format_exc()}")
                continue
