import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
import pandas as pd
from utils.db_connect import BaseUtils
from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
from utils.requests_param import Requests_param_val
import traceback
import time
from func_timeout import func_set_timeout
from amazon_spider.VPS_IP import is_internet_available
import math
import numpy as np

print('存储  asin 到 MySQL')


class Save_asin_detail(BaseUtils):
    def __init__(self, site_name=None, proxy_name=None, week=None, year=2024):
        """Set up per-site state, the request helper and the database handles.

        Args:
            site_name: Site code used as the prefix of every table name built
                in ``init_db_names`` (``'us'`` is the only value named in this
                file).
            proxy_name: Proxy configuration name forwarded to
                ``Requests_param_val``.
            week: Week identifier; combined with ``year`` into
                ``self.year_week``, the suffix of the weekly tables.
            year: Year part of ``self.year_week``. Defaults to 2024, the value
                that used to be hard-coded, so existing callers are unaffected.
        """
        super().__init__()
        self.site_name = site_name  # site code
        self.asin_detail_list = []  # rows buffered by process_item until save_data runs
        self.stop_item_queue = True  # read_db_data2 sets this to False once no state=1 rows remain
        self.year_week = f'{year}_{week}'
        self.read_size = 300  # LIMIT used by read_db_data
        print(self.year_week, "存储 调用 配置 proxy_name：", proxy_name)
        self.reuests_para_val = Requests_param_val(site_name=self.site_name, proxy_name=proxy_name)
        # Column names copied out of every queue item, in this order, by process_item.
        self.cols = self.reuests_para_val.db_column(site_name)
        self.init_db_names()


    def init_db_names(self):
        """Open the MySQL / PostgreSQL / Kafka handles and derive the table names.

        Every table name is the site code followed by the matching
        ``DB_REQUESTS_ASIN_PARAMS`` entry with its first two characters
        removed, plus an optional fixed suffix. Finally the list of id ranges
        to work through is fetched from the request helper.
        """
        self.engine = self.mysql_connect()
        # The variation table is written through self.engine, not self.engine_pg.
        self.engine_pg = self.pg_connect()
        self.kafuka_producer = self.kafuka_connect()  # Kafka connection

        def table_for(param_key, suffix=''):
            # Site prefix + configured name without its two leading characters.
            return self.site_name + DB_REQUESTS_ASIN_PARAMS[param_key][2:] + suffix

        self.db_variat = table_for('db_variat')
        self.db_syn = table_for('db_syn')
        self.db_seller_account_syn = table_for('db_seller_account_syn', '_distinct')
        self.db_seller_asin_account = table_for('db_seller_asin_account')
        self.db_asin_image = table_for('db_asin_image', '_pyb')
        self.db_bs_category_asin_detail = table_for('db_bs_category_asin_detail')
        self.minid_maxid_list = self.reuests_para_val.get_minid_maxid(
            site_name=self.site_name,
            state=1,
            minid_maxid=None,
            year_week=self.year_week,
        )

    @func_set_timeout(240)
    def process_item(self, item_queue, requests_error_asin_list, asin_list_update, asin_not_found_list,
                     asin_not_sure_list, asin_not_foot_list, asin_not_foot2_list, asin_not_buyBox_list,
                     asin_not_response_list, asin_not_redirect_list, asin_not_div_id_dp_list,
                     star_list, add_cart_asin_list, week_):
        """Drain ``item_queue`` into ``self.asin_detail_list``, then flush through ``save_data``.

        Each queued item is a dict. Placeholder strings (``'null'``, ``'None'``,
        ``'none'``, ``''``) are replaced by ``None``, an over-long ``volume``
        (more than 38 characters) is dropped, and the values of ``self.cols``
        are appended as one row. Once the queue is empty, ``save_data`` is
        called with all the list arguments unchanged, the buffer is reset and
        the method returns.

        Args:
            item_queue: Queue-like object providing ``empty()`` and ``get()``.
            requests_error_asin_list, asin_list_update, asin_not_found_list,
            asin_not_sure_list, asin_not_foot_list, asin_not_foot2_list,
            asin_not_buyBox_list, asin_not_response_list,
            asin_not_redirect_list, asin_not_div_id_dp_list, star_list,
            add_cart_asin_list, week_: Forwarded as-is to ``save_data``.
        """
        print("=================开始存储数据======================")
        placeholders = ('null', 'None', 'none', '')
        while True:
            if not item_queue.empty():
                record = item_queue.get()
                for field in record:
                    value = record.get(field)
                    if any(value == marker for marker in placeholders):
                        record[field] = None
                volume = record['volume']
                if volume and len(volume) > 38:
                    record['volume'] = None
                # Keep only the fields that are stored in the database, in column order.
                self.asin_detail_list.append([record[column] for column in self.cols])
                continue
            if not item_queue.empty():
                # An item arrived between the two checks; go round again.
                continue
            self.save_data(requests_error_asin_list, asin_list_update, asin_not_found_list, asin_not_sure_list,
                           asin_not_foot_list, asin_not_foot2_list, asin_not_buyBox_list,
                           asin_not_response_list, asin_not_redirect_list, asin_not_div_id_dp_list,
                           star_list, add_cart_asin_list, week_)
            self.asin_detail_list = []
            print("结束--跳出--存储")
            break

    def read_db_data2(self):
        """Claim a batch of pending rows from the weekly sync table, without an id range.

        Selects up to 300 ``state=1`` rows ``for update``, de-duplicates them by
        ASIN into ``self.df_read`` and marks the kept rows ``state=2`` in the
        same transaction. Any exception is printed and the whole read is
        retried after a 5 second pause.

        Returns:
            list[str]: One
            ``asin|date_info|asin_is_variation|data_type|volume|weight_str``
            string per claimed ASIN, or ``[]`` when no ``state=1`` row is left
            (``self.stop_item_queue`` is then set to ``False``).
        """
        columns = ['asin', 'id', 'date_info', 'asin_is_variation', 'data_type', 'volume', 'weight_str']
        while True:
            try:
                self.engine_pg = self.pg_connect()
                with self.engine_pg.begin() as conn:
                    sql_read = f'SELECT asin, id, date_info, asin_is_variation,data_type,volume,weight_str FROM {self.db_syn}_{self.year_week} WHERE STATE=1 LIMIT 300  for update;'
                    print(sql_read)
                    rows = conn.execute(sql_read)
                    self.df_read = pd.DataFrame(rows, columns=columns)
                    self.df_read.drop_duplicates(['asin'], inplace=True)
                    if self.df_read.shape[0] == 0:
                        print('*********** asin 数据抓取 完毕 *****************')
                        self.stop_item_queue = False
                        return []
                    # Fill missing values with the 'null' placeholder. Assign the
                    # result back: an inplace fillna on a column selection does
                    # not update the frame under pandas Copy-on-Write.
                    for column in ('volume', 'weight_str'):
                        self.df_read[column] = self.df_read[column].fillna('null')
                    self.index_tuple = tuple(self.df_read['id'])
                    # One IN (...) form for any number of ids.
                    id_csv = ', '.join(str(row_id) for row_id in self.index_tuple)
                    sql_update = f"""UPDATE {self.db_syn}_{self.year_week} a set state=2 where a.id in ({id_csv})"""
                    conn.execute(sql_update)
                    # Build the pipe-separated task strings column by column.
                    joined = self.df_read['asin'].astype("U")
                    for column in ('date_info', 'asin_is_variation', 'data_type', 'volume', 'weight_str'):
                        joined = joined + '|' + self.df_read[column].astype("U")
                    return list(joined)
            except Exception as e:
                print("读取数据出bug并等待5s继续", e, f"\n{traceback.format_exc()}")
                self.engine_pg = self.pg_connect()
                # Actually wait, as the message says, instead of retrying in a tight loop.
                time.sleep(5)
                continue

    def read_db_data(self):
        """Claim the next batch of pending rows, working through ``self.minid_maxid_list``.

        While id ranges are available, up to ``self.read_size`` ``state=1`` rows
        whose id lies in the first range (``"min-max"``) are selected
        ``for update``, de-duplicated by ASIN into ``self.df_read`` and marked
        ``state=2`` in the same transaction. When the range yields nothing, a
        new range list is requested from the request helper and the read is
        repeated. With no ranges left, ``read_db_data2`` is used instead.
        Any exception is printed and the read is retried after 15 seconds.

        Returns:
            list[str]: One
            ``asin|date_info|asin_is_variation|data_type|volume|weight_str``
            string per claimed ASIN, or ``[]`` when nothing is left to read.
        """
        columns = ['asin', 'id', 'date_info', 'asin_is_variation', 'data_type', 'volume', 'weight_str']
        while True:
            try:
                self.engine_pg = self.pg_connect()
                if not self.minid_maxid_list:
                    # No id range left to work through: use the unranged reader.
                    return self.read_db_data2() or []
                minid, maxid = self.minid_maxid_list[0].split('-')
                with self.engine_pg.begin() as conn:
                    sql_read = f"SELECT asin, id, date_info, asin_is_variation,data_type,volume,weight_str FROM {self.db_syn}_{self.year_week} WHERE state=1 and id BETWEEN {minid} AND {maxid} limit {self.read_size} for update;"
                    print(sql_read)
                    rows = conn.execute(sql_read)
                    self.df_read = pd.DataFrame(rows, columns=columns)
                    self.df_read.drop_duplicates(['asin'], inplace=True)
                    if self.df_read.shape[0] == 0:
                        # This range is exhausted: report it and fetch the ranges again.
                        print('重新获取', self.minid_maxid_list[0], '无数据')
                        self.minid_maxid_list = self.reuests_para_val.get_minid_maxid(
                            site_name=self.site_name, state=3,
                            minid_maxid=self.minid_maxid_list[0],
                            year_week=self.year_week)
                        continue
                    # Assign the filled columns back: an inplace fillna on a column
                    # selection does not update the frame under pandas Copy-on-Write.
                    for column in ('volume', 'weight_str'):
                        self.df_read[column] = self.df_read[column].fillna('null')
                    self.index_tuple = tuple(self.df_read['id'])
                    # One IN (...) form for any number of ids.
                    id_csv = ', '.join(str(row_id) for row_id in self.index_tuple)
                    sql_update = f"""UPDATE {self.db_syn}_{self.year_week} a set state=2 where a.id in ({id_csv})"""
                    conn.execute(sql_update)
                    # Cast every column to text before joining (as read_db_data2
                    # does) so a non-string asin/date_info cannot raise TypeError
                    # and trap this method in its retry loop.
                    joined = self.df_read['asin'].astype("U")
                    for column in ('date_info', 'asin_is_variation', 'data_type', 'volume', 'weight_str'):
                        joined = joined + '|' + self.df_read[column].astype("U")
                    return list(joined)
            except Exception as e:
                # The message now states the 15 s that is really waited below.
                print("读取数据出bug并等待15s继续", e, f"\n{traceback.format_exc()}")
                self.engine_pg = self.pg_connect()
                time.sleep(15)
                continue

    # Split a variation list into chunks so it can be stored in batches
    def split_list(self, lst, chunk_size):
        for i in range(0, len(lst), chunk_size):
            yield lst[i:i + chunk_size]

    def save_data(self, requests_error_asin_list, asin_list_update, asin_not_found_list, asin_not_sure_list,
                  asin_not_foot_list, asin_not_foot2_list, asin_not_buyBox_list, asin_not_response_list,
                  asin_not_redirect_list, asin_not_div_id_dp_list, star_list,
                  add_cart_asin_list, week_):

        self.asin_not_buyBox_list = asin_not_buyBox_list
        self.asin_not_foot_list = asin_not_foot_list
        df_asin_detail = pd.DataFrame(data=self.asin_detail_list, columns=self.cols)
        self.asin_list_update = list(df_asin_detail.asin)
        print("============存储詳情數據数据=======::: ", len(self.asin_list_update))
        if requests_error_asin_list:
            if self.site_name == 'us':
                self.db_change_state(state=1, asin_list=requests_error_asin_list)
            else:
                self.db_change_state(state=1, asin_list=requests_error_asin_list)
        if self.asin_list_update:
            self.db_change_state(state=3, asin_list=self.asin_list_update)
            self.asin_list_update = []
        if asin_not_found_list:
            self.db_change_state(state=4, asin_list=asin_not_found_list)
        if asin_not_sure_list:
            self.db_change_state(state=6, asin_list=asin_not_sure_list)
        if asin_not_foot_list:
            self.db_change_state(state=7, asin_list=asin_not_foot_list)  # 没有脚
        if asin_not_foot2_list:
            self.db_change_state(state=8, asin_list=asin_not_foot2_list)
        if asin_not_buyBox_list:
            self.db_change_state(state=9,
                                 asin_list=asin_not_buyBox_list)  # 只有图片和标题，其他数据没有 https://www.amazon.com/dp/B016IBFUWC/qid=1637397113
        if asin_not_response_list:
            self.db_change_state(state=10,
                                 asin_list=asin_not_response_list)  # https://www.amazon.com/dp/B08G3HL4PR 返回空白 html
        if asin_not_redirect_list:
            self.db_change_state(state=12, asin_list=asin_not_redirect_list)
        if asin_not_div_id_dp_list:
            self.db_change_state(state=13, asin_list=asin_not_div_id_dp_list)

    @func_set_timeout(350)
    def save_asin_variation(self, asin_variation_list):
        """Replace the stored variation rows for the given ASINs and parent ASINs.

        Existing rows whose ``parent_asin`` or ``asin`` appears in the new data
        are deleted in one transaction (in batches of 90 values), then the new
        rows are appended with ``to_sql``. On any exception the method waits
        5 seconds and retries from the start.

        Args:
            asin_variation_list: Rows of
                ``[asin, color, parent_asin, size, state, style, column_2]``.
                Nothing happens when it is empty.
        """
        if not asin_variation_list:
            return
        batch_size = 90  # values per DELETE ... IN (...) statement
        while True:
            try:
                self.engine = self.mysql_connect()
                df_asin_variation = pd.DataFrame(data=asin_variation_list,
                                                 columns=['asin', 'color', 'parent_asin', 'size', 'state',
                                                          'style', 'column_2'])
                df_asin_variation.drop_duplicates(['asin', 'parent_asin'], inplace=True)  # de-duplicate
                print(f"{self.db_variat}====变体信息 {len(asin_variation_list)}:")
                if df_asin_variation.shape[0] > 0:
                    # Chunk plain Python lists instead of np.array_split on a
                    # DataFrame, which depends on the deprecated
                    # DataFrame.swapaxes.
                    delete_keys = (
                        ('parent_asin', df_asin_variation['parent_asin'].dropna().drop_duplicates().tolist()),
                        ('asin', df_asin_variation['asin'].dropna().drop_duplicates().tolist()),
                    )
                    with self.engine.begin() as conn:
                        for column, values in delete_keys:
                            for chunk in self.split_list(values, batch_size):
                                # Bound parameters: the values come from scraped
                                # pages and must not be spliced into the SQL text.
                                placeholders = ', '.join(['%s'] * len(chunk))
                                conn.execute(
                                    f"DELETE FROM {self.db_variat} WHERE {column} IN ({placeholders});",
                                    tuple(chunk))
                    # Keep only the first 180 characters of each free-text column.
                    for column in ('color', 'size', 'style', 'column_2'):
                        df_asin_variation[column] = df_asin_variation[column].apply(
                            lambda x: str(x)[:180] if x is not None else None)
                    # NOTE(review): the insert runs outside the delete transaction,
                    # so a failure here leaves the old rows deleted until the retry
                    # succeeds.
                    df_asin_variation.to_sql(f'{self.db_variat}', con=self.engine, if_exists='append',
                                             index=False)
                    print('存储变体成功：', df_asin_variation.shape)
                break
            except Exception as e:
                time.sleep(5)
                print(f"存储'{self.db_variat}'存变体信息 失败，等待5s继续", e, f"\n{traceback.format_exc()}")

    @func_set_timeout(240)
    def save_bs_category_asin_detail(self, bs_category_asin_list_pg):
        """Append best-sellers-rank text rows to the weekly BSR detail table.

        The rows are de-duplicated by ASIN and appended through
        ``self.engine_pg``. On any exception the PostgreSQL engine is
        re-created and the write is retried after 12 seconds.

        Args:
            bs_category_asin_list_pg: Rows of ``[asin, date_info,
                best_sellers_rank, last_herf, all_best_sellers_href]``.
                Nothing is written when it is empty.
        """
        while True:
            try:
                if not bs_category_asin_list_pg:
                    return
                if not is_internet_available():
                    # Connectivity check failed: rebuild both engines first.
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                bsr_frame = pd.DataFrame(
                    data=bs_category_asin_list_pg,
                    columns=['asin', 'date_info', 'best_sellers_rank', 'last_herf',
                             'all_best_sellers_href'],
                )
                bsr_frame.drop_duplicates(['asin'], inplace=True)  # de-duplicate
                print(
                    f"asin bsr文本{len(bs_category_asin_list_pg)}-{self.db_bs_category_asin_detail}_{self.year_week}")
                bsr_frame.to_sql(
                    self.db_bs_category_asin_detail + f'_{self.year_week}',
                    con=self.engine_pg,
                    if_exists='append',
                    index=False,
                )
                return
            except Exception as e:
                print("存储 存储 asin bsr 文本 数据错误", e)
                self.engine_pg = self.pg_connect()
                time.sleep(12)

    @func_set_timeout(240)
    def save_buyBoxname_url(self, buyBox_list):
        """Insert seller accounts into the seller-account sync table.

        Rows are de-duplicated by ``seller_id`` and inserted with
        ``ON DUPLICATE KEY UPDATE seller_id = values(seller_id)``, so a seller
        that already exists does not make the insert fail. On any exception
        the method waits 5 seconds and retries.

        Args:
            buyBox_list: Rows of ``[seller_id, account_name, url]``. Nothing is
                written when it is empty.
        """
        while True:
            try:
                if buyBox_list:
                    print('存储店铺 syn表：', self.db_seller_account_syn, len(buyBox_list))
                    self.engine = self.mysql_connect()
                    df_seller_id = pd.DataFrame(data=buyBox_list, columns=['seller_id', 'account_name', 'url'])
                    df_seller_id.drop_duplicates(['seller_id'], inplace=True)  # de-duplicate
                    df_seller_id_list = df_seller_id.values.tolist()
                    print(len(df_seller_id_list))
                    with self.engine.begin() as conn:
                        conn.execute(
                            f"insert into {self.db_seller_account_syn} (seller_id, account_name, url) values (%s, %s, %s) ON DUPLICATE KEY UPDATE seller_id = values(seller_id)",
                            df_seller_id_list)
                break
            except Exception as e:
                print(f"存储'{self.db_seller_account_syn}'存储 店铺url，等待5s继续", e, f"\n{traceback.format_exc()}")
                # Actually wait, as the message says, instead of retrying in a tight loop.
                time.sleep(5)
                continue

    @func_set_timeout(240)
    def save_buyBoxname_asin(self, buyBoxname_asin_list):
        """Replace the stored seller/ASIN links for the given ASINs.

        Rows are de-duplicated by ``(seller_id, asin)``; existing rows for the
        same ASINs are deleted, then the new rows are appended with ``to_sql``.
        On any exception the method waits 5 seconds and retries.

        Args:
            buyBoxname_asin_list: Rows of ``[account_name, asin, seller_id]``.
                Nothing is written when it is empty.
        """
        while True:
            try:
                if buyBoxname_asin_list:
                    print(f"=c存储 save_buyBoxname_asin ========={len(buyBoxname_asin_list)}")
                    df_seller_asin_account = pd.DataFrame(data=buyBoxname_asin_list,
                                                          columns=['account_name', 'asin', 'seller_id'])
                    df_seller_asin_account.drop_duplicates(['seller_id', 'asin'], inplace=True)  # de-duplicate
                    if df_seller_asin_account.shape[0] > 0:
                        self.engine = self.mysql_connect()
                        asins = df_seller_asin_account['asin'].dropna().drop_duplicates().tolist()
                        with self.engine.begin() as conn:
                            if asins:
                                # Bound parameters instead of a Python tuple repr:
                                # works for any number of ASINs and keeps scraped
                                # values out of the SQL text.
                                placeholders = ', '.join(['%s'] * len(asins))
                                conn.execute(
                                    f"delete from {self.db_seller_asin_account} where asin in ({placeholders});",
                                    tuple(asins))
                        df_seller_asin_account.to_sql(self.db_seller_asin_account, con=self.engine, if_exists='append',
                                                      index=False)
                break
            except Exception as e:
                print(f"存储'{self.db_seller_asin_account}'存储卖家信息, asin 失败，等待5s继续", e, f"\n{traceback.format_exc()}")
                # Actually wait, as the message says, instead of retrying in a tight loop.
                time.sleep(5)
                continue

    # @func_set_timeout(240)
    # def save_img_video(self, all_img_video_list):
    #     # 使用字典来进行去重
    #     seen_asin = {}
    #     unique_batch = []
    #     for item in all_img_video_list:
    #         asin = item[0]
    #         if asin not in seen_asin:
    #             seen_asin[asin] = True
    #             unique_batch.append(item)
    #     batch_size = 20  # 每个批次的大小
    #     num_batches = math.ceil(len(unique_batch) / batch_size)
    #     for i in range(num_batches):
    #         start_idx = i * batch_size
    #         end_idx = (i + 1) * batch_size
    #         current_batch = unique_batch[start_idx:end_idx]
    #         # 提取内部列表的第一个元素并构建 asin 值
    #         asin_values = "','".join([item[0] for item in current_batch])
    #         # 构建删除语句
    #         while True:
    #             try:
    #                 with self.engine_pg.begin() as conn:
    #                     sql_delete = f"delete from {self.db_asin_image} where asin IN ('{asin_values}')"
    #                     print(sql_delete)
    #                     conn.execute(sql_delete)
    #                 break
    #             except Exception as e:
    #                 self.engine = self.mysql_connect()
    #                 self.engine_pg = self.pg_connect()
    #                 print(f"存储'{self.db_asin_image}'删除-位置 失败，等待5s继续", e,
    #                       f"\n{traceback.format_exc()}")
    #     while True:
    #         try:
    #             if all_img_video_list:
    #                 self.engine = self.mysql_connect()
    #                 self.engine_pg = self.pg_connect()
    #                 print(f"{self.db_asin_image}存储图片----视频----{len(all_img_video_list)}--")
    #                 df_asin_img_video = pd.DataFrame(data=all_img_video_list,
    #                                                  columns=['asin', 'img_url', 'img_order_by', 'data_type'])
    #                 df_asin_img_video.to_sql(self.db_asin_image, con=self.engine_pg, if_exists='append',
    #                                          index=False)
    #                 all_img_video_list = []
    #             break
    #         except Exception as e:
    #             print(f"存储'{self.db_asin_image}'存储图片----视频-----url----位置 失败，等待5s继续", e,
    #                   f"\n{traceback.format_exc()}")
    #             continue

    def db_change_state(self, state=2, asin_list=None):
        self.db_change_state_common(state=state, asin_list=asin_list)
        self.asin_not_buyBox_list = []
        self.asin_not_foot_list = []

    def db_change_state_common(self, state, asin_list):
        """Set ``state`` on the claimed rows (currently ``state=2``) of the given ASINs.

        The ASINs are mapped to row ids through ``self.df_read``. For state 3,
        ASINs also present in ``self.asin_not_foot_list`` or
        ``self.asin_not_buyBox_list`` are left out. The update is retried every
        15 seconds until it succeeds.

        Args:
            state: Target state value (1 = roll back, 3 = success).
            asin_list: ASINs whose rows should receive ``state``.
        """
        print(f"==================== 存储状态 {state} 数据 ========== {len(asin_list)} ========")
        batch = self.df_read
        selected = batch.asin.isin(asin_list)
        if state == 3:
            # Leave out the ASINs destined for state 7 / 9.
            selected = selected & ~batch.asin.isin(self.asin_not_foot_list) & ~batch.asin.isin(
                self.asin_not_buyBox_list)
        id_tuple = tuple(batch.loc[selected].id)
        # A one-element tuple would render as "(5,)", so it is formatted by hand.
        id_clause = f"({id_tuple[0]})" if len(id_tuple) == 1 else f"{id_tuple}"
        while True:
            try:
                self.engine_pg = self.pg_connect()
                with self.engine_pg.begin() as conn:
                    if id_tuple:
                        conn.execute(
                            f"update {self.db_syn}_{self.year_week} set state={state} where id in {id_clause} and state=2;")
                return
            except Exception as e:
                print(f"更改{self.db_syn}_{self.year_week}表的state={state}出错", e, f"\n{traceback.format_exc()}")
                time.sleep(15)
                self.engine_pg = self.pg_connect()
