import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # add the parent directory of sys.path[0] to the import path
from func_timeout import func_set_timeout
import pandas as pd
from utils.db_connect import BaseUtils
from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
from utils.requests_param import Requests_param_val
import traceback
import time
import random
from amazon_spider.VPS_IP import is_internet_available
from redis.exceptions import LockError

# Import-time side effect: prints a banner as soon as the module is loaded.
# The message reads "store asin into the pg database".
print('存储 asin 到pg数据库')


class Save_asin_detail(BaseUtils):
    def __init__(self, site_name=None, proxy_name=None, week=None, month=None, spider_int=None):
        """Set up connections, table names and per-batch state.

        Args:
            site_name: Site code; used as a prefix when building table names.
            proxy_name: Forwarded to ``Requests_param_val``.
            week: Accepted for caller compatibility; not used by this class.
            month: Suffix of the monthly sync table that rows are read from.
            spider_int: Value written to the ``spider_int`` column by
                ``save_bs_category_asin_detail``.
        """
        super().__init__()
        self.site_name = site_name  # site
        self.month = month
        self.spider_int = spider_int
        self.asin_detail_list = []
        # Per-batch state that save_data() fills in. Initialised here so that
        # db_change_state_common(state=3), which reads the two exclusion
        # lists, does not raise AttributeError when it runs before save_data().
        self.asin_list_update = []
        self.asin_not_buyBox_list = []
        self.asin_not_foot_list = []
        self.read_size = 180
        print("测试  测试  测试  存储 pg  调用 配置 proxy_name：", proxy_name)
        # Must exist before init_db_names(), which calls get_minid_maxid() on it.
        self.reuests_para_val = Requests_param_val(site_name=self.site_name, proxy_name=proxy_name)
        self.init_db_names()
        self.cols = self.reuests_para_val.db_column(site_name)
        self.redis_client = self.redis_db()
    def init_db_names(self):
        """Open the backing connections and derive the table names used by this class.

        Reads ``self.site_name``, ``self.month`` and ``self.reuests_para_val``,
        so those must be assigned before this method is called.
        """
        self.engine = self.mysql_connect()
        self.engine_pg = self.pg_connect()  # when changing variants, the variant table is stored via self.engine
        self.kafuka_producer = self.kafuka_connect()  # Kafka connection
        self.kafuka_producer_str = self.kafuka_connect(acks=True,connections_max_idle_ms=300000)  # Kafka connection
        self.redis_db14 = self.redis_db()  # redis connection
        # NOTE(review): the year 2025 is hard-coded into the sync-table prefix.
        self.db_syn = self.site_name + '_all_syn_st_month_2025'
        # [2:] drops the first two characters of the configured names
        # (presumably a default site prefix — TODO confirm against DB_REQUESTS_ASIN_PARAMS).
        self.db_seller_account_syn = self.site_name + DB_REQUESTS_ASIN_PARAMS['db_seller_account_syn'][2:] + '_distinct'
        self.db_seller_asin_account = self.site_name + DB_REQUESTS_ASIN_PARAMS['db_seller_asin_account'][2:]
        # Id ranges ("min-max" strings, as read_db_data splits them on '-') still to be processed.
        self.minid_maxid_list = self.reuests_para_val.get_minid_maxid(site_name=self.site_name, state=1,
                                                                      minid_maxid=None, month=self.month)

    @func_set_timeout(240)
    def process_item(self, item_queue, requests_error_asin_list, asin_list_update, asin_not_found_list,
                     asin_not_sure_list, asin_not_foot_list, asin_not_foot2_list, asin_not_buyBox_list,
                     asin_not_response_list, asin_not_redirect_list, asin_not_div_id_dp_list,
                     star_list, add_cart_asin_list, bs_category_asin_list, week_):
        """Drain ``item_queue`` into ``self.asin_detail_list`` and persist the batch.

        Each queued item is a mapping keyed by column name. Placeholder strings
        ('null', 'None', 'none', '') are replaced by ``None``, a ``volume``
        longer than 38 characters is dropped, and the values are collected in
        ``self.cols`` order. Once the queue is empty the remaining arguments are
        forwarded unchanged to ``save_data`` and the buffer is cleared.
        """
        print("=================开始存储数据======================")
        placeholders = ('null', 'None', 'none', '')
        while True:
            if not item_queue.empty():
                record = item_queue.get()
                for field in record:
                    if record.get(field) in placeholders:
                        record[field] = None
                # Over-long volume strings are discarded rather than truncated.
                volume = record['volume']
                if volume and len(volume) > 38:
                    record['volume'] = None
                # Keep only the columns that are written to the database.
                self.asin_detail_list.append([record[column] for column in self.cols])
                continue
            # Re-check before flushing in case an item arrived in the meantime.
            if not item_queue.empty():
                continue
            self.save_data(requests_error_asin_list, asin_list_update, asin_not_found_list, asin_not_sure_list,
                           asin_not_foot_list, asin_not_foot2_list, asin_not_buyBox_list,
                           asin_not_response_list, asin_not_redirect_list, asin_not_div_id_dp_list,
                           star_list, add_cart_asin_list, bs_category_asin_list, week_)
            self.asin_detail_list = []
            print("结束--跳出--存储")
            break

    def read_db_data2(self):
        """Claim the next batch of pending rows without an id-range filter.

        Selects up to ``self.read_size`` rows with ``STATE = 1`` from the
        monthly sync table and hands the query to
        ``self.engine_pg.read_then_update`` together with
        ``set_values={"state": 2}`` keyed on ``id``. The returned frame is
        de-duplicated by ASIN and kept in ``self.df_read`` for the later state
        updates. Any exception other than ``LockError`` triggers a reconnect,
        a random 10-20.5 s pause and a retry.

        Returns:
            list[str]: One
            ``asin|date_info|asin_is_variation|data_type|volume|weight_str``
            string per distinct ASIN, or ``[]`` when no rows came back.
        """
        while True:
            try:
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                sql_read = f"SELECT asin, id, date_info, asin_is_variation,data_type,volume,weight_str FROM {self.db_syn}_{self.month} WHERE STATE = 1 ORDER BY id FOR UPDATE SKIP LOCKED LIMIT {self.read_size}"
                print(sql_read)
                self.df_read = self.engine_pg.read_then_update(
                    select_sql=sql_read,
                    update_table=f"{self.db_syn}_{self.month}",
                    set_values={"state": 2},  # new state for the selected rows
                    where_keys=["id"],  # rows are matched by id
                )
                self.df_read.drop_duplicates(['asin'], inplace=True)
                if self.df_read.shape[0] == 0:
                    return []
                self.index_tuple = tuple(self.df_read['id'])
                print(self.index_tuple,'self.index_tuplself.index_tuplself.index_tupl')
                # Fill missing values with the 'null' placeholder. Assign the
                # result back: ``df[col].fillna(..., inplace=True)`` works on a
                # temporary Series and leaves the frame untouched under pandas
                # Copy-on-Write.
                self.df_read['volume'] = self.df_read['volume'].fillna('null')
                self.df_read['weight_str'] = self.df_read['weight_str'].fillna('null')

                asin_list = list(
                    self.df_read.asin + '|' + self.df_read.date_info + '|' + self.df_read.asin_is_variation.astype(
                        "U") + '|' + self.df_read.data_type.astype("U") + '|' + self.df_read.volume.astype(
                        "U") + '|' + self.df_read.weight_str.astype("U"))

                return asin_list
            except LockError:
                print("获取锁失败1111,其他程序正在查询")
            except Exception as e:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))
                print("读取数据出bug并等待5s继续", e, f"\n{traceback.format_exc()}")
                continue

    def read_db_data(self):
        """Claim the next batch of pending rows, working through id ranges first.

        While ``self.minid_maxid_list`` is non-empty, its first entry
        (``"min-max"``) restricts the query to that id range. The query is
        handed to ``self.engine_pg.read_then_update`` with
        ``set_values={"state": 2}`` keyed on ``id``, and the de-duplicated
        frame is kept in ``self.df_read``. When a range returns no rows, it is
        passed to ``get_minid_maxid(state=3, ...)`` and the list that call
        returns replaces ``self.minid_maxid_list`` before the loop retries.
        With no ranges left the call falls back to ``read_db_data2``.

        Any exception other than ``LockError`` triggers a reconnect and a
        pause before the next attempt.

        Returns:
            list[str]: One
            ``asin|date_info|asin_is_variation|data_type|volume|weight_str``
            string per distinct ASIN, or ``[]`` when nothing is pending.
        """
        while True:
            try:
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                if not self.minid_maxid_list:
                    # No id ranges left: use the unranged query instead.
                    return self.read_db_data2() or []

                minid, maxid = self.minid_maxid_list[0].split('-')
                sql_read = f"SELECT asin, id, date_info, asin_is_variation,data_type,volume,weight_str FROM {self.db_syn}_{self.month} WHERE state = 1  AND id BETWEEN {minid} AND {maxid} ORDER BY id FOR UPDATE SKIP LOCKED LIMIT {self.read_size};"
                print(sql_read)
                self.df_read = self.engine_pg.read_then_update(
                    select_sql=sql_read,
                    update_table=f"{self.db_syn}_{self.month}",
                    set_values={"state": 2},  # new state for the selected rows
                    where_keys=["id"],  # rows are matched by id
                )
                self.df_read.drop_duplicates(['asin'], inplace=True)
                if self.df_read.shape[0] > 0:
                    # Fill missing values with the 'null' placeholder. Assign
                    # the result back: ``df[col].fillna(..., inplace=True)``
                    # works on a temporary Series and leaves the frame
                    # untouched under pandas Copy-on-Write.
                    self.df_read['volume'] = self.df_read['volume'].fillna('null')
                    self.df_read['weight_str'] = self.df_read['weight_str'].fillna('null')
                    self.index_tuple = tuple(self.df_read['id'])
                    asin_list = list(
                        self.df_read.asin + '|' + self.df_read.date_info + '|' + self.df_read.asin_is_variation.astype(
                            "U") + '|' + self.df_read.data_type.astype("U") + '|' + self.df_read.volume.astype(
                            "U") + '|' + self.df_read.weight_str.astype("U"))

                    return asin_list

                # The current range returned no rows: report it and fetch the
                # updated range list before looping again.
                print('重新获取', self.minid_maxid_list[0], '无数据')
                self.minid_maxid_list = self.reuests_para_val.get_minid_maxid(
                    site_name=self.site_name,
                    state=3,
                    minid_maxid=self.minid_maxid_list[0],
                    month=self.month)
            except LockError:
                print("获取锁失败,其他程序正在查询")
            except Exception as e:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))
                print("读取数据出bug并等待5s继续", e, f"\n{traceback.format_exc()}")
                time.sleep(15)
                continue

    def split_list(self, lst, chunk_size):
        for i in range(0, len(lst), chunk_size):
            yield lst[i:i + chunk_size]

    def save_data(self, requests_error_asin_list, asin_list_update, asin_not_found_list, asin_not_sure_list,
                  asin_not_foot_list, asin_not_foot2_list, asin_not_buyBox_list, asin_not_response_list,
                  asin_not_redirect_list, asin_not_div_id_dp_list, star_list,
                  add_cart_asin_list, bs_category_asin_list, week_):
        """Write the buffered detail rows to PostgreSQL and update the row states.

        The rows in ``self.asin_detail_list`` are grouped by the values in
        their comma-separated ``date_info`` field and appended to
        ``{site}_asin_detail_month_{date_info}`` (with ``-`` replaced by ``_``).
        Each write is retried until it succeeds. Afterwards every status list
        is passed to ``db_change_state`` with its state code.

        Args:
            requests_error_asin_list: ASINs set back to state 1.
            asin_list_update: Ignored; the stored ASINs are taken from the
                buffered rows instead.
            asin_not_found_list: ASINs set to state 4.
            asin_not_sure_list: ASINs set to state 6.
            asin_not_foot_list: ASINs set to state 7 and excluded from state 3.
            asin_not_foot2_list: ASINs set to state 8.
            asin_not_buyBox_list: ASINs set to state 9 and excluded from state 3.
            asin_not_response_list: ASINs set to state 10.
            asin_not_redirect_list: ASINs set to state 12.
            asin_not_div_id_dp_list: ASINs set to state 13.
            star_list, add_cart_asin_list, bs_category_asin_list, week_:
                Accepted for caller compatibility; not used here.
        """
        self.asin_not_buyBox_list = asin_not_buyBox_list
        self.asin_not_foot_list = asin_not_foot_list
        df_asin_detail = pd.DataFrame(data=self.asin_detail_list, columns=self.cols)
        self.asin_list_update = list(df_asin_detail.asin)

        # date_info may list several periods separated by commas; an ASIN is
        # stored once per period it belongs to.
        asins_by_period = {}
        for asin, date_info in zip(df_asin_detail['asin'], df_asin_detail['date_info']):
            for period in str(date_info).split(","):
                asins_by_period.setdefault(period, []).append(asin)

        # Maximum stored length per text column.
        max_lengths = {
            'ac_name': 100,
            'brand': 100,
            'title': 400,
            'category': 400,
            'img_url': 400,
            'material': 150,
            'volume': 50,
            'package_quantity': 50,
            'pattern_name': 50,
            'weight_str': 250,
        }

        for period, period_asins in asins_by_period.items():
            report_info = period.replace('-', "_")
            while True:
                try:
                    if not is_internet_available():
                        self.engine = self.mysql_connect()
                        self.engine_pg = self.pg_connect()
                    print(f"===============存储pg詳情數據数据=={self.site_name}_asin_detail_month_{report_info}=====")
                    df = df_asin_detail.loc[df_asin_detail.asin.isin(period_asins)]
                    # Copy so the truncation below never writes into a view of
                    # df_asin_detail.
                    df = df.loc[:, self.cols].copy()
                    for column, limit in max_lengths.items():
                        # Truncate to the column limit; None stays None.
                        df[column] = df[column].apply(
                            lambda x, limit=limit: str(x)[:limit] if x is not None else None)

                    print(f'存储pg：{self.site_name}_asin_detail_month_{report_info}')
                    self.engine_pg.to_sql(df, f"{self.site_name}_asin_detail_month_{report_info}",
                                          if_exists='append')
                    break
                except Exception as e:
                    traceback.print_exc()  # print the full stack to the terminal
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                    time.sleep(random.uniform(10, 20.5))
                    print(f"打印完整栈到终端 存储'{self.site_name} 存储詳情數據 数据'失败，等待5s继续", e, f"\n{traceback.format_exc()}")
                    time.sleep(5)
                    continue

        if requests_error_asin_list:
            self.db_change_state(state=1, asin_list=requests_error_asin_list)
        if self.asin_list_update:
            # db_change_state() clears both exclusion lists after every call,
            # so the state=1 call above may already have emptied them. Restore
            # them so the state=3 update still skips the ASINs that are due to
            # receive state 7 or 9 below.
            self.asin_not_buyBox_list = asin_not_buyBox_list or []
            self.asin_not_foot_list = asin_not_foot_list or []
            self.db_change_state(state=3, asin_list=self.asin_list_update)
            self.asin_list_update = []

        status_updates = (
            (4, asin_not_found_list),
            (6, asin_not_sure_list),
            (7, asin_not_foot_list),  # no footer
            (8, asin_not_foot2_list),
            (9, asin_not_buyBox_list),
            (10, asin_not_response_list),
            (12, asin_not_redirect_list),
            (13, asin_not_div_id_dp_list),
        )
        for state, asins in status_updates:
            if asins:
                self.db_change_state(state=state, asin_list=asins)

    @func_set_timeout(240)
    def save_bs_category_asin_detail(self, bs_category_asin_list_pg):
        """Append best-seller-rank text rows to the monthly BSR detail table.

        The rows are de-duplicated by ASIN, ``best_sellers_rank`` is cut to
        800 characters, and a ``spider_int`` column is added. The target table
        name is derived from the ``date_info`` of the first row. On any
        exception the connections are re-created and the write is retried
        after a random 10-20.5 s pause.

        Args:
            bs_category_asin_list_pg: Rows shaped as ``[asin, date_info,
                best_sellers_rank, last_herf, all_best_sellers_href]``.
        """
        bsr_columns = ['asin', 'date_info', 'best_sellers_rank', 'last_herf',
                       'all_best_sellers_href']
        while True:
            try:
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                print("存储 asin bsr 文本 存储pg", len(bs_category_asin_list_pg))
                bsr_frame = pd.DataFrame(data=bs_category_asin_list_pg, columns=bsr_columns)
                bsr_frame.drop_duplicates(['asin'], inplace=True)  # de-duplicate
                # Truncate the rank text; None stays None.
                bsr_frame['best_sellers_rank'] = bsr_frame['best_sellers_rank'].apply(
                    lambda text: None if text is None else str(text)[:800])
                bsr_frame['spider_int'] = self.spider_int
                if bsr_frame.shape[0] > 0:
                    date_info_ = bsr_frame['date_info'].iloc[0].replace('-', '_')
                    target_table = f'{self.site_name}_bs_category_asin_detail_month_{date_info_}'
                    print(target_table)
                    self.engine_pg.to_sql(bsr_frame, target_table, if_exists='append')
                return
            except Exception as e:
                print("存储 存储 asin bsr 文本 数据错误 mysql", e)
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))

    @func_set_timeout(240)
    def save_buyBoxname_url(self, buyBox_list):
        """Upsert seller id / account name / url rows into the seller account table.

        Does nothing when ``buyBox_list`` is empty. Rows are de-duplicated by
        ``seller_id`` and written through ``self.engine`` with an
        ``INSERT ... ON DUPLICATE KEY UPDATE`` statement. On any exception the
        connections are re-created and the write is retried after a pause.

        Args:
            buyBox_list: Rows shaped as ``[seller_id, account_name, url]``.
        """
        while True:
            try:
                if not buyBox_list:
                    break
                print('存储店铺 syn表：', self.db_seller_account_syn, len(buyBox_list))
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                seller_frame = pd.DataFrame(data=buyBox_list, columns=['seller_id', 'account_name', 'url'])
                seller_frame.drop_duplicates(['seller_id'], inplace=True)  # de-duplicate
                seller_rows = seller_frame.values.tolist()
                print(len(seller_rows))
                upsert_sql = f"insert into {self.db_seller_account_syn} (seller_id, account_name, url) values (%s, %s, %s) ON DUPLICATE KEY UPDATE seller_id = values(seller_id)"
                with self.engine.begin() as conn:
                    conn.execute(upsert_sql, seller_rows)
                break
            except Exception as e:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))
                print(f"存储'{self.db_seller_account_syn}' 存储卖家 name, url失败，等待5s继续", e, f"\n{traceback.format_exc()}")
                time.sleep(5)

    @func_set_timeout(240)
    def save_buyBoxname_asin(self, buyBoxname_asin_list):
        """Replace the seller/ASIN rows for the given ASINs.

        Does nothing when ``buyBoxname_asin_list`` is empty. Rows are
        de-duplicated by ``(seller_id, asin)``. Existing rows for the affected
        ASINs are deleted through ``self.engine`` first, then the new rows are
        appended. On any exception the connections are re-created and the
        whole operation is retried after a random 10-20.5 s pause.

        Args:
            buyBoxname_asin_list: Rows shaped as ``[account_name, asin, seller_id]``.
        """
        while True:
            try:
                if not buyBoxname_asin_list:
                    break
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                print(f"存储 save_buyBoxname_asin ========={len(buyBoxname_asin_list)}")
                account_frame = pd.DataFrame(data=buyBoxname_asin_list,
                                             columns=['account_name', 'asin', 'seller_id'])
                account_frame.drop_duplicates(['seller_id', 'asin'], inplace=True)  # de-duplicate
                if account_frame.shape[0] > 0:
                    distinct_asins = set(account_frame.asin)
                    # A one-element tuple renders as ('x',), which is not a
                    # valid IN list, so the single-ASIN case is written out.
                    if len(distinct_asins) == 1:
                        sql_delete = f"delete from {self.db_seller_asin_account} where asin in ('{account_frame.asin.iloc[0]}');"
                    else:
                        sql_delete = f"delete from {self.db_seller_asin_account} where asin in {tuple(distinct_asins)};"
                    with self.engine.begin() as conn:
                        conn.execute(sql_delete)
                    self.engine.to_sql(account_frame, self.db_seller_asin_account,
                                       if_exists='append')
                break
            except Exception as e:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))
                print(f"存储'{self.db_seller_asin_account}'存储卖家信息, asin 失败，等待5s继续", e, f"\n{traceback.format_exc()}")

    def db_change_state(self, state=2, asin_list=None):
        """Apply ``state`` to the rows of ``asin_list``, then clear the exclusion lists.

        Delegates to ``db_change_state_common``. Both
        ``self.asin_not_buyBox_list`` and ``self.asin_not_foot_list`` are reset
        to empty lists after every call, whatever the state.
        """
        self.db_change_state_common(state=state, asin_list=asin_list)
        self.asin_not_buyBox_list, self.asin_not_foot_list = [], []

    def db_change_state_common(self, state=None, asin_list=None):
        """Set ``state`` on the claimed sync-table rows that belong to ``asin_list``.

        The ids are looked up in ``self.df_read`` (the batch most recently
        read). Only rows still in state 2 are updated. For ``state == 3`` the
        ASINs in ``self.asin_not_foot_list`` and ``self.asin_not_buyBox_list``
        are left out. The update is retried until it succeeds; unless
        ``state`` is 1, ``update_all_syn_asin`` is called afterwards with the
        full ``asin_list``.

        Args:
            state: New state value for the matching rows.
            asin_list: ASINs whose rows should be updated.
        """
        print(f"==================== 存储状态 {state} 数据 ========== {len(asin_list)} ========")
        selected = self.df_read.asin.isin(asin_list)
        if state == 3:
            # Leave out the ids that belong to state 7 / 9 ASINs.
            selected = (selected
                        & ~self.df_read.asin.isin(self.asin_not_foot_list)
                        & ~self.df_read.asin.isin(self.asin_not_buyBox_list))
        # .tolist() converts NumPy scalars to plain Python numbers. Formatting
        # a tuple of NumPy scalars uses their repr, which under NumPy >= 2 is
        # e.g. ``np.int64(1)`` and would produce invalid SQL.
        id_list = self.df_read.loc[selected, 'id'].tolist()
        id_sql = ", ".join(str(row_id) for row_id in id_list)
        while True:
            try:
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                with self.engine_pg.begin() as conn:
                    # 1, 3: 1 = roll back; 3 = success
                    if id_list:
                        sql_update = f"update {self.db_syn}_{self.month} set state={state} where id in ({id_sql}) and state=2;"
                        conn.execute(sql_update)
                break
            except Exception as e:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))
                print(f"更改{self.db_syn}_{self.month}表的state={state}出错", e, f"\n{traceback.format_exc()}")
                time.sleep(15)
                continue

        if state != 1:
            self.update_all_syn_asin(asin_list, state)

    def update_all_syn_asin(self, asin_list, state):
        """Replace the ``{site}_all_syn_st_asin`` rows for ``asin_list`` with ``state``.

        Does nothing beyond the log line when ``asin_list`` is empty.
        Otherwise the existing rows for the distinct ASINs are deleted and one
        ``(asin, state)`` row per ASIN is appended. On any exception the
        connections are re-created and the operation is retried after a
        random 10-20.5 s pause.

        Args:
            asin_list: ASINs to record.
            state: State value stored next to each ASIN.
        """
        print(f'更新历史asin的 状态 {state} 数量 {len(asin_list)}')
        while True:
            try:
                if not asin_list:
                    break
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                    self.engine_pg = self.pg_connect()
                asin_frame = pd.DataFrame(data=[[asin] for asin in asin_list], columns=['asin'])
                asin_frame.drop_duplicates(['asin'], inplace=True)  # de-duplicate
                distinct_asins = set(asin_frame.asin)
                # A one-element tuple renders as ('x',), which is not a valid
                # IN list, so the single-ASIN case is written out.
                if len(distinct_asins) == 1:
                    sql_delete = f"delete from {self.site_name}_all_syn_st_asin where asin in ('{asin_frame.asin.iloc[0]}');"
                else:
                    sql_delete = f"delete from {self.site_name}_all_syn_st_asin where asin in {tuple(distinct_asins)};"
                with self.engine_pg.begin() as conn:
                    conn.execute(sql_delete)
                asin_frame['state'] = state
                self.engine_pg.to_sql(asin_frame, f'{self.site_name}_all_syn_st_asin', if_exists='append')
                break
            except Exception as e:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                time.sleep(random.uniform(10, 20.5))
                print(f"存储'{self.site_name}_all_syn_st_asin'等待5s继续", e,
                      f"\n{traceback.format_exc()}")


if __name__ == '__main__':
    # Instantiates the class with every argument left at its default (None).
    # NOTE(review): init_db_names() concatenates self.site_name with a str, so a
    # None site_name looks like it would raise TypeError there — confirm
    # whether this entry point is still meant to be runnable.
    Save_asin_detail()
