import sys
import os

# Put the parent of sys.path[0] on the import path; presumably this is what
# lets the project-local `utils` / `amazon_params` imports below resolve.
sys.path.append(os.path.dirname(sys.path[0]))  # parent directory
import pandas as pd
from utils.db_connect import BaseUtils
from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
from utils.requests_param import Requests_param_val
import traceback

# Banner emitted whenever this module is imported or run
# (the text reads "storing competitor ASIN data").
print('存储竞品asin 数据')

class Save_asin_competitive(BaseUtils):
    """Persist scraped competitor-ASIN details and track per-row crawl state.

    Workflow implemented by the methods of this class:

    1. ``read_db_data`` selects a batch of rows with ``state=1`` from the sync
       table (``self.db_syn``) and marks them ``state=2``.
    2. ``process_item`` drains a queue of parsed items into
       ``self.asin_detail_list`` and then calls ``save_data``.
    3. ``save_data`` appends the collected rows to the detail table
       (``self.db_competitive_aisn``) and moves the claimed rows from
       ``state=2`` to the state code associated with the list they ended up
       in (see ``_STATE_LIST_ATTRS``).

    NOTE(review): raw SQL strings are passed straight to ``conn.execute``.
    SQLAlchemy 2.x only accepts executable objects such as ``text(...)``, so
    this code presumably targets SQLAlchemy 1.x -- confirm before upgrading.
    """

    # Columns stored for every site, in the order rows are built.
    _BASE_ASIN_COLS = (
        'asin', 'title', 'img_url', 'rating', 'total_comments', 'price',
        'rank', 'category', 'launch_time', 'volume', 'weight', 'title_len',
        'video_url', 'add_url', 'material', 'sp_num', 'activity_type',
        'one_two_val', 'three_four_val',
    )

    # (state code, name of the instance attribute holding the ASINs that
    # should receive that state). The order is the order in which
    # ``save_data`` applies the updates.
    _STATE_LIST_ATTRS = (
        (1, 'requests_error_asin_list'),   # source comment: 1 = roll back
        (3, 'asin_list_update'),           # source comment: 3 = success
        (4, 'asin_not_found_list'),
        (8, 'asin_not_foot2_list'),
        (10, 'asin_not_response_list'),
        (12, 'asin_not_redirect_list'),
        (13, 'asin_not_div_id_dp_list'),
    )

    def __init__(self, site_name=None):
        """Set up DB handles, table names and the column layout for a site.

        Args:
            site_name: Site code used as the table-name prefix (the code
                special-cases ``'us'``). Required despite the ``None``
                default, which is kept only for signature compatibility.

        Raises:
            TypeError: if ``site_name`` is ``None``. Previously this surfaced
                later as an opaque ``NoneType + str`` TypeError, after a DB
                connection had already been opened.
        """
        if site_name is None:
            raise TypeError("site_name is required (e.g. 'us'), got None")
        super().__init__()
        self.site_name = site_name  # site code
        self.asin_detail_list = []
        self.stop_item_queue = True

        # State that other methods read. Initialised here so that calling
        # save_data / db_change_state_common before process_item or
        # read_db_data does not raise AttributeError.
        self.df_read = pd.DataFrame(columns=['asin', 'id'])
        self.asin_list_update = []
        self.requests_error_asin_list = []
        self.asin_not_found_list = []
        self.asin_not_foot2_list = []
        self.asin_not_response_list = []
        self.asin_not_redirect_list = []
        self.asin_not_div_id_dp_list = []

        self.init_db_names()
        self.read_size = 100

        # Column layout of a stored row: every site except 'us' carries one
        # extra trailing column.
        self.asin_cols = list(self._BASE_ASIN_COLS)
        if site_name != 'us':
            self.asin_cols.append('five_six_val')
        self.reuests_para_val = Requests_param_val(site_name=self.site_name)

    def init_db_names(self):
        """Open the DB engine and derive the site-specific table names.

        Each table name is ``site_name`` followed by the configured name with
        its first two characters removed.
        """
        self.engine = self.mysql_connect()
        self.db_syn = self.site_name + DB_REQUESTS_ASIN_PARAMS['db_competitive_aisn_syn'][2:]
        self.db_competitive_aisn = self.site_name + DB_REQUESTS_ASIN_PARAMS['db_competitive_aisn'][2:]

    @staticmethod
    def _sql_id_list(ids):
        """Render ``ids`` as a parenthesised SQL ``IN`` list, e.g. ``(1, 2, 3)``.

        Uses ``str()`` on each element rather than the tuple ``repr``, so a
        one-element sequence becomes ``(1)`` instead of ``(1,)`` without a
        special case. Assumes numeric ids; callers must not pass an empty
        sequence, since ``()`` is not valid SQL.
        """
        return "(" + ", ".join(str(one_id) for one_id in ids) + ")"

    def process_item(self, item_queue, requests_error_asin_list, asin_not_found_list, asin_not_foot2_list,
                     asin_not_response_list, asin_not_redirect_list, asin_not_div_id_dp_list):
        """Drain ``item_queue`` into rows, then persist them and update states.

        Args:
            item_queue: Queue-like object exposing ``empty()`` and ``get()``;
                each item is a mapping with a key for every name in
                ``self.asin_cols``.
            requests_error_asin_list: ASINs to set back to state 1.
            asin_not_found_list: ASINs to set to state 4.
            asin_not_foot2_list: ASINs to set to state 8.
            asin_not_response_list: ASINs to set to state 10.
            asin_not_redirect_list: ASINs to set to state 12.
            asin_not_div_id_dp_list: ASINs to set to state 13.
        """
        print("=================开始存储数据======================")
        self.requests_error_asin_list = requests_error_asin_list
        self.asin_not_found_list = asin_not_found_list
        self.asin_not_foot2_list = asin_not_foot2_list
        self.asin_not_response_list = asin_not_response_list
        self.asin_not_redirect_list = asin_not_redirect_list
        self.asin_not_div_id_dp_list = asin_not_div_id_dp_list

        # Rows are built in asin_cols order so they line up with the
        # DataFrame columns used by save_data.
        while not item_queue.empty():
            item = item_queue.get()
            self.asin_detail_list.append([item[col] for col in self.asin_cols])

        self.save_data()
        self.asin_detail_list = []
        print("结束--跳出--存储")

    def read_db_data(self):
        """Claim the next batch of pending ASINs from the sync table.

        Selects up to ``self.read_size`` rows with ``state=1`` (``FOR UPDATE``
        inside one transaction), stores them de-duplicated by ASIN in
        ``self.df_read`` and sets the kept rows to ``state=2``.

        Returns:
            list: the claimed ASINs, or ``[]`` when nothing is pending. In the
            latter case ``stop_item_queue`` is set to ``False`` and
            ``spider_de_feedback`` to ``True``; neither flag is read inside
            this class, so both are presumably consumed by the caller.

        NOTE(review): any exception triggers ``mysql_reconnect`` and an
        unbounded retry, so a persistent non-connection error never
        terminates.
        NOTE(review): rows dropped as duplicate ASINs are not marked
        ``state=2`` and therefore stay at ``state=1``.
        """
        while True:
            try:
                with self.engine.begin() as conn:
                    sql_read = f'SELECT asin,id FROM {self.db_syn} WHERE STATE=1 and length(asin) =10  LIMIT {self.read_size} for update;'
                    print(sql_read)
                    rows = conn.execute(sql_read)
                    self.df_read = pd.DataFrame(rows, columns=['asin', 'id'])
                    self.df_read.drop_duplicates(['asin'], inplace=True)
                    if self.df_read.empty:
                        # NOTE(review): the table name in this message is
                        # hard-coded to the "us" site regardless of site_name.
                        print("*************** us_competitive_aisn 详情抓取完毕 ****************")
                        self.stop_item_queue = False
                        self.spider_de_feedback = True
                        return []
                    self.index_tuple = tuple(self.df_read['id'])
                    sql_update = (
                        f"UPDATE {self.db_syn} a set state=2 "
                        f"where a.id in {self._sql_id_list(self.index_tuple)}"
                    )
                    conn.execute(sql_update)
                return list(self.df_read.asin)
            except Exception as e:
                print("读取数据出bug并等待5s继续", e, f"\n{traceback.format_exc()}")
                self.mysql_reconnect(table_name=self.db_syn, e=e)

    def save_data(self):
        """Append collected rows to the detail table, then finalise row states.

        The insert is skipped when ``self.asin_detail_list`` is empty. The
        state updates run regardless, in ``_STATE_LIST_ATTRS`` order, for
        every list that is non-empty.

        NOTE(review): a failing insert is retried without limit after
        ``mysql_reconnect``.
        """
        detail_df = pd.DataFrame(data=self.asin_detail_list, columns=self.asin_cols)
        self.asin_list_update = list(detail_df.asin)

        if self.asin_detail_list:
            while True:
                try:
                    detail_df.to_sql(self.db_competitive_aisn, con=self.engine, if_exists='append',
                                     index=False)
                    break
                except Exception as e:
                    try:
                        print(f"存储{self.asin_detail_list}失败，等待5s继续", e, f"\n{traceback.format_exc()}")
                    except Exception:
                        # Logging is best-effort: a failure while printing the
                        # payload must not prevent the reconnect and retry.
                        pass
                    self.mysql_reconnect(table_name=self.db_competitive_aisn, e=e)

        for state, list_attr in self._STATE_LIST_ATTRS:
            if getattr(self, list_attr):
                self.db_change_state(state=state)

    def db_change_state(self, state=2):
        """Apply ``state`` to the ASINs held in the list associated with it.

        States without an entry in ``_STATE_LIST_ATTRS`` (including the
        default ``2``) are a no-op.
        """
        list_attr = dict(self._STATE_LIST_ATTRS).get(state)
        if list_attr is not None:
            self.db_change_state_common(state=state, asin_list=getattr(self, list_attr))

    def db_change_state_common(self, state, asin_list):
        """Set ``state`` on claimed rows whose ASIN is in ``asin_list``.

        Only ids present in ``self.df_read`` (the batch from the most recent
        ``read_db_data`` call) are touched, and only rows still at ``state=2``
        are updated, so a state applied earlier in the same ``save_data`` run
        is not overwritten.

        NOTE(review): failures are retried without limit after
        ``mysql_reconnect``.
        """
        print(f"==================== 存储状态 {state} 数据 ========== {len(asin_list)} ========")
        matched = self.df_read.loc[self.df_read.asin.isin(asin_list)]
        id_tuple = tuple(matched.id)
        if not id_tuple:
            # Nothing from the current batch matches; skip the DB round trip.
            return

        sql_update = (
            f"update {self.db_syn} set state={state} "
            f"where id in {self._sql_id_list(id_tuple)} and state=2;"
        )
        while True:
            try:
                with self.engine.begin() as conn:
                    conn.execute(sql_update)
                break
            except Exception as e:
                print(f"更改{self.db_syn}表的state={state}出错", e, f"\n{traceback.format_exc()}")
                self.mysql_reconnect(table_name=self.db_syn, e=e)
