import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
import pandas as pd
from utils.db_connect import BaseUtils
from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
from utils.requests_param import Requests_param_val
import traceback
import time
# Start-up banner; as a top-level statement it is printed whenever this
# module is imported or executed.
print('存储 公司asin 每日抓取监控')


class Save_asin_self(BaseUtils):
    def __init__(self, site_name='us', read_size=100):
        """Set up buffers, column layouts and database names for one site.

        Args:
            site_name: Site code used as the prefix of the per-site table
                names (defaults to ``'us'``).
            read_size: Maximum number of rows fetched per batch by
                ``read_db_data`` (used as the SQL ``LIMIT``).
        """
        super().__init__()
        self.site_name = site_name  # site code
        # Buffer of monitoring rows (one list per ASIN, ordered like self.cols).
        self.asin_detail_list = []
        # Set to False by read_db_data once no more rows are left to claim.
        self.stop_item_queue = True
        # Column order of the monitoring rows written by save_data.
        self.cols = ['asin', 'page_error', 'title_error', 'img_error', 'selling_error', 'ccategory_error',
                     'buy_now_error', 'erp_seller', 'sku', 'search_ccategory_error']

        # Column order of the ASIN detail rows buffered in item_asin_detail_list.
        self.asin_cols = ['asin', 'title', 'img_url', 'rating', 'total_comments', 'price', "rank", 'category',
                          'launch_time',
                          'volume', 'weight', "page_inventory", "buy_box_seller_type", 'title_len', "video_url",
                          "a+_url", "material", 'mpn', 'online_time', 'describe', 'five_star', 'four_star',
                          'three_star', 'two_star', 'one_star', 'low_star','brand']
        # Opens the DB connection and derives the table names; needs site_name.
        self.init_db_names()
        self.read_size = read_size
        # NOTE(review): attribute name is spelled "reuests" (sic); left as is
        # because code outside this file may reference it.
        self.reuests_para_val = Requests_param_val(site_name=self.site_name)
        # Buffer of ASIN detail rows (ordered like self.asin_cols).
        self.item_asin_detail_list = []
        # Buffer of values taken from the asin_update queue in process_item.
        self.asin_update_list = []

    def init_db_names(self):
        """Connect to the database and prepare per-site names and word list.

        1. Open the MySQL connection (``self.engine``).
        2. Derive the per-site table names from ``DB_REQUESTS_ASIN_PARAMS``.
        3. Load the '负面词汇' entries of ``censored_thesaurus`` into
           ``self.text_name_list``.
        """
        self.engine = self.mysql_connect()

        def site_table(param_key):
            # Drop the first two characters of the configured name (presumably
            # a site prefix such as "us" - confirm against amazon_params) and
            # put this instance's site code in front instead.
            return self.site_name + DB_REQUESTS_ASIN_PARAMS[param_key][2:]

        self.db_erp_asin_syn = site_table('db_us_erp_asin_syn')
        self.db_erp_asin = site_table('db_us_erp_asin')
        self.db_self_asin_detail = site_table('db_self_asin_detail')

        negative_words_query = "SELECT text_name FROM censored_thesaurus WHERE data_type='负面词汇'"
        print(negative_words_query)
        negative_words = pd.read_sql(negative_words_query, con=self.engine)
        self.text_name_list = negative_words['text_name'].tolist()
        print('负面词汇:', self.text_name_list)

        # A matching lookup for data_type='中文词汇' (stored in
        # self.text_zh_list) used to live here and is currently disabled.

    def process_item(self, item_queue, item_asin_queue, asin_update_queue, requests_error_asin_list):
        """Drain the three result queues into buffers, then persist them.

        Each pass of the loop takes at most one entry from every queue. Once
        all three queues report empty, ``save_data`` is called, the buffers
        are reset and the method returns.

        Args:
            item_queue: Queue-like object (``empty()``/``get()``) yielding
                dicts with the monitoring fields.
            item_asin_queue: Queue-like object yielding dicts with the ASIN
                detail fields.
            asin_update_queue: Queue-like object whose entries are appended
                unchanged to ``self.asin_update_list``.
            requests_error_asin_list: Stored on the instance so that
                ``save_data`` can read it.
        """
        print("数据添加列表中")
        self.requests_error_asin_list = requests_error_asin_list

        # Keys are read in this order so the rows line up with self.cols.
        monitor_keys = (
            'asin', 'page_error', 'title_error', 'img_error', 'selling_error',
            'ccategory_error', 'buy_now_error', 'erp_seller', 'sku',
            'search_ccategory_error',
        )
        # Fields that need to be stored in the database, in column order.
        # NOTE(review): the last key is 'Brand' (capitalised) while the
        # matching entry in self.asin_cols is 'brand' - confirm with the
        # producer of these dicts.
        detail_keys = (
            'asin', 'title', 'img_url', 'rating', 'total_comments', 'price',
            'rank', 'category', 'launch_time', 'volume', 'weight',
            'page_inventory', 'buy_box_seller_type', 'title_len', 'video_url',
            'a+_url', 'material', 'mpn', 'online_time', 'describe',
            'five_star', 'four_star', 'three_star', 'two_star', 'one_star',
            'low_star', 'Brand',
        )

        while True:
            if not item_queue.empty():
                monitor_item = item_queue.get()
                self.asin_detail_list.append([monitor_item[key] for key in monitor_keys])

            if not item_asin_queue.empty():
                detail_item = item_asin_queue.get()
                self.item_asin_detail_list.append([detail_item[key] for key in detail_keys])

            if not asin_update_queue.empty():
                self.asin_update_list.append(asin_update_queue.get())
                continue

            # The update queue had nothing for this pass.
            print("完成")
            all_drained = item_queue.empty() and asin_update_queue.empty() and item_asin_queue.empty()
            if not all_drained:
                continue

            self.save_data()
            self.asin_detail_list = []
            self.item_asin_detail_list = []
            self.asin_update_list = []
            print("队列为空，跳出循环")
            break

    def read_db_data(self):
        """Claim the next batch of ASINs to crawl and mark them ``state=2``.

        Inside one transaction the method selects up to ``self.read_size``
        rows with ``STATE=1`` from ``self.db_erp_asin_syn`` (``for update``),
        keeps them in ``self.df_read`` and sets their ``state`` to 2.
        Rows with ``asin_type=1`` are served first; only when none are left
        are rows with ``asin_type`` 2 or 3 selected.

        Returns:
            A list of ``'|'``-joined strings laid out as
            ``asin|is_variation|account_name|price1|price2|erp_seller|sku|asin_type|us_upload_info``.
            For ``asin_type`` 2/3 rows every field except ``asin`` and
            ``asin_type`` is the placeholder ``'0'``. An empty list is
            returned (and ``self.stop_item_queue`` set to ``False``) when no
            rows are left.

        Any exception is printed, ``self.mysql_reconnect`` is called and the
        whole read is retried; there is no retry limit.
        """
        while True:
            try:
                with self.engine.begin() as conn:
                    # First choice: asin_type=1 rows that are still in state 1.
                    sql_read = f"SELECT asin, id, is_variation, account_name, price1, price2, erp_seller, sku, asin_type,us_upload_info FROM {self.db_erp_asin_syn} WHERE asin_type=1 and STATE=1 order by asin_type,state LIMIT {self.read_size}  for update;"
                    print('sql: ', sql_read)
                    a = conn.execute(sql_read)
                    self.df_read = pd.DataFrame(a, columns=["asin", "id", "is_variation", "account_name", "price1",
                                                            "price2", "erp_seller", 'sku', 'asin_type',
                                                            'us_upload_info'])
                    # Keep a single row per ASIN (only applied to this first query).
                    self.df_read.drop_duplicates(['asin'], inplace=True)
                    if self.df_read.shape[0] != 0:
                        # The log message describes these as ASINs with sales above 100.
                        print("抓取销量大于100的asin")
                        # Cast to str so the columns can be joined with '|' below.
                        self.df_read.is_variation = self.df_read.is_variation.astype("U")
                        self.df_read.price1 = self.df_read.price1.astype("U")
                        self.df_read.price2 = self.df_read.price2.astype("U")
                        self.df_read.sku = self.df_read.sku.astype("U")
                        self.df_read.asin_type = self.df_read.asin_type.astype("U")
                        # NOTE(review): account_name, erp_seller and us_upload_info are
                        # concatenated without a cast - presumably always non-null strings.
                        self.df_read[
                            'asin_merge'] = self.df_read.asin + '|' + self.df_read.is_variation + '|' + self.df_read.account_name + '|' + self.df_read.price1 + '|' + self.df_read.price2 + '|' + self.df_read.erp_seller + '|' + self.df_read.sku + '|' + self.df_read.asin_type + '|' + self.df_read.us_upload_info
                        asin_list = list(self.df_read.asin_merge)
                    else:
                        # No asin_type=1 rows left: fall back to asin_type 2 and 3.
                        print("销量大于100的 aisn 已经抓取完。现在开始抓取剩下公司asin")
                        sql_read = f'SELECT asin, id, asin_type FROM {self.db_erp_asin_syn} WHERE asin_type in (2,3) and STATE=1 order by asin_type, state LIMIT {self.read_size}  for update;'
                        print('sql2:: ', sql_read)
                        a = conn.execute(sql_read)
                        self.df_read = pd.DataFrame(a, columns=["asin", "id", 'asin_type'])
                        if self.df_read.shape[0] == 0:
                            # Nothing left at all: flag it and hand back an empty batch.
                            self.stop_item_queue = False
                            return []
                        self.df_read.asin_type = self.df_read.asin_type.astype("U")
                        # Same 9-field layout as above, with '0' for the fields not selected here.
                        self.df_read[
                            'asin_merge'] = self.df_read.asin + '|' + '0' + '|' + '0' + '|' + '0' + '|' + '0' + '|' + '0' + '|' + '0' + '|' + self.df_read.asin_type + '|' + '0'
                        asin_list = list(self.df_read.asin_merge)

                    # Mark the claimed rows as state=2 within the same transaction.
                    self.index_tuple = tuple(self.df_read['id'])
                    if len(self.index_tuple) == 1:
                        # A one-element tuple would render as "(id,)", which is not valid SQL.
                        sql_update = f"""UPDATE {self.db_erp_asin_syn} a set state=2 where a.id in ({self.index_tuple[0]})"""
                    else:
                        sql_update = f"""UPDATE {self.db_erp_asin_syn} a set state=2 where a.id in {self.index_tuple}"""
                    conn.execute(sql_update)
                    return asin_list
            except Exception as e:
                # The message mentions a 5s wait; presumably that happens inside mysql_reconnect.
                print("读取数据出bug并等待5s继续", e, f"\n{traceback.format_exc()}")
                self.mysql_reconnect(table_name=f'{self.db_erp_asin_syn}', e=e)
                continue

    def save_data(self):
        """Persist buffered monitoring rows and update the source-row states.

        * Rows in ``self.asin_detail_list`` are appended to the
          ``self.db_erp_asin`` table. A failed write is logged,
          ``self.mysql_reconnect`` is called and the write is retried until
          it succeeds; the buffer is cleared afterwards.
        * ASINs in ``self.requests_error_asin_list`` are set back to state 1.
        * ASINs in ``self.asin_update_list`` are set to state 3.

        Rows in ``self.item_asin_detail_list`` are only counted in the log
        line; writing them to ``self.db_self_asin_detail`` is disabled.
        """
        print(f"db_erp_asin 詳情數據数据 {self.db_self_asin_detail} {len(self.item_asin_detail_list)}")

        if self.asin_detail_list:
            # Built once so that retries re-send exactly the same rows.
            df_asin_detail = pd.DataFrame(data=self.asin_detail_list, columns=self.cols)
            while True:
                try:
                    df_asin_detail.to_sql(f"{self.db_erp_asin}", con=self.engine, if_exists='append', index=False)
                except Exception as e:
                    try:
                        print(f"存储{self.asin_update_list}失败，等待5s继续", e, f"\n{traceback.format_exc()}")
                    except Exception:
                        # Logging is best effort (e.g. console encoding errors) and must
                        # not stop the retry. Unlike a bare ``except`` this still lets
                        # KeyboardInterrupt / SystemExit end the loop.
                        pass
                    self.mysql_reconnect(table_name=f"{self.db_erp_asin}", e=e)
                else:
                    self.asin_detail_list = []
                    break

        # requests_error_asin_list is only assigned by process_item, so fall
        # back to "nothing to roll back" when save_data is called on its own.
        if getattr(self, 'requests_error_asin_list', None):
            self.db_change_state(state=1)
            self.requests_error_asin_list = []
        if self.asin_update_list:
            self.db_change_state(state=3)
            self.asin_update_list = []

    def db_change_state(self, state=2):
        """Write ``state`` for the ASIN list that belongs to it.

        ``state=1`` applies to ``self.requests_error_asin_list`` and
        ``state=3`` to ``self.asin_update_list``; any other value (including
        the default 2) does nothing.
        """
        if state == 1:
            target_asins = self.requests_error_asin_list
        elif state == 3:
            target_asins = self.asin_update_list
        else:
            return
        self.db_change_state_common(state=state, asin_list=target_asins)

    def db_change_state_common(self, state, asin_list):
        """Set ``state`` on the claimed rows whose ASIN is in ``asin_list``.

        The ids are looked up in ``self.df_read`` (the batch loaded by
        ``read_db_data``) and only rows currently in ``state=2`` are updated.
        Per the original note, state 1 means roll back and state 3 means
        success. On any exception the error is printed,
        ``self.mysql_reconnect`` is called and the update is retried.

        Args:
            state: New state value written to the table.
            asin_list: ASINs whose rows should be updated.
        """
        print(f"==================== 存储状态 {state} 数据 ========== {len(asin_list)} ========")
        claimed_rows = self.df_read.loc[self.df_read['asin'].isin(asin_list)]
        row_ids = tuple(claimed_rows['id'])

        # Prepare the statement once; it is identical on every retry.
        if not row_ids:
            statement = None
        elif len(row_ids) == 1:
            # Avoid the trailing comma of a one-element tuple ("(id,)").
            statement = f"update {self.db_erp_asin_syn} set state={state} where id in ({row_ids[0]}) and state=2;"
        else:
            statement = f"update {self.db_erp_asin_syn} set state={state} where id in {row_ids} and state=2;"

        done = False
        while not done:
            try:
                with self.engine.begin() as conn:
                    if statement is not None:
                        conn.execute(statement)
                done = True
            except Exception as e:
                print(f"更改{self.db_erp_asin_syn}表的state={state}出错", e, f"\n{traceback.format_exc()}")
                self.mysql_reconnect(table_name=self.db_erp_asin_syn, e=e)

    def title_five_date(self):
        """Re-classify today's title errors whose title changed substantially.

        For every ASIN that has a row created today in ``{site}_erp_asin``
        with ``title_error = 1``, the two most recent titles stored in
        ``{site}_self_asin_detail`` are compared. When fewer than 50% of the
        space-separated tokens of the older title occur in the newest title,
        today's rows of that ASIN are updated to ``title_error = 4``.

        ASINs with fewer than two stored titles, with a missing title, or
        whose titles cannot be read are skipped.
        """
        print('执行 标题对比 重复率')
        time_line = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        # Read from and write to the same per-site table.
        erp_table = f"{self.site_name}_erp_asin"
        detail_table = f"{self.site_name}_self_asin_detail"

        # The doubled "%%" is kept from the original query text; presumably it
        # escapes "%" for the DB driver - TODO confirm.
        asin_sql = f"SELECT DISTINCT(asin) FROM {erp_table} WHERE title_error = 1 and created_at like '{time_line}%%'"
        df_read = pd.read_sql(asin_sql, con=self.engine)

        update_asin = []
        for asin in df_read['asin']:
            sql = f"SELECT asin,title FROM {detail_table} WHERE asin= '{asin}' ORDER BY created_at DESC  LIMIT 2"
            try:
                df_titles = pd.read_sql(sql, con=self.engine)
            except Exception as e:
                # Best effort per ASIN: report the failure and move on.
                print(f"标题对比失败 asin={asin}", e, f"\n{traceback.format_exc()}")
                continue

            titles = df_titles['title'].tolist()
            if len(titles) != 2 or not all(isinstance(title, str) for title in titles):
                continue
            newest_title, previous_title = titles  # ordered by created_at DESC
            row_asin = df_titles['asin'].iloc[1]
            print([f"{row_asin}|-|{title}" for title in titles])

            # NOTE(review): this is a substring test against the whole newest
            # title, not a word-for-word comparison - confirm that is intended.
            previous_tokens = previous_title.split(' ')
            kept = sum(1 for token in previous_tokens if token in newest_title)
            overlap_percent = kept / len(previous_tokens) * 100
            if int(overlap_percent) < 50:
                print(row_asin)
                update_asin.append(row_asin)

        if update_asin:
            # Build the IN list explicitly: formatting a one-element tuple
            # yields "('X',)", which is not valid SQL.
            quoted_asins = ", ".join("'" + str(value).replace("'", "''") + "'" for value in update_asin)
            up_asin_sql = f"UPDATE {erp_table} set title_error=4 WHERE asin in ({quoted_asins}) and created_at like '{time_line}%%'"
            print(up_asin_sql)
            with self.engine.begin() as conn:
                conn.execute(up_asin_sql)


# Script entry point: runs only the title-comparison pass, using the default
# site ('us'). Constructing Save_asin_self opens the database connection via
# init_db_names.
if __name__ == '__main__':
    Save_asin_self().title_five_date()
