import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from utils.db_connect import BaseUtils
import requests
import time
from queue import Queue, Empty
import threading
import pandas as pd
import traceback
from multiprocessing import Pool
import random
from sqlalchemy import text


class KeepApi(BaseUtils):
    def __init__(self, site_name=None, proxy_name=None):
        """Initialise per-run state and the database handles.

        Args:
            site_name: Site code (for example ``'us'``); it is interpolated
                into the table names built by ``init_db_names``.
            proxy_name: Accepted for call compatibility; this class never
                reads it.
        """
        super().__init__()

        # Queues: asin_queue feeds the request workers, data_queue carries
        # their parsed responses to process_item().
        self.data_queue = Queue()
        self.asin_queue = Queue()

        # Values and accumulators that exist before the DB handles are built.
        self.site_name = site_name
        self.read_size = 20  # LIMIT used by read_db_data()
        self.asin_detail_list = []
        self.df_concat_list = []
        self.list_keepA = []

        self.init_db_names()

        # Per-batch bookkeeping, cleared again by init_list().
        self.stop_flag = False
        self.asin_orders_sale_list = []
        self.asin_list_update = []
        self.asin_list = []
        self.requests_error_asin_list = []

    def init_db_names(self):
        """Obtain the database handle and derive table / column names.

        Sets three attributes:

        * ``engine_pg`` - whatever the inherited ``pg_connect_6()`` returns
          (used elsewhere through ``.begin()`` and as ``con`` for ``to_sql``).
        * ``db_syn`` - name of the ``<site_name>_st_keepa_syn_2024`` table.
        * ``cols_db`` - column order used when ``sava_data`` builds its
          DataFrame; ``process_item`` must emit values in the same order.
        """
        self.engine_pg = self.pg_connect_6()
        self.db_syn = '{}_st_keepa_syn_2024'.format(self.site_name)
        self.cols_db = [
            'asin', 'asin_trun_4', 'title', 'img_url',
            'listed_since', 'first_bsr_label', 'last_bsr_label', 'last_bsr_rank',
            'last_price', 'last_count_reviews', 'total_days', 'currency',
            'times_list', 'price_list', 'rank_list', 'first_bsr_list',
            'last_bsr_list', 'count_reviews_list', 'variations', 'data_zoom_start',
            'date_range', 'color', 'size', 'last_bsr_rating',
            'rating_count', 'rating_count_list', 'min_list', 'max_list',
            'current_list', 'avg_list', 'avg90_list', 'avg180_list',
        ]

    def get_tokens(self):
        """Query the Keepa token endpoint for the remaining request budget.

        Returns:
            The ``tokensLeft`` entry of the JSON response. If the request, the
            JSON decoding or the key lookup fails, the error is printed and
            ``50000`` is returned instead.
        """
        # NOTE(review): the API key is embedded in source (and repeated in
        # keep_request); consider loading it from configuration.
        query = {"key": 'engpg35aootqo11korrc9p4otp0p1il5a5ssnjvdq5ehfnh6us8hb1klobq61c9t'}
        try:
            response = requests.get("https://api.keepa.com/token/?", params=query, timeout=10)
            payload = response.json()
            print('查看剩余请求数：', payload)
            return payload['tokensLeft']
        except Exception as e:
            print('查询 tokens 报错：', e, traceback.format_exc())
            return 50000

    def keep_request(self, t_num):
        """Worker loop: fetch Keepa data for queued ASINs and queue the result.

        Entries of the form ``"<asin>|<asin_trun_4>"`` are taken from
        ``asin_queue`` until it is empty. For each one the Keepa product API is
        called, its JSON is posted to the internal parsing endpoint, and the
        parsed JSON is put on ``data_queue`` as
        ``{'asin_data': ..., 'asin_trun_4': ..., 'asin': ...}``.

        Any failure while talking to either service (including a non-2xx HTTP
        status) is printed and the ASIN is appended to
        ``requests_error_asin_list``; the loop then moves on to the next entry.
        A queue entry that cannot be split into exactly two parts is logged and
        skipped instead of terminating the worker thread.

        Args:
            t_num: Worker index passed in by ``run``; not used by the loop.
        """
        while True:
            # Only the queue read is guarded by Empty so that unrelated errors
            # are not mistaken for (or hidden behind) queue exhaustion.
            try:
                asin_type = self.asin_queue.get(block=False)
            except Empty:
                break

            print('请求 asin：：', asin_type)
            try:
                asin, asin_trun_4 = asin_type.split('|')
            except (AttributeError, ValueError):
                # Non-string entry (e.g. NaN from a NULL column) or a wrong
                # number of '|' separators: skip it, keep the worker alive.
                print('keep_request: skipping malformed queue entry:', repr(asin_type))
                continue

            # NOTE(review): 'domain' is fixed to '1' regardless of site_name,
            # and the API key is embedded in source - confirm both are intended.
            params = {
                "key": 'engpg35aootqo11korrc9p4otp0p1il5a5ssnjvdq5ehfnh6us8hb1klobq61c9t',
                'domain': '1',
                'asin': asin,
                'stats': '2011-01-01,2025-01-01',
                'rating': 1
            }
            try:
                tproduct = requests.get('https://api.keepa.com/product', params=params, timeout=20)
                print('keepa api:', tproduct.status_code)
                # Treat HTTP errors as failures instead of forwarding an error
                # payload to the parser.
                tproduct.raise_for_status()
                data_dict = requests.post('http://192.168.10.225:5002/soundasia_selection/usKeepaTrend/jxKeepaData',
                                          json=tproduct.json(), timeout=20)
                print('Java 接口 解析数据：', data_dict.status_code)
                data_dict.raise_for_status()
                item = {'asin_data': data_dict.json(), 'asin_trun_4': asin_trun_4, 'asin': asin}
                self.data_queue.put(item)
            except Exception:
                print("keep请求失败，等待3s重新请求\n", traceback.format_exc())
                self.requests_error_asin_list.append(asin)
    def read_db_data(self):
        """Claim the next batch of pending rows and load them into ``asin_list``.

        Inside one transaction this selects up to ``read_size`` rows with
        ``STATE=1`` from ``db_syn`` (``FOR UPDATE``), stores them (de-duplicated
        by ``asin``) in ``df_read``, and sets ``state=2`` for the selected ids.
        ``asin_list`` is then filled with ``"<asin>|<asin_trun_4>"`` strings.

        On any exception the error is printed, the method sleeps 60 seconds,
        calls ``pg_connect()`` and retries indefinitely.

        Returns:
            ``asin_list`` - an empty list when no pending rows were found.
        """
        # Imported locally so the method does not depend on module-level names.
        from sqlalchemy import text

        sql_read = f'SELECT asin, id,asin_trun_4 FROM {self.db_syn} WHERE STATE=1 LIMIT {self.read_size} FOR UPDATE;'
        while True:
            try:
                with self.engine_pg.begin() as connection:
                    print(sql_read)
                    # text() keeps the call valid on SQLAlchemy 2.x, where
                    # plain strings are no longer executable.
                    rows = connection.execute(text(sql_read)).fetchall()
                    self.df_read = pd.DataFrame(rows, columns=['asin', 'id', 'asin_trun_4'])
                    self.df_read.drop_duplicates(['asin'], inplace=True)
                    self.index_tuple = tuple(self.df_read['id'])
                    if not self.index_tuple:
                        # Nothing pending: make sure no stale batch is reused.
                        self.asin_list = []
                        return self.asin_list
                    # int() coercion makes the joined list safe to inline and
                    # removes the need for a single-element special case.
                    id_sql = ', '.join(str(int(row_id)) for row_id in self.index_tuple)
                    sql_update = f"UPDATE {self.db_syn} a set state=2 where a.id in ({id_sql})"
                    connection.execute(text(sql_update))
                self.asin_list = list(self.df_read.asin + '|' + self.df_read.asin_trun_4)
                return self.asin_list
            except Exception as e:
                print('读取报错。read_db_data', e, f"\n{traceback.format_exc()}")
                time.sleep(60)
                # NOTE(review): the result of pg_connect() is discarded while
                # engine_pg comes from pg_connect_6() - confirm this actually
                # refreshes the engine used above.
                self.pg_connect()
                continue

    def db_change_state(self, state=2, asin_list=None):
        """Forward a state change request to ``db_change_state_common``.

        Args:
            state: Target state value handed through unchanged (default 2).
            asin_list: ASINs whose rows should be updated (default ``None``).
        """
        self.db_change_state_common(state, asin_list)

    def db_change_state_common(self, state, asin_list):
        """Move rows of the current batch from state 2 to ``state``.

        The ids are looked up in ``df_read`` (the batch loaded by
        ``read_db_data``) by ASIN, and only rows that are still in ``state=2``
        are updated. Per the original inline note, 1 means "roll back" and
        3 means "success".

        The previous implementation ignored ``state`` and always wrote 3, so
        failed ASINs were recorded as successful. Be aware that rows rolled
        back to 1 become eligible for ``read_db_data`` again.

        On a database error the error is printed, the method sleeps 50 seconds,
        calls ``pg_connect()`` and retries indefinitely.

        Args:
            state: Target state value to write.
            asin_list: ASINs to update; ``None`` or an empty list is a no-op.
        """
        # Imported locally so the method does not depend on module-level names.
        from sqlalchemy import text

        asin_list = list(asin_list) if asin_list else []
        print(f"==================== 存储状态 {state} 数据 ========== {len(asin_list)} ========")
        if not asin_list:
            return

        df = self.df_read.loc[self.df_read.asin.isin(asin_list)]
        # int() coercion makes the joined list safe to inline and removes the
        # single-element special case.
        id_sql = ', '.join(str(int(row_id)) for row_id in df.id)
        if not id_sql:
            return

        sql_update = text(f"update {self.db_syn} set state=:state where id in ({id_sql}) and state=2;")
        while True:
            try:
                with self.engine_pg.begin() as conn:
                    conn.execute(sql_update, {'state': int(state)})
                break
            except Exception as e:
                print(f"更改{self.db_syn}表的state={state}出错", e, f"\n{traceback.format_exc()}")
                time.sleep(50)
                # NOTE(review): the result of pg_connect() is discarded while
                # engine_pg comes from pg_connect_6() - confirm this actually
                # refreshes the engine used above.
                self.pg_connect()
                continue

    def process_item(self):
        """Drain ``data_queue`` into row lists, then persist them.

        Every queued dict (as produced by ``keep_request``) is flattened into a
        row ordered like ``cols_db`` and appended to ``asin_detail_list``; a
        shorter companion list is appended to ``asin_orders_sale_list``. When
        the queue is empty ``sava_data()`` is called once (even if no rows were
        collected, so that failed ASINs are still processed) and
        ``asin_detail_list`` is cleared.

        Differences from the previous bare ``except: continue`` behaviour:

        * an item that cannot be parsed is logged and its ASIN is added to
          ``requests_error_asin_list`` instead of being dropped silently;
        * an exception escaping ``sava_data()`` is logged once instead of
          making this method spin forever.
        """
        while True:
            try:
                item_dict = self.data_queue.get_nowait()
            except Empty:
                break

            try:
                detail_row, orders_row = self._build_detail_row(item_dict)
            except Exception:
                print('process_item: failed to parse item\n', traceback.format_exc())
                failed_asin = item_dict.get('asin') if isinstance(item_dict, dict) else None
                if failed_asin:
                    self.requests_error_asin_list.append(failed_asin)
                continue

            self.asin_detail_list.append(detail_row)
            self.asin_orders_sale_list.append(orders_row)

        try:
            self.sava_data()
        except Exception:
            print('process_item: sava_data failed\n', traceback.format_exc())
        finally:
            self.asin_detail_list = []

    @staticmethod
    def _build_detail_row(item_dict):
        """Flatten one queued item into ``(detail_row, orders_row)``.

        ``detail_row`` has one value per ``cols_db`` column, in that order.
        Falsy source values (missing, ``None``, ``0``, empty) become ``None``.
        ``orders_row`` collects the ASIN plus the fields flagged below, and only
        when they are truthy.

        Raises:
            KeyError / AttributeError / TypeError: if ``item_dict`` lacks the
                expected keys or its ``asin_data['result']`` is not a mapping.
        """
        def _clip_size(value):
            # The size text is stored truncated to 150 characters.
            return str(value)[:150]

        # (key in the parsed payload, converter or None to keep the raw value,
        #  also copy into orders_row)
        field_specs = (
            ('title', str, False),
            ('imgUrl', str, False),
            ('listedSince', str, False),
            ('firstBsrLabel', None, True),
            ('lastBsrLabel', str, False),
            ('lastBsrRank', str, False),
            ('lastPrice', None, True),
            ('lastCountReviews', str, False),
            ('totalDays', None, False),
            ('currency', str, False),
            ('timesList', None, False),
            ('priceList', None, False),
            ('rankList', str, False),
            ('firstBsrList', str, True),
            ('lastBsrList', str, False),
            ('countReviewsList', str, False),
            ('variations', str, False),
            ('dataZoomStart', str, False),
            ('dateRange', str, False),
            ('color', str, False),
            ('size', _clip_size, False),
            ('lastBsrRating', str, False),
            ('ratingCount', str, False),
            ('ratingCountList', str, False),
            ('minList', str, False),
            ('maxList', str, False),
            ('currentList', str, False),
            ('avgList', str, False),
            ('avg90List', str, False),
            ('avg180List', str, False),
        )

        payload = item_dict['asin_data']['result']
        asin_trun_4 = item_dict['asin_trun_4']
        asin = item_dict['asin']

        detail_row = [str(asin) if asin else None, str(asin_trun_4)]
        orders_row = [str(asin)] if asin else []

        for key, convert, in_orders in field_specs:
            raw = payload.get(key)
            if not raw:
                detail_row.append(None)
                continue
            value = raw if convert is None else convert(raw)
            detail_row.append(value)
            if in_orders:
                orders_row.append(value)

        return detail_row, orders_row

    def sava_data(self):
        """Persist the parsed rows of the current batch and update row states.

        ``asin_detail_list`` is turned into a DataFrame with ``cols_db``
        columns. Rows are grouped by each comma-separated value of
        ``asin_trun_4``; the first three characters of that value (lower-cased)
        select the ``<site_name>_keepa_trend_<suffix>`` target table. For every
        group the existing rows for those ASINs are deleted and the new rows
        appended (without the ``asin_trun_4`` column).

        Delete and insert now share one transaction, so a failed insert no
        longer leaves the table without the rows that were just deleted, and
        the ASIN list is passed as a bound parameter instead of a Python tuple
        repr.

        If anything in the save phase raises, the error is printed,
        ``pg_connect()`` is called and ``asin_list_update`` is left untouched.
        Afterwards ``db_change_state`` is called with ``state=1`` for
        ``requests_error_asin_list`` and with ``state=3`` for
        ``asin_list_update`` (each only when non-empty).
        """
        # Imported locally so the method does not depend on module-level names.
        from sqlalchemy import bindparam, text

        # First three characters of the grouping value -> table suffix;
        # anything not listed goes to the 'other' table.
        suffix_by_prefix = {
            'b00': 'b00_b06', 'b01': 'b00_b06', 'b02': 'b00_b06', 'b03': 'b00_b06',
            'b04': 'b00_b06', 'b05': 'b00_b06', 'b06': 'b00_b06',
            'b07': 'b07', 'b08': 'b08', 'b09': 'b09', 'b0b': 'b0b',
        }
        try:
            print('插入数据')
            df_asin_data = pd.DataFrame(data=self.asin_detail_list, columns=self.cols_db)

            # Group ASINs under every comma-separated asin_trun_4 value.
            asin_week_dict = {}
            for asin, weeks in zip(df_asin_data['asin'], df_asin_data['asin_trun_4']):
                for week in str(weeks).split(","):
                    asin_week_dict.setdefault(week, []).append(asin)

            for week, asin_list in asin_week_dict.items():
                df = df_asin_data.loc[df_asin_data.asin.isin(asin_list), self.cols_db]
                df_sava = df.drop('asin_trun_4', axis=1)
                suffix = suffix_by_prefix.get(week.lower()[:3], 'other')
                table_name = f'{self.site_name}_keepa_trend_{suffix}'
                asins = df_sava.asin.dropna().unique().tolist()

                with self.engine_pg.begin() as conn:
                    if asins:
                        sql_delete = text(f"delete from {table_name} where asin in :asins").bindparams(
                            bindparam('asins', expanding=True))
                        print(f"delete from {table_name}: {len(asins)} asin(s)", asins)
                        conn.execute(sql_delete, {'asins': asins})
                    # Same connection => same transaction as the delete above.
                    df_sava.to_sql(table_name, con=conn, if_exists='append', index=False)
            self.asin_list_update = list(df_asin_data.asin)
        except Exception as e:
            print("存储 存储 asin keepa 数据错误数据错误", e, f"\n{traceback.format_exc()}")
            # NOTE(review): the result of pg_connect() is discarded while
            # engine_pg comes from pg_connect_6() - confirm this actually
            # refreshes the engine used above.
            self.pg_connect()
        if self.requests_error_asin_list:
            self.db_change_state(state=1, asin_list=self.requests_error_asin_list)
        if self.asin_list_update:
            self.db_change_state(state=3, asin_list=self.asin_list_update)

    def init_list(self):
        """Reset the per-batch containers before the next ``run`` iteration.

        ``asin_queue`` is replaced by a fresh ``Queue`` and each of the batch
        lists is replaced by its own new empty list.
        """
        self.asin_queue = Queue()
        for attr_name in ('asin_list',
                          'asin_detail_list',
                          'requests_error_asin_list',
                          'asin_list_update',
                          'asin_orders_sale_list'):
            setattr(self, attr_name, [])

    def run(self):
        """Main loop: process pending ASINs batch by batch until none remain.

        Each iteration reads a batch via ``read_db_data``, queues its entries,
        starts the ``keep_request`` workers (1 when ``get_tokens()`` reports
        fewer than 2000 tokens, otherwise 20), waits for them to finish, hands
        the results to ``process_item`` and finally clears the per-batch state
        with ``init_list``. The loop ends when ``asin_list`` comes back empty.
        """
        while True:
            self.read_db_data()
            if not self.asin_list:
                break
            for entry in self.asin_list:
                self.asin_queue.put(entry)

            tokens = self.get_tokens()
            worker_count = 1 if tokens < 2000 else 20
            workers = [
                threading.Thread(target=self.keep_request, args=(idx,))
                for idx in range(worker_count)
            ]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()

            print('存储')
            self.process_item()
            print('初始化参数')
            self.init_list()

if __name__ == '__main__':
    # Script entry point: build a crawler for the 'us' site and run it.
    KeepApi(site_name='us').run()
