import sys
import os

# Put the parent of the script directory on the import path; the project
# imports below (utils, amazon_params) presumably rely on it — confirm.
sys.path.append(os.path.dirname(sys.path[0]))  # parent directory
from utils.db_connect import BaseUtils
from amazon_params import py_ja3

from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
from utils.requests_param import Requests_param_val
from queue import Queue
import time
import random
from lxml import etree
import json
# NOTE: ``requests`` is curl_cffi's requests-compatible module, while the
# upstream ``requests`` package is bound to ``requests2``.
from curl_cffi import requests
import requests as requests2

# Module-level session shared by every worker thread; get_account_url mounts
# an adapter on it before each request.
sess = requests2.Session()
import traceback
import pandas as pd
import threading
import urllib3
import datetime
import re
import uuid

# Seller pages are fetched with verify=False, so silence the resulting
# InsecureRequestWarning noise.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

"""店铺 feedback 抓取"""


class async_account_feedback(BaseUtils):
    def __init__(self, site_name='us', read_size=500, proxy_name=None, week=None):
        """Create a feedback spider for one marketplace.

        Args:
            site_name: marketplace code; it prefixes the table names and is
                passed to the request helper to pick the site URL and host.
            read_size: ``LIMIT`` used when a batch of sellers is read.
            proxy_name: handed to ``Requests_param_val`` unchanged.
            week: week number consumed by ``init_db_names`` to build
                ``year_week``.
        """
        super().__init__()
        self.site_name = site_name
        print(site_name, '代理 async_account_feedback: ', proxy_name)
        # Request helper (cookies, headers, anti-bot checks). The attribute
        # keeps its historical spelling because other methods reference it.
        self.reuests_para_val = Requests_param_val(
            site_name=self.site_name,
            spider="seller_account",
            proxy_name=proxy_name,
        )
        self.week, self.read_size = week, read_size
        self.init_db_names()

        # Counters.
        self.ip_num_proxy = self.ip_num = self.headers_num_int = 0

        # Work queues shared between run() and the worker threads.
        self.cookies_queue = Queue()  # cookies available for requests
        self.item_queue = Queue()  # scraped items waiting to be stored
        self.queries_account_url_queue = Queue()  # sellers still to scrape

        # Per-batch seller-id buckets, flushed to the sync table by save_data().
        self.account_id_not_found_list = []  # sellers to mark state 4
        self.requests_error_asin_list = []  # failed requests, rolled back to state 1
        self.account_list_update = []

        self.account_detail_list = []
        self.products_link_account_name_list = []
        self.request_param_val_dict = {}
        self.cookie_dict_delete_id = {}
        self.delete_cookies_list = []

        # Set to False by read_db_data() once no pending seller is left.
        self.stop_item_queue = True

        # Columns written to the feedback table, in row order.
        self.cols_db = [
            'seller_id',
            'count_30_day',
            'count_1_year',
            'count_lifetime',
            "site_name",
            'date_info',
            'seller_address',
        ]
        # Home page URL and host of the selected site.
        self.site_url, self.host = self.reuests_para_val.get_site_url(self.site_name)

    def init_db_names(self):
        """Open the DB engines and derive the period labels and table names.

        Sets ``engine``/``engine_pg``, ``year_week`` (``YYYY-WW``),
        ``year_month`` (``YYYY_MM``) and the per-site table names.

        Raises:
            TypeError: if ``self.week`` is None.
            ValueError: if ``self.week`` cannot be parsed as an integer.
        """
        # Validate before connecting so a missing week does not leave freshly
        # opened engines behind.
        if self.week is None:
            raise TypeError("week is required to build year_week, got None")
        # Normalise through int so 5, '5', '05' and ' 5' all become '05'
        # (the old string padding produced '005' for an already padded week).
        week_number = int(self.week)

        self.engine = self.mysql_connect()
        self.engine_pg = self.pg_connect()

        # One clock snapshot so year and month cannot straddle a rollover.
        now = time.localtime()
        year = time.strftime("%Y", now)
        month = time.strftime("%m", now)
        self.year_week = f"{year}-{week_number:02d}"
        self.year_month = f'{year}_{month}'

        # The configured names have their first two characters dropped and the
        # current site name prepended (presumably a two-letter site prefix in
        # the templates — confirm against DB_REQUESTS_ASIN_PARAMS).
        params = DB_REQUESTS_ASIN_PARAMS
        self.db_seller_account_syn = self.site_name + params['db_seller_account_syn'][2:] + '_distinct'
        self.db_seller_account_feedback = self.site_name + params['db_seller_account_feedback'][2:]
        self.db_seller_account_product_syn = self.site_name + params['db_seller_account_product_syn'][2:]
        self.db_search_term = self.site_name + params["db_search_term"][2:]

    def get_account_url(self, t_num):
        """Worker loop: scrape seller pages until the work queue is drained.

        Each queue entry is ``url|-|account_name|-|seller_id``. For every
        entry the seller page is fetched and the feedback counts and seller
        address are parsed. The resulting item is put on ``item_queue``.
        Sellers whose request or parse fails are appended to
        ``requests_error_asin_list`` so save_data() can roll them back to
        state 1.

        Args:
            t_num: worker index, used only in the exit log line.
        """
        from queue import Empty  # local import: the module only imports Queue

        while True:
            # get_nowait() rather than empty() + get(): with several workers
            # another thread can take the last entry between the two calls,
            # and a blocking get() would then hang this thread (and join()).
            try:
                querys = self.queries_account_url_queue.get_nowait()
            except Empty:
                print(f"当前线程-{t_num} 已完成-爬取-跳出循环")
                break

            if self.cookies_queue.empty():
                cookies_dict = self.reuests_para_val.get_cookie()
                self.cookie_dict_delete_id = cookies_dict
                for ck in cookies_dict.values():
                    self.cookies_queue.put(ck)
            # Build the cookie header value.
            cookie_str = self.reuests_para_val.get_cookie_str(self.cookies_queue)

            query = querys.split('|-|')
            scraper_url = self._build_scraper_url(query[0])  # seller page URL
            account_name = query[1]  # seller name
            seller_id = query[2]  # seller id
            headers = self.reuests_para_val.requests_amazon_headers(host=self.host, site_url=self.site_url,
                                                                    asin=None, scraper_url=scraper_url)
            print(self.headers_num_int, account_name, scraper_url)
            headers["cookie"] = cookie_str

            try:
                print('请求url: ', scraper_url)
                # A fresh DESAdapter is mounted before every request
                # (presumably to vary the TLS fingerprint — confirm).
                sess.mount(self.site_url, py_ja3.DESAdapter())
                resp = sess.get(scraper_url, headers=headers,
                                timeout=10, verify=False)
                resp.close()
                if self.reuests_para_val.check_amazon_yzm(resp):
                    print(f"{self.site_name}  站点  +   使用代理ip出现验证码：{scraper_url}")
                    self.requests_error_asin_list.append(seller_id)
                    self.headers_num_int += 1
                    time.sleep(random.uniform(1.5, 5.5))
                    continue
            except Exception as e:
                print("{} 请求错误错误".format(account_name), e)
                self.requests_error_asin_list.append(seller_id)
                time.sleep(random.uniform(2, 3.5))
                continue

            # Parsing is inside the try: an empty or unparsable body must roll
            # the seller back instead of killing the worker thread.
            try:
                response = resp.text
                response_s = etree.HTML(response)
                ingress = response_s.xpath("//span[@id='glow-ingress-line2']/text()")
            except Exception:
                print(seller_id, "html 获取失败，返回空 html", scraper_url)
                self.requests_error_asin_list.append(seller_id)
                continue

            if self.reuests_para_val.check_amazon_not_page(response):
                print(f" 页面变狗 {resp.url}")
                self.headers_num_int += 1
                self.requests_error_asin_list.append(seller_id)
                time.sleep(random.uniform(4, 10.5))
                continue

            # Delivery-location text from the page header.
            try:
                ingress = ingress[0].strip()
                print('ingress:', ingress)
                if self.reuests_para_val.check_amazon_ingress(ingress):
                    # The cookie is flagged by the ingress check: queue its
                    # id for deletion and retry this seller later.
                    try:
                        cookie_ubid_main_id = re.findall(r'ubid-main=(.*?);', cookie_str)[0]
                    except IndexError:
                        cookie_ubid_main_id = re.findall(r'session-id=(.*?);', cookie_str)[0]
                    for cookie_id, cookie_value in self.cookie_dict_delete_id.items():
                        if cookie_ubid_main_id in cookie_value:
                            self.delete_cookies_list.append(cookie_id)
                    self.requests_error_asin_list.append(seller_id)
                    time.sleep(random.uniform(4, 10.5))
                    continue
            except Exception:
                # Best effort: a missing ingress is tolerated and only
                # influences the "no data" decision below.
                ingress = None
                print("获取邮编錯誤:")
                self.headers_num_int += 1

            all_account_list = response_s.xpath(
                '//table[@id="feedback-summary-table"]//tr[last()]//td[@class="a-text-right"]/span//text()')
            all_time_list = response_s.xpath('//table[@id="feedback-summary-table"]//th/text()')
            site_name_list = response_s.xpath(
                '//ul[@class="a-unordered-list a-nostyle a-vertical"]//li/span/text()|//div[contains (@id,"detail-seller")]//span/text()')
            if site_name_list:
                print('完整地址拼接：：', '|-|'.join(site_name_list))
                seller_address = '|-|'.join(site_name_list)
                # The last address line counts as the site/country code only
                # when it is exactly two characters long.
                site_name_ = site_name_list[-1] if len(site_name_list[-1]) == 2 else None
            else:
                seller_address = None
                site_name_ = None

            if len(all_account_list) == len(all_time_list) == 4:
                # Summary-table layout: 30 days / 90 days / 1 year / lifetime.
                raw_counts = [self._clean_count(text) for text in all_account_list]
            elif site_name_ is None and ingress is None:
                print("获取数据失败")
                self.requests_error_asin_list.append(seller_id)
                continue
            else:
                # Alternative layout: one rating block per period; a missing
                # block counts as zero.
                raw_hits = [
                    response_s.xpath(
                        f"//div[@id='{div_id}']/span[@class='ratings-reviews-count']/text()")
                    for div_id in ('rating-thirty-num', 'rating-90-num',
                                   'rating-365d-num', 'rating-lifetime-num')
                ]
                raw_counts = [self._clean_count(hit[0]) if hit else 0 for hit in raw_hits]
                print(*raw_hits)

            try:
                count_30_day, count_90_day, count_1_year, count_lifetime = [
                    int(count) for count in raw_counts]
            except ValueError as e:
                # A non-numeric count used to raise out of the thread; treat
                # it like every other parse failure and roll the seller back.
                print(seller_id, "feedback 数量解析失败", e)
                self.requests_error_asin_list.append(seller_id)
                continue

            item = {'seller_id': seller_id, 'site_name': site_name_, 'account_name': account_name,
                    'count_30_day': count_30_day, 'count_90_day': count_90_day,
                    'count_1_year': count_1_year, 'count_lifetime': count_lifetime,
                    'seller_address': seller_address
                    }
            print(item)
            self.item_queue.put(item)

    def _build_scraper_url(self, raw_url):
        """Return the URL to request for a stored seller URL.

        URLs carrying a ``seller=`` parameter are rewritten to the canonical
        ``sp?ie=UTF8&seller=<id>`` page; anything else is used as is.
        """
        if "seller=" not in raw_url:
            return raw_url
        # [^&]* stops at the first '&'; the former greedy '(.*)&' swallowed
        # every following parameter but the last into the seller id.
        seller = re.search(r"seller=([^&]*)", raw_url).group(1)
        return "{}sp?ie=UTF8&seller={}".format(self.site_url, seller)

    @staticmethod
    def _clean_count(raw):
        """Strip thousands separators and whitespace from a scraped count."""
        for separator in (',', '\xa0', '.', ' '):
            raw = raw.replace(separator, '')
        return raw.strip()

    def init_list(self):
        """Reset the per-batch queues, buckets and counter after a batch."""
        print("清空变量")
        # Fresh queues for scraped items and for sellers still to scrape.
        self.item_queue, self.queries_account_url_queue = Queue(), Queue()
        # Every per-batch list gets its own new empty list.
        for bucket_name in (
            'account_id_not_found_list',  # sellers to mark state 4
            'requests_error_asin_list',  # failed requests, rolled back to state 1
            'account_detail_list',  # rows waiting to be stored
            'account_list_update',
            'products_link_account_name_list',
            'delete_cookies_list',
        ):
            setattr(self, bucket_name, [])
        self.headers_num_int = 0

    def run(self):
        """Process one batch of sellers, or close the workflow when none is left.

        With pending sellers: fill the work queue, scrape with 10 worker
        threads, store the results, delete the flagged cookies and reset the
        per-batch state. Without pending sellers, and once read_db_data() has
        set ``stop_item_queue`` to False, the workflow progress rows are
        updated and a notification is sent.
        """
        account_querys_url_list = self.read_db_data()
        if not account_querys_url_list:
            if not self.stop_item_queue:
                self._mark_workflow_finished()
            return

        if self.cookies_queue.empty():
            cookies_dict = self.reuests_para_val.get_cookie()
            self.cookie_dict_delete_id = cookies_dict
            for ck in cookies_dict.values():
                self.cookies_queue.put(ck)
        for account_name_url in account_querys_url_list:
            self.queries_account_url_queue.put(account_name_url)

        html_thread = []
        for i in range(10):
            worker = threading.Thread(target=self.get_account_url, args=(i,))
            # Start before sleeping so the 2 s pause staggers the workers;
            # sleeping between constructing unstarted threads only delayed
            # the whole batch by 20 s.
            worker.start()
            html_thread.append(worker)
            time.sleep(2)
        for worker in html_thread:
            worker.join()

        # Store the scraped data.
        print("存储数据")
        self.process_item()
        self.reuests_para_val.delete_china_cookie(self.delete_cookies_list)
        # Reset the per-batch state.
        self.init_list()

    def _mark_workflow_finished(self):
        """Mark the feedback crawl finished in ``workflow_progress`` and notify."""
        from threading_spider.db_connectivity import connect_db
        db_class = connect_db('us')
        cursor_us, connect_us = db_class.us_mysql_db()  # 'us' database
        try:
            print("-----------------店铺feedback----程序结束---------------------跳出")
            sql = f"select status_val from workflow_progress where page='店铺Feedback' and date_info='{self.year_week}' and site_name='{self.site_name}' and date_type='week'"
            print(sql)
            cursor_us.execute(sql)
            status_val_tuple = cursor_us.fetchone()
            print(status_val_tuple)
            # fetchone() returns None when no weekly row exists; there is
            # nothing to close in that case.
            if status_val_tuple is None or status_val_tuple[0] not in (1, 2):
                return

            month_label = self.year_month.replace('_', '-')
            time_strftime = time.strftime("%Y-%m-%d %X", time.localtime())
            update_workflow_progress = f"update workflow_progress set status_val=3,status='抓取结束' where page='店铺Feedback' and date_info='{self.year_week}' and site_name='{self.site_name}' and date_type='week'"
            cursor_us.execute(update_workflow_progress)
            connect_us.commit()
            try:
                into_workflow_progress = f"INSERT INTO workflow_progress (page, date_info, site_name, date_type, is_end, status_val, status, table_name) VALUES ('店铺Feedback', '{month_label}', '{self.site_name}', 'month', '否', 3, '抓取结束','{self.site_name}_seller_account_feedback');"
                print(into_workflow_progress)
                cursor_us.execute(into_workflow_progress)
                connect_us.commit()
            except Exception as e:
                # The INSERT failed (presumably the monthly row already
                # exists — confirm); update that row instead.
                print("insert workflow_progress failed, falling back to update:", e)
                into_workflow_progress = f"update workflow_progress set status_val=3,status='抓取结束' where page='店铺Feedback' and date_info='{month_label}' and site_name='{self.site_name}' and date_type='month'"
                print(into_workflow_progress)
                cursor_us.execute(into_workflow_progress)
                connect_us.commit()

            url = 'http://47.112.96.71:8082/selection/sendMessage'
            data = {
                'account': 'pengyanbing,chenyuanjie,chenjianyun',
                'title': self.site_name + '站点 feedback',
                'content': month_label + ' 月  feedback 已结束,请确认下一步流程!时间:' + time_strftime
            }
            try:
                requests.post(url=url, data=data, timeout=15)
            except Exception as e:
                # The notification is best effort, but the failure is logged.
                print("feedback notification failed:", e)
        finally:
            # Always release the cursor and connection, also on errors.
            cursor_us.close()
            connect_us.close()

    def process_item(self):
        """Move every queued item into ``account_detail_list`` and store it.

        Each item becomes one row in ``cols_db`` order. Once the queue is
        empty, save_data() is called and the row buffers are cleared.
        """
        print("=================开始存储数据======================")
        while True:
            if not self.item_queue.empty():
                scraped = self.item_queue.get()
                # Row layout follows cols_db; date_info is the current month.
                self.account_detail_list.append([
                    scraped['seller_id'],
                    scraped['count_30_day'],
                    scraped['count_1_year'],
                    scraped['count_lifetime'],
                    scraped["site_name"],
                    self.year_month.replace('_', '-'),
                    scraped['seller_address'],
                ])
                continue
            # Re-check before flushing, in case an item arrived meanwhile.
            if not self.item_queue.empty():
                continue
            self.save_data()
            self.account_detail_list = []
            self.products_link_account_name_list = []
            print("----队列空-----跳出--存储---")
            break
    def read_db_data(self):
        """Claim the next batch of pending sellers and return their queue entries.

        Reads up to ``read_size`` rows with state 1, marks them state 2 and
        returns one ``url|-|account_name|-|seller_id`` string per row. When no
        pending row is left, ``stop_item_queue`` is set to False and an empty
        list is returned. Any failure is logged and retried after 5 seconds.

        NOTE(review): the SELECT uses ``for update`` but the UPDATE runs in a
        separate ``engine.begin()`` block; whether the row lock spans both
        depends on the engine wrapper — confirm.
        """
        while True:
            try:
                self.engine = self.mysql_connect()
                sql_read = f'SELECT url, account_name, id,seller_id FROM {self.db_seller_account_syn} WHERE STATE=1 LIMIT {self.read_size} for update;'
                self.df_read = self.engine.read_sql(sql_read)
                if self.df_read.shape[0] == 0:
                    self.stop_item_queue = False
                    print(f"**************** {self.site_name} feedback 抓取完毕 **********************")
                    return []

                self.index_tuple = tuple(self.df_read['id'])
                # Joining the reprs renders one id as "(1)" and several as
                # "(1, 2)", so no single-element special case is needed.
                id_csv = ', '.join(map(repr, self.index_tuple))
                sql_update = f"""UPDATE {self.db_seller_account_syn} a set state=2 where a.id in ({id_csv})"""
                with self.engine.begin() as conn:
                    conn.execute(sql_update)

                # A NULL column would turn the concatenated entry into NaN (a
                # float) and crash the worker at .split(); use '' instead.
                url = self.df_read.url.fillna('').astype(str)
                account_name = self.df_read.account_name.fillna('').astype(str)
                seller_id = self.df_read.seller_id.fillna('').astype(str)
                return list(url + '|-|' + account_name + '|-|' + seller_id)
            except Exception as e:
                print("读取数据出bug并等待5s继续", e, f"\n{traceback.format_exc()}")
                # Actually wait the announced 5 s instead of spinning on a
                # database that is down.
                time.sleep(5)

    def save_data(self):
        """Write the buffered rows to the monthly feedback table, then flush states.

        Rows are de-duplicated by ``seller_id``; existing rows for those
        sellers are deleted before the new ones are appended. The write is
        retried until it succeeds. Afterwards each non-empty seller bucket is
        pushed to the sync table through db_change_state() and cleared.
        """
        while True:
            try:
                self.engine_pg = self.pg_connect()
                frame = pd.DataFrame(data=self.account_detail_list, columns=self.cols_db)
                frame = frame.drop_duplicates(['seller_id'])  # one row per seller
                self.account_list_update = list(frame.seller_id)
                if frame.shape[0] > 0:
                    target_table = self.db_seller_account_feedback + '_' + self.year_month
                    unique_ids = set(frame.seller_id)
                    # A one-element tuple renders with a trailing comma, so
                    # the single-seller case is formatted by hand.
                    if len(unique_ids) == 1:
                        sql_delete = f"delete from {target_table} where seller_id in ('{tuple(frame.seller_id)[0]}');"
                    else:
                        sql_delete = f"delete from {target_table} where seller_id in {tuple(unique_ids)};"
                    with self.engine_pg.begin() as conn:
                        conn.execute(sql_delete)
                    print(f"feedback 信息 {target_table}")
                    self.engine_pg.to_sql(frame, target_table, if_exists='append')
                self.account_detail_list = []
                break
            except Exception as e:
                time.sleep(10)
                print(f"存储'{self.db_seller_account_feedback}'失败，等待5s继续", e, f"\n{traceback.format_exc()}")
                continue

        # State 1 = roll back, 3 = stored, 4 = seller not found.
        for state, bucket_name in (
            (1, 'requests_error_asin_list'),
            (3, 'account_list_update'),
            (4, 'account_id_not_found_list'),
        ):
            if getattr(self, bucket_name):
                self.db_change_state(state=state)
                setattr(self, bucket_name, [])

    def db_change_state(self, state=2):
        """Push the seller bucket that belongs to ``state`` to the sync table.

        States 1, 3 and 4 map to the request-error, stored and not-found
        buckets respectively; any other state is ignored.
        """
        bucket_by_state = (
            (1, 'requests_error_asin_list'),
            (3, 'account_list_update'),
            (4, 'account_id_not_found_list'),
        )
        for code, bucket_name in bucket_by_state:
            if state == code:
                self.db_change_state_common(state=state, account_id_list=getattr(self, bucket_name))
                break

    def db_change_state_common(self, state, account_id_list):
        """Set ``state`` on the claimed rows of the given sellers.

        The seller ids are mapped back to row ids through ``self.df_read``
        (the batch loaded by read_db_data). Only rows still in state 2 are
        updated. The update is retried until it succeeds.

        Args:
            state: new state value (1 = roll back, 3 = stored, 4 = not found).
            account_id_list: seller ids to update.
        """
        print(f"==================== 存储状态 {state} 数据 ========== {len(account_id_list)} ========")
        df = self.df_read.loc[self.df_read.seller_id.isin(account_id_list)]
        id_tuple = tuple(df.id)
        if not id_tuple:
            # Nothing matched the current batch: skip opening a connection.
            return

        # Joining the reprs renders one id as "(1)" and several as "(1, 2)",
        # so no single-element special case is needed.
        id_csv = ', '.join(map(repr, id_tuple))
        sql_update = f"update {self.db_seller_account_syn} set state={state} where id in ({id_csv}) and state=2;"
        while True:
            try:
                self.engine = self.mysql_connect()
                with self.engine.begin() as conn:
                    conn.execute(sql_update)
                break
            except Exception as e:
                print(f"更改{self.db_seller_account_syn}表的state={state}出错", e, f"\n{traceback.format_exc()}")
                # Back off before retrying instead of spinning on a database
                # that is down.
                time.sleep(5)