import time
import sys, os
import logging
import requests
import pandas as pd
from urllib.parse import urlparse
from func_timeout import func_set_timeout
from sqlalchemy.exc import OperationalError
from func_timeout.exceptions import FunctionTimedOut
sys.path.append(os.path.dirname(os.path.dirname(sys.path[0])))  # 上级目录
from amazon_spider.utils.utils import send_mg
from amazon_spider.db.redis_db import sadd, expire
from amazon_spider.db.mysql_db import get_country_engine


class AsinStateFind:
    """Check whether a site's ASIN crawl queue has drained and notify the workflow service.

    The visible code does two things:

    * ``get_bsr_day_asin`` counts the distinct ASINs in
      ``{site}_self_all_syn`` whose ``state`` is 1 or 2.
    * ``if_bsr_spider_state`` posts a "crawl finished" event to the workflow
      endpoint, at most once per local calendar day, once that count is low
      enough.

    Legacy note carried over from the original docstring (not implemented by
    the code in this class): ``_self_all_syn`` holds ASINs refreshed monthly
    (ads), weekly (competitors) and daily (BSR list, new-release list, ERP
    coupons, ERP storefront anomalies); rows are grouped, aggregated on
    ``data_type`` and de-duplicated before being inserted into that table.
    """

    # Pause between database retries so an outage does not turn the retry
    # loops into a busy spin against the server.
    RETRY_DELAY_SECONDS = 2
    # (connect, read) timeout in seconds for the workflow notification call;
    # without one, requests would wait forever on a stalled endpoint.
    REQUEST_TIMEOUT = (10, 30)

    def __init__(self, site):
        """Store the site code (e.g. ``"us"``) used to pick the table and DB engine.

        Args:
            site: Site code; interpolated into SQL by ``get_bsr_day_asin``, so
                it must come from trusted configuration, never from user input.
        """
        self.site = site
        self.conn = None
        # Site code -> marketplace domain label. Not read inside this class.
        self.sites = {
            "us": "Amazon.com",
            "uk": "Amazon.co.uk",
            "de": "Amazon.de",
            "es": "Amazon.es",
            "it": "Amazon.it",
            "fr": "Amazon.fr",
            "mx": "Amazon.com.mx",
            "ca": "Amazon.ca",
        }
        # Logging is intentionally not configured here; the application
        # entry point is expected to call logging.basicConfig().

    @func_set_timeout(10)
    def get_bsr_day_asin(self):
        """Return True when at most 200 distinct ASINs are still in state 1 or 2.

        NOTE(review): states 1 and 2 presumably mean "pending / in progress";
        confirm against the table definition.

        Returns:
            bool: ``True`` if 200 or fewer matching ASINs remain, else ``False``.

        Raises:
            FunctionTimedOut: if the query takes longer than 10 seconds.
            sqlalchemy.exc.OperationalError: on database connection problems.
        """
        # The table name cannot be a bound parameter, hence the f-string;
        # self.site must therefore be a trusted value.
        sql = f"SELECT distinct asin from {self.site}_self_all_syn WHERE state in (1, 2) and site='{self.site}';"
        df_pending = pd.read_sql(sql, con=get_country_engine(self.site))
        return df_pending.shape[0] <= 200

    @staticmethod
    def _notification_failed(response):
        """Return True when the workflow endpoint's reply indicates failure.

        A reply counts as failed when the HTTP status is an error, the body is
        not valid JSON, or the JSON ``message`` field contains "操作失败".
        A missing or non-string ``message`` on a successful status is treated
        as success instead of raising ``TypeError``.
        """
        if not response.ok:
            return True
        try:
            payload = response.json()
        except ValueError:
            # Covers json/simplejson/requests JSONDecodeError variants.
            return True
        message = payload.get("message") if isinstance(payload, dict) else None
        return isinstance(message, str) and "操作失败" in message

    def if_bsr_spider_state(self):
        """Notify the workflow service once the crawl queue has drained.

        Steps:
            1. Poll ``get_bsr_day_asin`` (retrying forever on connection
               errors/timeouts, with a short pause between attempts).
            2. If the queue is not drained, print a notice and return.
            3. Read the latest ``date_info`` from ``bsr_day_asin``.
            4. Use the ``bsr_day_asin`` Redis set as a once-per-day guard and,
               when this is the first call today, POST the completion event.
            5. Send an alert through ``send_mg`` when the call fails.

        Raises:
            requests.RequestException: if the HTTP request itself fails or
                times out (an alert is sent before re-raising).
        """
        while True:
            try:
                crawl_finished = self.get_bsr_day_asin()
                break
            except OperationalError as e:
                # WARNING rather than INFO so the failure is visible with the
                # default (unconfigured) root logger.
                logging.warning("查看每日bsr是否爬取完成失败  连接错误%s", e)
            except FunctionTimedOut as e:
                logging.warning("查看每日bsr是否爬取完成超时  连接错误%s", e)
            time.sleep(self.RETRY_DELAY_SECONDS)

        if not crawl_finished:
            print("state未爬取完")
            return

        sql = "SELECT date_info from bsr_day_asin order by date_info desc limit 1;"
        while True:
            try:
                df_bsr_asin = pd.read_sql(sql, con=get_country_engine(self.site))
                break
            except OperationalError as e:
                logging.warning("查看每日bsr是否爬取完成失败  连接错误%s", e)
                time.sleep(self.RETRY_DELAY_SECONDS)

        report_dates = list(df_bsr_asin["date_info"])
        if not report_dates:
            # An empty table used to surface as a bare IndexError; there is
            # nothing to report, so skip without consuming today's guard.
            logging.warning("bsr_day_asin returned no rows; skipping workflow notification")
            return
        # NOTE(review): if date_info is a DATE/DATETIME column this value is
        # not JSON-serialisable as-is — confirm the column type.
        report_date = report_dates[0]
        print(report_date)

        str_time = time.strftime("%Y-%m-%d", time.localtime())
        url = "http://selection.yswg.com.cn:8080/soundasia_selection/workflow/emit"
        # sadd is a project helper (presumably Redis SADD): a falsy result
        # means today's marker is already present, i.e. already notified.
        # NOTE(review): the marker is written before the request succeeds and
        # is not site-specific, so a failed call is not retried that day.
        if not sadd("bsr_day_asin", url + f"#{str_time}"):
            print("已经发送过请求")
            return

        # 82800 s = 23 h, so the guard lapses before the next daily run.
        seconds = 82800
        expire("bsr_day_asin", seconds)

        data = {
            # NOTE(review): hard-coded to "us" even though the class takes a
            # site argument — confirm whether this should follow self.site.
            "siteName": "us",
            "tableName": "self_asin_day",
            "dateType": "day",
            "reportDate": report_date,
            "status": "asin爬取完成",
            "statusVal": 10,
            "remark": "",
            "isEnd": "否"
        }
        headers = {
            'Connection': 'close',
            'authority': urlparse(url).hostname,
            'accept': 'text/html,*/*',
            'accept-language': '*',
            "Content-Type": 'application/json',
            'origin': url,
        }
        try:
            responses = requests.post(
                url, json=data, headers=headers, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException:
            # Network failure/timeout: raise the same alert as an API-level
            # failure, then propagate as the original code did.
            send_mg("hezhe", "【bsr爬取提醒接口调用失败】", "请求bsr爬取提醒接口失败")
            raise

        print(responses.text)
        print(responses.status_code)
        if self._notification_failed(responses):
            send_mg("hezhe", "【bsr爬取提醒接口调用失败】", "请求bsr爬取提醒接口失败")
        else:
            print("爬取完成")


# AsinStateFind("us").if_bsr_spider_state()

