import time
import re
import pandas as pd
import sys
import os
import urllib3
import random
import uuid
from lxml import etree
# import requests
import json
import hashlib

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
from utils.db_connect import BaseUtils
from queue import Queue
from amazon_spider.VPS_IP import is_internet_available
from datetime import datetime, timedelta
import traceback
from curl_cffi import requests
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
urllib3.disable_warnings()


class Requests_param_val(BaseUtils):
    def __init__(self, site_name='us', spider="asin", proxy_name=None):
        """Set up database handles and per-instance state for one site.

        :param site_name: site code stored on the instance and used to build
            table names.
        :param spider: accepted for compatibility; not read in this method.
        :param proxy_name: accepted for compatibility; not read in this method.
        """
        super().__init__()
        self.site_name = site_name
        # Needs ``self.site_name``: the cookie table name is derived from it.
        self.init_db_names()
        # NOTE(review): the ``proxy_name`` argument is ignored and the proxy
        # label is pinned to a fixed value -- confirm this is intentional.
        self.proxy_name = 'Kdl_h10'
        # Queue of cookies shared with the code that consumes this helper.
        self.cookies_queue = Queue()
        print("站点名称：", self.site_name, '抓取项目', "代理ip：", self.proxy_name)

    def init_db_names(self):
        """Open the PostgreSQL/MySQL engines and resolve the table names used later."""
        self.engine_pg = self.pg_connect()
        self.engine = self.mysql_connect()
        # The configured cookie table name has its first two characters dropped
        # and the current site code put in front instead (presumably replacing a
        # default site prefix -- confirm against DB_REQUESTS_ASIN_PARAMS).
        cookie_table_suffix = DB_REQUESTS_ASIN_PARAMS['db_cookies'][2:]
        self.db_cookies = f"{self.site_name}{cookie_table_suffix}"
        self.db_ip_16yun = DB_REQUESTS_ASIN_PARAMS['db_ip_16yun']

    def get_site_url(self, site_name):
        if site_name == "us":
            self.site_url = 'https://www.amazon.com/'
            self.host = 'www.amazon.com'
        elif site_name == 'uk':
            self.site_url = 'https://www.amazon.co.uk/'  # 站点url
            self.host = 'www.amazon.co.uk'
        elif site_name == 'de':
            self.site_url = 'https://www.amazon.de/'
            self.host = 'www.amazon.de'
        elif site_name == 'fr':
            self.site_url = 'https://www.amazon.fr/'
            self.host = 'www.amazon.fr'
        elif site_name == 'es':
            self.site_url = 'https://www.amazon.es/'
            self.host = 'www.amazon.es'
        elif site_name == 'it':
            self.site_url = 'https://www.amazon.it/'
            self.host = 'www.amazon.it'
        return self.site_url, self.host

    def check_contain_chinese(self, check_str):
        """
        判断获取文本是否有中文
        """
        if check_str != '无':
            for c in check_str:
                if '\u4e00' <= c <= '\u9fa5':
                    print('--是中文，说明该cookie有问题，或者改数据有问题--')
                    return True

    def filter_str(self, desstr, restr=''):
        """
        过滤除中英文及数字空格以外的其他字符
        """
        res = re.compile("[^\u4e00-\u9fa5^a-z^A-Z^0-9^ ^_^%^-]")
        return res.sub(restr, desstr)

    def delete_china_cookie(self, delete_cookies_list):
        """Delete the given cookie rows (by id) from the site's cookie table.

        Blocks until ``is_internet_available()`` reports connectivity, then
        reconnects to MySQL and issues one DELETE per id inside a single
        ``engine.begin()`` block.

        :param delete_cookies_list: iterable of cookie ids; each is coerced with
            ``int()`` before being placed into the SQL text.
        """
        # Wait for connectivity, polling every 10 seconds.
        while not is_internet_available():
            time.sleep(10)
        if not delete_cookies_list:
            return
        print("删除cookie", len(delete_cookies_list))
        self.engine = self.mysql_connect()
        with self.engine.begin() as conn:
            for cookie_id in delete_cookies_list:
                try:
                    del_sql = f"DELETE FROM {self.db_cookies} WHERE id={int(cookie_id)}"
                    print(del_sql)
                    conn.execute(del_sql)
                except Exception as e:
                    # Still stop at the first failure (as before), but report it
                    # instead of swallowing it, and let KeyboardInterrupt /
                    # SystemExit propagate.
                    print('删除cookie报错:::', cookie_id, e)
                    break

    def get_cookie(self):
        """Load up to 350 cookies from the site's cookie table.

        Blocks until ``is_internet_available()`` reports connectivity, then
        retries until at least one cookie row is read.

        :return: dict mapping the row id (as ``str``) to the cookie string.
        """
        print("获取cookie，并返回")
        # Wait for connectivity, polling every 10 seconds.
        while not is_internet_available():
            time.sleep(10)
        while True:
            try:
                self.engine = self.mysql_connect()
                self.engine_pg = self.pg_connect()
                with self.engine.begin():
                    sql_read = f'SELECT cookies,id FROM {self.db_cookies} limit 350;'
                    print("获取cookie：", sql_read)
                    df_read = self.engine.read_sql(sql_read)
                    # Build the mapping directly rather than joining and
                    # re-splitting on "|-|": that round trip broke on cookies
                    # containing the separator and raised on NULL cookies,
                    # which sent this loop into an endless retry.
                    cookie_dict = {
                        str(cookie_id): cookie
                        for cookie, cookie_id in zip(df_read.cookies, df_read.id)
                        if isinstance(cookie, str)
                    }
                if cookie_dict:
                    print("cookie 字典大小：", len(cookie_dict))
                    return cookie_dict
                print(f"{self.db_cookies}  没有cookie")
                time.sleep(60)
            except Exception as e:
                print(e)
                # Back off before retrying so a persistent DB failure does not
                # turn into a tight reconnect loop.
                time.sleep(10)

    def db_column(self, site):
        if site in ('us', 'de', 'uk'):
            asin_detail_table = f'select * from {site}_asin_detail_month_2025 limit 1'
        else:
            asin_detail_table = f'select * from {site}_asin_detail_2025 limit 1'
        print(asin_detail_table)
        # df = pd.read_sql(asin_detail_table, con=self.engine_pg)
        df = self.engine_pg.read_sql(asin_detail_table)
        # 获取字段名称
        columns_list = list(df.columns)
        columns_list.remove('id')
        columns_list.remove('updated_time')
        columns_list.remove('category_state')
        if site in ('fr','es','it'):
            columns_list.append('week')
        print(len(columns_list))
        print(columns_list)
        return columns_list

    # 检查是返回源码是否正确
    def check_amazon_not_page(self, response):
        # asin 已下架 状态 4 Listen Now
        if ("Page Not Found" in response) or ("We are sorry! This Gift Card is not available" in response) or (
                "500 - An error occurred" in response) or ("Sorry! Something went wrong!" in response):
            return True

    # 检查是不是正常商品页面
    def check_amazon_page(self, response, response_url):
        if (
                "How Amazon Pharmacy works" in response and "Sign in to Pharmacy" in response and "About this medication" in response) or (
                "pharmacy." in response_url) or (
                "Buy Amazon Coins" in response and "Sold and delivered by ACI Gift Cards LLC, an Amazon company" in response) or (
                "Youtubers Life" in response and "Become the most successful youtuber on the planet! Create videos" in response):
            return True

    # 检查是否被重定向
    def check_amazon_allow_redirects(self, response_url, asin):
        if ("keywords" in response_url) or ("dp/" not in response_url) or (
                "ref=" in response_url and "encoding=" in response_url) or (asin not in response_url) or (
                "ASIN=" in response_url and "ref_=lx_bd" in response_url)or('ref=rd_fr_' in response_url and f'ref=rd_fr_{asin}'in response_url)\
                or ('&ASIN=' in response_url):
            return True

    # Check whether the delivery-location text shows a wrong (China / unset) postcode.
    def check_amazon_ingress(self, ingress):
        if ("中国大陆" in ingress) or ("China" in ingress) or ("Hong" in ingress) or ("Chine" in ingress) or (
                "Cina" in ingress) or ("Update location" in ingress) or ('香' in ingress) or ("location" in ingress):
            return True

    # 检查请求是否出现验证码：
    def check_amazon_yzm(self, resp):
        if ("Enter the characters you see below" in resp.text) or (
                "Geben Sie die Zeichen unten ein" in resp.text) or (
                "Introduce los caracteres que se muestran" in resp.text) or (
                "Saisissez les caractères que vous voyez" in resp.text) or (
                "Inserisci i caratteri visualizzati nello spazio" in resp.text):
            print('验证码')
            return True

    # 组装请求头，
    def requests_amazon_headers(self, host=None, site_url=None, asin=None, scraper_url=None):
        n = random.randint(118, 124)
        ua = f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{n}.0.{random.randint(1000, 5000)}.{random.randint(1, 181)} Safari/537.36'
        headers = {
            'connection': 'close',
            'authority': host,
            'accept': 'text/html,*/*',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'no-cache',
            'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
            'sec-ch-ua-mobile': '?0',
            'user-agent': ua,
            "Host": self.host,
            "Pragma": "no-cache",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Upgrade-Insecure-Requests": "1",
        }
        # if asin:
        #     headers['origin'] = f'{site_url}dp/{asin}'
        #     headers['referer'] = f'{site_url}dp/{asin}'
        if scraper_url:
            headers['origin'] = scraper_url
            headers['referer'] = scraper_url
        alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
                    's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
        k = ""
        for i in (0, random.randint(0, 26)):
            k += random.choice(alphabet)
        headers[k] = str(uuid.uuid4())
        return headers

    # 第二次发送请求。
    def requests_amazon(self, headers=None, scraper_url=None):
        """Fetch ``scraper_url`` (up to two attempts) and return the page source.

        An attempt is retried when a captcha page comes back, when the HTML
        cannot be parsed, or when the delivery-location label is missing/empty.

        :param headers: request headers to send.
        :param scraper_url: URL to fetch.
        :return: the response text on success; ``None`` if the request raised,
            the page is an Amazon error page, or both attempts were unusable.
        """
        for _ in range(2):
            try:
                resp = requests.get(scraper_url, headers=headers, impersonate="chrome",
                                    timeout=10, verify=False)
                if self.check_amazon_yzm(resp):
                    print('验证码2222222222222222')
                    continue
                # Read the delivery-location label; its presence is used as the
                # sign that a real storefront page was returned.
                try:
                    tree = etree.HTML(resp.text)
                    ingress = tree.xpath("//span[@id='glow-ingress-line2']/text()")
                except Exception:
                    continue
                try:
                    ingress = ingress[0].strip()
                except (IndexError, AttributeError, TypeError):
                    ingress = None
                if not ingress:
                    continue
                # Same error-page markers as check_amazon_not_page.
                if self.check_amazon_not_page(resp.text):
                    return None
                return resp.text
            except Exception as e:
                print('第二次请求报错:::', e)
                return None
        return None

    # 获取对应每个小时的数字。存到redis列表中
    def get_hour(self, new_date_hour):
        # new_date_hour = datetime.now().strftime("%Y-%m-%d:%H")
        # 获取当前日期
        current_date = datetime.now()
        # 将当前时间的小时、分钟和秒设置为0
        current_date = current_date.replace(hour=0, minute=0, second=0, microsecond=0)
        # 生成当天的24小时时间
        hourly_times = [current_date + timedelta(hours=i) for i in range(24)]
        hour_dict = {}
        # 打印每个小时的时间
        for hour_time in hourly_times:
            hour = hour_time.strftime("%Y-%m-%d:%H")
            num = re.findall(r':(\d+)', hour)[0]
            hour_dict[hour] = num
        print(new_date_hour, hour_dict)
        n = hour_dict[new_date_hour]
        return n

    # 组装cookie
    def get_cookie_str(self, cookies_queue):
        while 1:
            cookie_str = cookies_queue.get()
            if len(cookie_str) > 50:
                try:
                    cookie_lsit = json.loads(cookie_str)
                except:
                    cookie_lsit = eval(cookie_str)
                cookie_dic = {}
                try:
                    for i in cookie_lsit:
                        if i:
                            cookie_dic[i["name"]] = i["value"]
                        else:
                            continue
                    cookie_str = ''
                    for k, v in cookie_dic.items():
                        cookie_str = cookie_str + str(k) + '=' + str(v) + ';'
                    break
                except:
                    cookie_str = ''
                    for k, v in cookie_lsit.items():
                        cookie_str = cookie_str + str(k) + '=' + str(v) + ';'
                    break
            else:
                break
        return cookie_str

    # 获取自增id区间。根据传的站点获取对应的月 周 syn表的id
    def get_minid_maxid(self, site_name=None, state=None, minid_maxid=None, month=None, year_week=None):
        """Optionally mark an id range as processed, then fetch the next pending one.

        For us/de/uk the ranges are keyed by ``yaer_month = '2025_<month>'``;
        for every other site by ``yaer_week = <year_week>``. Retries forever
        (15 s apart) on any error.

        :param site_name: accepted for compatibility; ``self.site_name`` is used.
        :param state: new state for ``minid_maxid``; an UPDATE is issued only
            for ``3`` (monthly sites) or ``2``/``3`` (weekly sites).
        :param minid_maxid: the id-range string whose state is updated.
        :param month: month part of the ``yaer_month`` key (monthly sites).
        :param year_week: ``yaer_week`` key (weekly sites).
        :return: list with at most one ``minid_maxid`` value whose state is 1.
        """
        while True:
            try:
                # NOTE(review): the engine is re-created only when the
                # connectivity check FAILS -- confirm this is the intended
                # direction.
                if not is_internet_available():
                    self.engine = self.mysql_connect()
                table = f"{self.site_name}_syn_asin_all_minid_maxid"
                with self.engine.begin() as conn:
                    if self.site_name in ('us', 'de', 'uk'):
                        if state == 3 and minid_maxid:
                            sql_update = f"""UPDATE {table} set state=3 where minid_maxid='{minid_maxid}' and yaer_month = '2025_{month}'"""
                            print(sql_update)
                            conn.execute(sql_update)
                        sql_read = f"""SELECT id, minid_maxid FROM {table} WHERE STATE = 1 and yaer_month = '2025_{month}' LIMIT 1"""
                        print('sql_read:::', sql_read)
                    else:
                        # States 2 and 3 issued the same statement in two
                        # separate branches; handle them together.
                        if state in (2, 3) and minid_maxid:
                            sql_update = f"""UPDATE {table} set state={state} where minid_maxid='{minid_maxid}' and yaer_week = '{year_week}'"""
                            print(sql_update)
                            conn.execute(sql_update)
                        sql_read = f"""SELECT id, minid_maxid FROM {table} WHERE STATE = 1 and yaer_week = '{year_week}' LIMIT 1;"""
                    df_read = self.engine.read_sql(sql_read)
                    if df_read.shape[0] > 0:
                        minid_maxid_list = list(df_read.minid_maxid)
                        print(minid_maxid_list)
                    else:
                        minid_maxid_list = []
                    print('获取id 区间  self.minid_maxid_list:::', minid_maxid_list)
                    return minid_maxid_list
            except Exception:
                # Was a bare ``except:``, which also trapped KeyboardInterrupt /
                # SystemExit and made this retry loop impossible to interrupt.
                print("获取id区间报错", f"\n{traceback.format_exc()}")
                time.sleep(15)

    def hex_md5(self, input_string):
        # 要加密的字符串
        # 创建一个MD5哈希对象
        md5_hash = hashlib.md5()
        # 使用输入字符串的字节更新哈希对象
        md5_hash.update(input_string.encode('utf-8'))
        # 获取哈希的十六进制表示
        md5_hex_digest = md5_hash.hexdigest()
        return md5_hex_digest
# Manual smoke run: connect for the UK site and fetch the next pending id range
# for month '07'. With state=1 no UPDATE is issued (only state 3 triggers one
# for us/de/uk), so this only reads.
if __name__ == '__main__':
    Requests_param_val(site_name='uk').get_minid_maxid(month='07',state=1)
