import os
import requests
from urllib.parse import urlparse
from lxml import etree
import json
import re
# Sets NO_PROXY for this process before any request is made.
# NOTE(review): the value names stackoverflow.com, while the URLs visible in
# this file are all Amazon storefronts — confirm this host is intended.
os.environ['NO_PROXY'] = 'stackoverflow.com'
# Silences urllib3 warnings; the HTTP calls in this file pass verify=False.
requests.packages.urllib3.disable_warnings()
import copy
# from curl_cffi import requests
import time
from sqlalchemy import Column, Integer, String, ForeignKey, create_engine
import pandas as pd
from datetime import datetime


# Database connection settings.
# Each value may be overridden through an environment variable; the literals
# are kept only as fallbacks so existing deployments keep working.
# NOTE(security): the fallback password is a credential committed to source
# control — rotate it and supply SELECTION_DB_PASSWORD instead.
from urllib.parse import quote_plus

host = os.environ.get("SELECTION_DB_HOST", "192.168.10.223")
database = os.environ.get("SELECTION_DB_NAME", "selection")
user = os.environ.get("SELECTION_DB_USER", "postgres")
password = os.environ.get("SELECTION_DB_PASSWORD", "fazAqRRVV9vDmwDNRNb593ht5TxYVrfTyHJSJ3BS")
table_name = "detail_cookies_wj"
# Percent-encode the credentials so characters such as '@' or '/' in an
# overridden value cannot corrupt the connection URL.
engine = create_engine(
    f"postgresql://{quote_plus(user)}:{quote_plus(password)}@{host}/{database}"
)
# NOTE: create_engine() does not open a connection by itself, so this message
# only confirms that the engine object was built.
print(f"成功连接到{table_name}数据库")
class Get_cookie():
    """Collect an anonymous Amazon session cookie bound to a delivery zip code.

    The flow is four HTTP steps (``request`` -> ``parse_data`` ->
    ``parse_anti`` -> ``parse_zip``). ``run`` chains them and hands the
    resulting record to ``save_cookies``.
    """

    # Storefront base URL for every supported site code.
    SITE_URLS = {
        'us': 'https://www.amazon.com',
        'de': "https://www.amazon.de",
        'uk': "https://www.amazon.co.uk",
        'it': "https://www.amazon.it",
        'es': "https://www.amazon.es",
        'fr': "https://www.amazon.fr",
        'mx': "https://www.amazon.com.mx",
        'ca': "https://www.amazon.ca",
    }

    # Seconds to wait on a single HTTP call; without a timeout a stalled
    # connection would block the whole run indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, site='us', **kwargs):
        """Prepare the base URL, request headers and per-site lookup tables.

        Args:
            site: Site code, one of the keys of ``SITE_URLS``.
            **kwargs: Accepted for compatibility and ignored.

        Raises:
            ValueError: If ``site`` is not a supported site code.
        """
        try:
            self.url_ = self.SITE_URLS[site]
        except KeyError:
            supported = ', '.join(sorted(self.SITE_URLS))
            raise ValueError(
                f"unsupported site {site!r}; expected one of: {supported}"
            ) from None
        self.headers = {
            'Host': urlparse(self.url_).hostname,
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'referer': self.url_,
            'origin': self.url_,
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
            'accept-language': 'zh-CN,zh;q=0.9',
        }

        self.site = site
        # Zip / postal code submitted to the address-change endpoint per site.
        self.site_dict = {
            "us": '10010',
            "de": '10115',
            "uk": 'W1S 3PR',
            "it": '00185',
            "es": '28001',
            "fr": '75019',
            "mx": '54607',
            "ca": 'M5B 2H1'
        }
        # Locale stored in the per-site "lc-acb<site>" cookie.
        # NOTE: the US entry is keyed "lc_main" (underscore); parse_zip sets
        # the US "lc-main" cookie from a literal and never reads that entry.
        self.country_lc_main = {
            "lc_main": 'en_US',
            "lc-acbde": 'de_DE',
            "lc-acbuk": 'en_GB',
            "lc-acbit": 'it_IT',
            "lc-acbes": 'es_ES',
            "lc-acbfr": 'fr_FR',
            "lc-acbmx": 'es_MX',
            "lc-acbca": 'en_CA',
        }

    @staticmethod
    def _parse_set_cookie(header):
        """Extract ``name -> value`` pairs from a combined Set-Cookie header.

        Several Set-Cookie headers arrive joined with ", ", but an
        ``Expires=Wed, 09-Apr-...`` attribute contains the same separator.
        Fragments that are the tail of such a date are skipped, and every
        value is cut at the first ';' so attributes (Domain, Expires, ...)
        never leak into the cookie value.

        Args:
            header: Raw header text; ``None`` or an empty string is allowed.

        Returns:
            dict mapping cookie names to their bare values.
        """
        cookies = {}
        for fragment in (header or '').split(', '):
            name, sep, rest = fragment.partition('=')
            name = name.strip()
            # A date tail either has no '=' at all or carries ';' / spaces
            # before the first '=' (e.g. "09-Apr-2026 10:00:00 GMT; Path=/").
            if not sep or not name or ';' in name or ' ' in name:
                continue
            cookies[name] = rest.split(';', 1)[0]
        return cookies

    def request(self):
        """Fetch the storefront home page and extract the bootstrap data.

        Returns:
            ``(csrftoken, cookie_dict, url_get_ubid)`` on success, or ``None``
            when the page lacks the expected markup.
        """
        with requests.Session() as session:
            response = session.get(
                url=self.url_,
                headers=self.headers,
                verify=False,
                timeout=self.REQUEST_TIMEOUT,
            )
        html = etree.HTML(response.text)
        if html is None:
            return None
        modal = html.xpath(".//span[@id='nav-global-location-data-modal-action']/@data-a-modal")
        if not modal:
            return None
        csrftoken = (json.loads(modal[0]).get("ajaxHeaders") or {}).get("anti-csrftoken-a2z")
        scripts = html.xpath(".//script[contains(text(), 'GwInstrumentation.markH1Af')]//text()")
        uri = re.findall(r'uri: "(.*?)" }', scripts[0]) if scripts else []
        if not csrftoken or not uri:
            return None

        # Every later call in the flow is sent with a wildcard accept header.
        self.headers["accept"] = "*/*"

        received = self._parse_set_cookie(response.headers.get('Set-Cookie', ''))
        wanted = ('session-id', 'session-id-time', 'i18n-prefs')
        cookie_dict = {name: received[name] for name in wanted if name in received}

        url_get_ubid = self.url_ + uri[0]
        print(f'url_get_ubid: {url_get_ubid}')
        return csrftoken, cookie_dict, url_get_ubid

    def parse_data(self, csrftoken, ck, url_get_ubid):
        """Call the URL found by ``request`` and assemble the cookie set.

        Args:
            csrftoken: Token returned by ``request``.
            ck: Cookie dict returned by ``request``.
            url_get_ubid: URL whose response sets the ``ubid-*`` cookie.

        Returns:
            ``(url, up_cookies, headers)`` for ``parse_anti``, or ``None`` when
            the server answers with HTTP 400.
        """
        with requests.Session() as session:
            response = session.get(
                url_get_ubid,
                headers=self.headers,
                verify=False,
                timeout=self.REQUEST_TIMEOUT,
            )
        # Compare the status code, not the Response object itself.
        if response.status_code == 400:
            return None

        merged = dict(ck)
        merged.update(self._parse_set_cookie(response.headers.get('Set-Cookie', '')))

        headers = copy.deepcopy(self.headers)
        headers["anti-csrftoken-a2z"] = csrftoken

        if self.site == 'us':
            sp_cdn, ubid_key = '"L5Z9:CN"', 'ubid-main'
        else:
            sp_cdn, ubid_key = '"L5Z9:HK"', f'ubid-acb{self.site}'
        # Key order is kept stable because the dict is later serialised as JSON.
        up_cookies = {
            'i18n-prefs': merged.get("i18n-prefs"),
            'session-id': merged.get("session-id"),
            'session-id-time': merged.get("session-id-time"),
            'sp-cdn': sp_cdn,
            ubid_key: merged.get(ubid_key),
        }
        url = f'{self.url_}/portal-migration/hz/glow/get-rendered-address-selections?deviceType=desktop&pageType=Gateway&storeContext=NoStoreName&actionSource=desktop-modal'
        print(f'parse_data: {url}')
        return url, up_cookies, headers

    def parse_anti(self, url, up_cookies, headers):
        """Fetch the address-selection fragment and read its CSRF token.

        Args:
            url: Address-selection URL returned by ``parse_data``.
            up_cookies: Cookie dict, passed through unchanged.
            headers: Request headers carrying the first CSRF token.

        Returns:
            ``(url, headers, up_cookies, data)`` for ``parse_zip``, or ``None``
            when the response contains no ``CSRF_TOKEN``.
        """
        print('parse_anti')
        print(headers)
        print(url)
        with requests.Session() as session:
            response = session.get(
                url,
                headers=headers,
                verify=False,
                timeout=self.REQUEST_TIMEOUT,
            )
        print(response)
        print(response.text)
        anti = re.findall(r'CSRF_TOKEN : "(.*?)"', response.text)
        if not anti:
            return None
        print(anti)
        self.headers["anti-csrftoken-a2z"] = anti[0]
        data = '{"locationType":"LOCATION_INPUT","zipCode":"%s","storeContext":"generic","deviceType":"web","pageType":"Gateway","actionSource":"glow"}' % self.site_dict.get(self.site)
        headers = copy.deepcopy(self.headers)
        url = f'https://{urlparse(response.url).hostname}/portal-migration/hz/glow/address-change?actionSource=glow'
        print(f'parse_anti: {url}')
        return url, headers, up_cookies, data

    def parse_zip(self, url, headers, up_cookies, data):
        """Submit the zip code and build the cookie record to store.

        Args:
            url: Address-change URL returned by ``parse_anti``.
            headers: Request headers carrying the refreshed CSRF token.
            up_cookies: Cookies sent with the request and stored on success.
            data: JSON text describing the address change.

        Returns:
            dict with ``cookie_value`` (JSON text) and ``created_time``, or
            ``None`` when the response has no address country code or a
            required cookie is missing.
        """
        print('parse_zip')
        with requests.Session() as session:
            response = session.post(
                url,
                headers=headers,
                data=json.loads(data),
                cookies=up_cookies,
                verify=False,
                timeout=self.REQUEST_TIMEOUT,
            )
        items = json.loads(response.text)

        address = items.get('address') or {}
        if not address.get('countryCode'):
            return None

        # Work on a copy so the caller's dict is not modified.
        cookies = dict(up_cookies)
        ubid = cookies.get("ubid-main") or cookies.get(f"ubid-acb{self.site}")
        if not (cookies.get("session-id") and cookies.get("session-id-time") and ubid):
            return None

        if self.site == "us":
            cookies["lc-main"] = 'en_US'
        else:
            cookies[f"lc-acb{self.site}"] = self.country_lc_main.get(f"lc-acb{self.site}")

        # Values are already free of cookie attributes (see _parse_set_cookie),
        # so no textual clean-up of the JSON is needed.
        cookie = json.dumps(cookies)
        created_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        item = {
            'cookie_value': cookie,
            'created_time': created_time,
        }
        print(f'cookie: {cookie}')
        return item

    def save_cookies(self, item):
        """Append one cookie record to the module-level ``table_name`` table.

        Args:
            item: dict with ``cookie_value`` and ``created_time``.

        Returns:
            ``True`` when the row was written, ``False`` when the record was
            rejected or the database write failed.
        """
        # The record must carry a non-empty cookie_value.
        if not item.get('cookie_value'):
            print("cookie_value缺失或为空，保存失败！")
            return False

        columns = ['cookie_value', 'created_time']
        df = pd.DataFrame([item], columns=columns)

        try:
            # index=False: the DataFrame index is not written as a column.
            # NOTE(review): presumably the table generates its own id — confirm.
            df.to_sql(
                name=table_name,
                con=engine,
                if_exists='append',
                index=False
            )
        except Exception as e:
            print(f"cookie保存失败: {e}")
            return False
        print("cookie保存成功！")
        return True

    def _collect_item(self):
        """Run the four HTTP steps once; return the record or ``None``."""
        first = self.request()
        if first is None:
            print("request未返回有效数据，将重新尝试...")
            return None
        csrftoken, cookie_dict, url_get_ubid = first

        second = self.parse_data(csrftoken, cookie_dict, url_get_ubid)
        if second is None:
            print("parse_data未返回有效数据，将重新尝试...")
            return None
        url, up_cookies, headers = second

        third = self.parse_anti(url, up_cookies, headers)
        if third is None:
            print("parse_anti未返回有效数据，将重新尝试...")
            return None
        url, headers, up_cookies, data = third

        item = self.parse_zip(url, headers, up_cookies, data)
        if item is None:
            print("parse_zip未返回有效数据，将重新尝试...")
        return item

    def run(self, max_attempts=10, retry_delay=1.0):
        """Collect one cookie and store it, retrying a bounded number of times.

        Args:
            max_attempts: Maximum number of collection attempts.
            retry_delay: Seconds to pause between failed attempts.

        Returns:
            Number of cookies stored (0 or 1).
        """
        successful_saves = 0
        for attempt in range(1, max_attempts + 1):
            try:
                item = self._collect_item()
            except Exception as e:
                # Boundary handler: network or parsing errors trigger a retry.
                print(f"在处理过程中遇到错误: {e}. 准备重新尝试...")
                item = None

            if item is not None:
                # A storage failure is not fixed by fetching another cookie,
                # so the run ends here whether or not the save worked.
                if self.save_cookies(item):
                    successful_saves += 1
                    print(f"成功保存第{successful_saves}个cookie")
                break

            if attempt < max_attempts:
                time.sleep(retry_delay)
        print(f"所有尝试结束，共成功保存{successful_saves}个cookie。")
        return successful_saves



if __name__ == '__main__':
    # Script entry point: run the cookie collection flow for the default site ('us').
    Get_cookie().run()