import json
import random
import re
import time
import traceback

import pandas as pd
from curl_cffi import requests
from lxml import etree

from amazon_every_day_spider.secure_db_client import get_remote_engine

# Packaging command:
#   cd /d E:\Git_new\spider\py_spider
#   pyinstaller -F amazon_every_day_spider\Get_Cookies.py --clean --paths . --collect-submodules amazon_every_day_spider


# Storefront root URL and the product page used to bootstrap a session,
# keyed by site code.
_SITE_CONFIG = {
    'us': ('https://www.amazon.com', 'https://www.amazon.com/dp/B0009X29WK'),
    'uk': ('https://www.amazon.co.uk', 'https://www.amazon.co.uk/dp/B0714LLB2T'),
    'de': ('https://www.amazon.de', 'https://www.amazon.de/dp/B00006YYXM'),
    'fr': ('https://www.amazon.fr', 'https://www.amazon.fr/dp/B0FK9JNPM5'),
    'es': ('https://www.amazon.es', 'https://www.amazon.es/dp/B0FDFVY9J6'),
    'it': ('https://www.amazon.it', 'https://www.amazon.it/dp/B0F3C16GTF'),
    # The original URL had a stray double slash ("//dp/"); normalised to match
    # the other sites.
    'ca': ('https://www.amazon.ca', 'https://www.amazon.ca/dp/B08H3JPH74'),
}

# Browser user agent sent with every request (value kept exactly as before).
_USER_AGENT = ' Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'

# Location-label fragments that are accepted in addition to the requested zip.
# NOTE(review): presumably the label shows a truncated form of the UK / CA
# postcodes used by the caller ('W1S 3PR', 'M5B 2H1') - confirm.
_ACCEPTED_LABEL_FRAGMENTS = ("W1S 3", 'M5B 2H')


def _label_matches_zip(zip_text, label):
    """Return True when the location label shows the zip or an accepted fragment."""
    if zip_text in label:
        return True
    return any(fragment in label for fragment in _ACCEPTED_LABEL_FRAGMENTS)


def get_cookie(site='us', zipCode='10010'):
    """Obtain Amazon cookies bound to a delivery zip code and store them.

    The flow is:

    1. Load a product page and read the anti-CSRF token from the location
       modal element.
    2. Request the address-selection widget and extract its ``CSRF_TOKEN``.
    3. POST the zip code to the address-change endpoint.
    4. Request the location label so the session cookies are finalised.
    5. Reload the storefront home page with those cookies and check that the
       location label shows the requested zip code (or one of the accepted
       truncated fragments).
    6. On success, append the JSON-encoded cookies to the database:
       ``other_site_cookies`` (via the ``us`` engine) for ``ca``, otherwise
       ``{site}_cookies`` via the site's own engine.

    Args:
        site: Site code; one of ``us``, ``uk``, ``de``, ``fr``, ``es``,
            ``it``, ``ca``. Any other value is reported and skipped.
        zipCode: Zip / postal code to bind the session to.

    Returns:
        None. Failures are never raised to the caller: they are printed
        together with the traceback, followed by a random 2.5-5.5 s pause.
    """
    site_config = _SITE_CONFIG.get(site)
    if site_config is None:
        # Nothing was requested, so there is no reason to throttle here.
        print(f"获取 {site} 站点 cookie 报错，切换下一个站点", f"unsupported site: {site!r}")
        return
    index_url, url_asin = site_config
    zip_text = str(zipCode)

    try:
        # Cookies for 'ca' are stored through the 'us' engine.
        engine_us = get_remote_engine('us' if site == 'ca' else site, 'mysql')

        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Pragma': 'no-cache',
            'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': ' "Windows"',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': ' none',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': _USER_AGENT,
        }

        # The session is closed on exit so repeated calls do not leak handles.
        with requests.Session() as requ_see:
            # Step 1: load the product page and read the modal's CSRF token.
            asin_resp = requ_see.get(url_asin, headers=headers)
            print("第一步 请求asin首页:", url_asin)
            html_xpath = etree.HTML(asin_resp.text)
            if html_xpath is None:
                raise ValueError("product page returned no parsable HTML")
            ingress = html_xpath.xpath("//span[@id='glow-ingress-line2']/text()")
            print("第一次发送请求，获取邮编：", ingress)

            data_a_modal = html_xpath.xpath(
                "//span[@id='nav-global-location-data-modal-action']/@data-a-modal")
            if not data_a_modal:
                raise ValueError("location modal element not found on the product page")
            data_modal = json.loads(data_a_modal[0])
            print('获取参数anti-csrftoken-a2z：', data_modal)
            headers['Anti-Csrftoken-A2z'] = data_modal['ajaxHeaders']['anti-csrftoken-a2z']
            headers['Referer'] = url_asin

            # Step 2: open the address-selection widget to get its CSRF token.
            clkci_url = f'{index_url}/portal-migration/hz/glow/get-rendered-address-selections?deviceType=desktop&pageType=Detail&storeContext=grocery&actionSource=desktop-modal&toasterType=AIS_INGRESS'
            print('第二步点击')
            clkci_resp = requ_see.get(clkci_url, headers=headers)
            csrf_tokens = re.findall('CSRF_TOKEN : "(.*?)",', clkci_resp.text)
            if not csrf_tokens:
                raise ValueError("CSRF_TOKEN not found in the address-selection response")
            CSRF_TOKEN = csrf_tokens[0]
            print("CSRF_TOKEN:", CSRF_TOKEN)

            # Step 3: submit the zip code.
            address_url = f'{index_url}/portal-migration/hz/glow/address-change?actionSource=glow'
            headers_post = {
                'User-Agent': _USER_AGENT,
                'Accept-Encoding': 'gzip',
                'Origin': index_url,
                'Anti-Csrftoken-A2z': CSRF_TOKEN,
                'Sec-Fetch-Site': 'same-origin',
                'Sec-Fetch-Mode': 'cors',
                'Sec-Fetch-Dest': 'empty',
                'Accept': '*/*',
                'Content-Type': 'application/json',
                'X-Requested-With': 'XMLHttpRequest',
            }
            address_json = {
                "locationType": "LOCATION_INPUT",
                "zipCode": zip_text,
                "deviceType": "web",
                "storeContext": "grocery",
                "pageType": "Detail",
                "actionSource": "glow",
            }
            print('第三步 输入 邮编')
            post_resp = requ_see.post(address_url, headers=headers_post, json=address_json,
                                      verify=False, impersonate="chrome")
            print(post_resp.text)

            # Step 4: request the location label to finalise the cookies.
            submit_headers = {
                'User-Agent': _USER_AGENT,
                'Accept-Encoding': 'gzip',
                'Accept-Language': 'zh-CN,zh;q=0.9',
                'Accept': '*/*',
                'Referer': url_asin,
                'X-Requested-With': 'XMLHttpRequest',
                'Sec-Fetch-Site': 'same-origin',
                'Sec-Fetch-Mode': 'cors',
                'Sec-Fetch-Dest': 'empty',
            }
            print('第四步。提交')
            detail_url = f'{index_url}/portal-migration/hz/glow/get-location-label?storeContext=grocery&pageType=Detail&actionSource=desktop-modal'
            requ_see.get(detail_url, headers=submit_headers, verify=False, impersonate="chrome")
            cookie_dict = requ_see.cookies.get_dict()
        print(cookie_dict)

        # Step 5: verify with a fresh request that the cookies carry the zip.
        index_resp = requests.get(index_url, headers=headers, cookies=cookie_dict, verify=False,
                                  impersonate="chrome")
        index_xpath = etree.HTML(index_resp.text)
        if index_xpath is None:
            raise ValueError("home page returned no parsable HTML")
        ingress = index_xpath.xpath("//span[@id='glow-ingress-line2']/text()")
        print("获取最新邮编：", ingress)
        if not ingress:
            raise ValueError("location label not found on the home page")
        label = ingress[0].strip()

        # Step 6: persist the cookies only when the label confirms the zip.
        if _label_matches_zip(zip_text, label):
            print(f"***************    当前获取 {site} 站点 cookie   邮编  {zipCode}  ********************")
            cookies = json.dumps(cookie_dict, ensure_ascii=False)
            item = {"site": site, 'zipCode': label, 'cookie': cookies}
            print(item)
            if site == 'ca':
                df = pd.DataFrame([{"cookies": cookies, "type": "DB", 'site': 'ca'}])
                engine_us.to_sql(df, 'other_site_cookies', if_exists="append")
            else:
                df = pd.DataFrame([{"cookies": cookies, "type": "DB"}])
                engine_us.to_sql(df, f"{site}_cookies", if_exists="append")
        print('\n')
    except Exception as e:
        # Best-effort boundary: report the failure, pause, and let the caller
        # move on to the next site.
        print(f"获取 {site} 站点 cookie 报错，切换下一个站点", e)
        print("报错", f"\n{traceback.format_exc()}")
        time.sleep(random.uniform(2.5, 5.5))


if __name__ == '__main__':
    # (site, zip code) pairs refreshed on every pass; the commented entries
    # are the other sites that can be enabled.
    refresh_targets = [
        ('us', '10010'),
        # ('de', '10115'),
        # ('uk', 'W1S 3PR'),
        # ('it', '00185'),
        # ('es', '28001'),
        # ('fr', '75019'),
        # ('ca', 'M5B 2H1'),
    ]
    # Run forever, pausing a random 60.5-180.5 s between passes.
    while True:
        for target_site, target_zip in refresh_targets:
            get_cookie(site=target_site, zipCode=target_zip)
        time.sleep(random.uniform(60.5, 180.5))
