import os
import re
import time
import json
import base64
import logging
import threading
import redis as rd
from io import BytesIO
from datetime import datetime
from scrapy.selector import Selector
from curl_cffi import requests, curl, const
# from curl_cffi.requests.exceptions import InvalidURL


# Set the NO_PROXY environment variable for this process (by convention, hosts
# listed there are reached without going through an environment-configured proxy).
os.environ['NO_PROXY'] = 'stackoverflow.com'
# import requests


# Root logger: timestamp, logger name, level and message, at INFO level.
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s %(message)s', level=logging.INFO)

# Connection settings read by the ``Redis`` wrapper class below.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secret store.
REDIS = {
    # 'host': '127.0.0.1',
    'host': 'wx.yswg.com.cn',
    'port': 6379,
    'password': 'yswg@2019',
    'db': 1
}


# Connection settings read by the ``YswgRedis`` wrapper class below.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secret store.
YSWG_REDIS = {
    # 'host': '127.0.0.1',
    'host': '120.79.147.190',
    'port': 6379,
    'password': 'fG7#vT6kQ1pX',
    'db': 10
}


def singleton(cls, *args, **kw):
    """Wrap ``cls`` in a zero-argument factory that builds it at most once.

    The first call of the returned factory constructs ``cls(*args, **kw)``;
    every later call hands back that same object.

    :param cls: class (or other hashable callable) to instantiate lazily.
    :param args: positional arguments forwarded to ``cls`` on first use.
    :param kw: keyword arguments forwarded to ``cls`` on first use.
    :return: a function taking no arguments that returns the shared instance.
    """

    cache = {}

    def _singleton():
        if cls in cache:
            return cache[cls]
        created = cls(*args, **kw)
        cache[cls] = created
        return created

    return _singleton


@singleton
class Redis(object):
    """Owner of the connection pool for the server described by ``REDIS``."""

    def __init__(self):
        """Copy the ``REDIS`` settings onto the instance and build the pool."""
        settings = REDIS
        self.host = settings['host']
        self.port = settings['port']
        self.db = settings['db']
        self.password = settings['password']
        pool_options = dict(
            host=self.host,
            port=self.port,
            db=self.db,
            password=self.password,
            max_connections=3,
            socket_timeout=5,
            socket_connect_timeout=5,
            retry_on_timeout=True,
        )
        self.pool = rd.ConnectionPool(**pool_options)

    def get_instance(self):
        """Return a new ``rd.Redis`` client bound to the shared pool."""
        client = rd.Redis(connection_pool=self.pool)
        return client


@singleton
class YswgRedis(object):
    """Owner of the connection pool for the server described by ``YSWG_REDIS``."""

    def __init__(self):
        """Copy the ``YSWG_REDIS`` settings onto the instance and build the pool."""
        self.host, self.port = YSWG_REDIS['host'], YSWG_REDIS['port']
        self.db, self.password = YSWG_REDIS['db'], YSWG_REDIS['password']
        pool_options = {
            'host': self.host,
            'port': self.port,
            'db': self.db,
            'password': self.password,
            'max_connections': 3,
            'socket_timeout': 5,
            'socket_connect_timeout': 5,
            'retry_on_timeout': True,
        }
        self.pool = rd.ConnectionPool(**pool_options)

    def get_instance(self):
        """Return a new ``rd.Redis`` client bound to the shared pool."""
        return rd.Redis(connection_pool=self.pool)


def md5(src: str, algorithm: str = "md5", digits: int = 32) -> str:
    """Return the hexadecimal digest of ``src``.

    :param src: original string; it is encoded as UTF-8 before hashing.
    :param algorithm: algorithm name passed to ``hashlib.new`` (default ``"md5"``).
    :param digits: ``16`` returns characters 8..23 of the hex digest; any
        other value returns the full hex digest.
    :return: hex digest string (or its 16-character middle slice).
    """
    # Local import kept on purpose: hashlib is not imported at module level.
    import hashlib

    hasher = hashlib.new(algorithm)
    hasher.update(src.encode('utf8'))
    hex_digest = hasher.hexdigest()
    if digits == 16:
        return hex_digest[8:24]
    return hex_digest


def spop(key, count=20) -> list:
    """Run ``SPOP key count`` through a client obtained from ``Redis``.

    :param key: name of the set to pop from.
    :param count: number of members requested (default 20).
    :return: whatever the client's ``spop`` call returns.
    """

    client = Redis().get_instance()
    popped = client.spop(key, count)
    return popped


def srem(key, value, md=False):
    """Remove one member from the set ``key`` via a ``YswgRedis`` client.

    :param key: name of the set.
    :param value: member to remove, or the text to hash when ``md`` is truthy.
    :param md: when truthy, the member removed is ``md5(value, digits=16)``
        instead of ``value`` itself.
    :return: result of the client's ``srem`` call.
    """

    client = YswgRedis().get_instance()
    member = md5(value, digits=16) if md else value
    return client.srem(key, member)


def srandmembers(key, count):
    """Run ``SRANDMEMBER key count`` through a client obtained from ``YswgRedis``.

    :param key: name of the set.
    :param count: count argument forwarded to ``srandmember``.
    :return: result of the client's ``srandmember`` call.
    """
    client = YswgRedis().get_instance()
    picked = client.srandmember(key, count)
    return picked


class AccessApi(object):
    def __init__(self):
        """Set the Redis queue key and a built-in sample seed batch."""
        # self.cookie_dict = {}
        sample_account = {
            '@class': 'com.alibaba.fastjson.JSONObject',
            'phoneSuffix': '6793',
            'u_key': 54,
            'pwd': 'TBI15092022',
            'account': '335829635920',
        }
        self.seed_key = 'finance:cds_account_queue'
        self.seeds = [[sample_account]]

    def get_seed(self):
        """Pop up to five seeds from the set named by ``self.seed_key``.

        :return: the popped value when it is truthy, otherwise ``None``.
        """
        # seeds = self.seeds.pop()
        popped = spop(self.seed_key, 5)
        return popped if popped else None

    def get_mail(self, seed):
        """Ask the finance service to start verification for ``seed``.

        Posts to the ``startVerify`` endpoint up to six times. A ``CurlError``
        is logged, followed by a 2 second pause, and counts as one attempt.

        :param seed: mapping; its ``u_key`` entry (read with ``.get``) is sent
            in the request body.
        :return: ``True`` as soon as a response body contains the success
            marker, ``False`` when all six attempts are used up (including the
            case where the last attempt failed with a network error).
        """
        url = "http://wx.yswg.com.cn:8000/microservice-finance/finance/cdcAccount/startVerify"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
        }
        for _ in range(6):
            data = {'token': "dacce869-0471-4ec7-ac50-3b3b1ec22c87", 'u_key': seed.get('u_key'), "expire": 600}

            try:
                responses = requests.post(url, json=data, headers=headers, timeout=10)
            except curl.CurlError as e:
                time.sleep(2)
                if e.code == const.CurlECode.OPERATION_TIMEDOUT:
                    logging.info(f"timeout error -->{e}")
                else:
                    logging.info(f"CurlError error -->{e}")
                continue
            if 'msg":"成功' in responses.text:
                logging.info(f"开始处理 ：{responses.text}")
                return True
        # Every attempt failed or was rejected: report failure explicitly
        # instead of falling off the end of the function with None.
        return False

    def wait_yzm(self, seed):
        """Poll the ``getCdcVerifyStatus`` endpoint until a final status arrives.

        The loop has no upper bound. Between polls it pauses 3 seconds, both
        for the two "keep waiting" statuses and for network errors or a status
        this method does not recognise, so the service is never hit in a tight
        loop.

        :param seed: mapping; its ``u_key`` entry (read with ``.get``) is sent
            in the request body.
        :return: the status string itself for '待获取验证码', '已失效' and
            '已完成'; the ``code`` field of the response for '验证码已填写'.
        """
        url = "http://wx.yswg.com.cn:8000/microservice-finance/finance/cdcAccount/getCdcVerifyStatus"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
        }
        poll_interval = 3  # seconds between two polls
        while True:
            payload = {'token': "dacce869-0471-4ec7-ac50-3b3b1ec22c87", 'u_key': seed.get("u_key")}
            try:
                responses = requests.post(url, json=payload, headers=headers, timeout=6)
            except curl.CurlError as e:
                if e.code == const.CurlECode.OPERATION_TIMEDOUT:
                    logging.info(f"timeout error -->{e}")
                else:
                    logging.info(f"CurlError error -->{e}")
                # Back off before retrying so an unreachable host is not hammered.
                time.sleep(poll_interval)
                continue
            data = responses.json()
            logging.info(f"进度显示:{data}")
            status = data['data']['status']
            if status == '待获取验证码':
                return status
            if status == '验证码已填写':
                return data['data']['code']
            if status in ('已失效', '已完成'):
                return status
            # '待发送验证码', '已发送验证码' or an unrecognised status:
            # wait, then poll again.
            time.sleep(poll_interval)

    def error_msg_seed(self, error_msg, seed):
        """Report ``error_msg`` as the status of ``seed`` to the finance service.

        Posts to the ``downloadStatusChange`` endpoint and stops after the
        first request that completes; a ``CurlError`` is logged and retried,
        for at most five attempts in total.

        :param error_msg: value sent as the ``status`` field.
        :param seed: mapping with a ``u_key`` entry (required).
        :return: ``None`` in every case.
        """
        endpoint = 'http://wx.yswg.com.cn:8000/microservice-finance/finance/cdcAccount/downloadStatusChange'
        request_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
        }
        attempts_left = 5
        while attempts_left > 0:
            attempts_left -= 1
            payload = {'token': "dacce869-0471-4ec7-ac50-3b3b1ec22c87", 'u_key': seed['u_key'], "status": error_msg}
            try:
                responses = requests.post(endpoint, json=payload, headers=request_headers, timeout=6)
                logging.info(f"修改 显示信息{responses.text}")
                return
            except curl.CurlError as e:
                timed_out = e.code == const.CurlECode.OPERATION_TIMEDOUT
                if timed_out:
                    logging.info(f"timeout error -->{e}")
                else:
                    logging.info(f"CurlError error -->{e}")

    def time_to_num(self, t):
        """Turn a label such as ``"June 2024"`` into ``"202406"``.

        The text is split on single spaces; the last piece is used as-is for
        the leading part of the result and the first piece is looked up as an
        English month name to get the two-digit month.

        :param t: label text, month name first.
        :return: last piece followed by the two-digit month number.
        :raises KeyError: when the first piece is not an English month name.
        """
        month_names = (
            "January", "February", "March", "April", "May", "June",
            "July", "August", "September", "October", "November", "December",
        )
        month_codes = {
            name: f"{number:02d}"
            for number, name in enumerate(month_names, start=1)
        }
        pieces = t.split(" ")
        month_name, year = pieces[0], pieces[-1]
        return year + month_codes[month_name]

    def get_cookies(self):
        """Walk the sign-in redirect chain by hand and return the final cookies.

        Six GET requests are issued through ``self.request_dis`` with
        ``allow_redirects=False``. After the second request each next URL is
        taken from the previous response's ``location`` header (prefixed with
        a host when the header value does not contain "http"). After every
        step ``cookies`` is replaced by the cookies of that response only;
        earlier cookies are not merged in.

        :return: dict built from the cookies of the sixth response.
        :raises KeyError: presumably when a response has no ``location``
            header (depends on the header container's lookup behaviour —
            TODO confirm).
        """
        headers = {
            "Host": "www.tax.service.gov.uk",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "referer": "https://www.gov.uk/",
            "accept-language": "zh-CN,zh;q=0.9",
        }
        url = "https://www.tax.service.gov.uk/customs/payment-records"

        # Step 1: request the payment-records page; only its cookies are used.
        response = self.request_dis(url=url, method="GET", headers=headers, allow_redirects=False, timeout=10)
        # response = requests.get(url, headers=headers, proxies=proxies, allow_redirects=False)
        cookies = dict(response.cookies)

        url = "https://www.tax.service.gov.uk/bas-gateway/sign-in"
        params = {
            "continue_url": "/customs/payment-records"
        }

        # Step 2: request the sign-in entry point with the step-1 cookies.
        response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, params=params, allow_redirects=False, timeout=10)
        cookies = dict(response.cookies)
        url = response.headers['location']

        # A location without "http" is treated as a path on the tax host.
        url = "https://www.tax.service.gov.uk" + url if "http" not in url else url

        headers = {
            "Host": "www.tax.service.gov.uk",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
        }

        # Step 3: follow the redirect target from step 2.
        response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, allow_redirects=False, timeout=10)
        url = response.headers['location']

        url = f"https://www.tax.service.gov.uk{url}" if "http" not in url else url
        cookies = dict(response.cookies)

        headers = {
            "authority": "www.tax.service.gov.uk",
            "host": "www.tax.service.gov.uk",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=0, i"
        }
        # Step 4: follow the redirect target from step 3.
        response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, allow_redirects=False, timeout=10)

        url = response.headers['location']
        url = f"https://www.tax.service.gov.uk{url}" if "http" not in url else url
        cookies = dict(response.cookies)
        # NOTE(review): the Host header switches to the access host here while a
        # relative location would still be prefixed with the tax host above;
        # presumably this location is always absolute — confirm.
        headers = {
            "Host": "www.access.service.gov.uk",
            # "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",

            "Accept-Language": "zh-CN,zh;q=0.9"
        }

        # Step 5: follow the redirect target from step 4.
        response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, allow_redirects=False, timeout=10)
        cookies = dict(response.cookies)

        url = response.headers['location']
        # From here on a relative location is resolved against the access host.
        url = f"https://www.access.service.gov.uk{url}" if "http" not in url else url

        headers = {
            "Host": "www.access.service.gov.uk",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": "zh-CN,zh;q=0.9"
        }
        # Step 6: follow the redirect target from step 5; its cookies are the result.
        response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, allow_redirects=False, timeout=10)
        cookies = dict(response.cookies)

        return cookies

    def push_file_api(self, k, v, seed):
        """Upload one month's files to the finance ``upload`` endpoint.

        The request is repeated without an upper bound until the service
        answers with ``code`` 200 or 400. Errors other than the ones caught
        below (for example a response body that is not JSON) propagate to
        the caller.

        :param k: value sent as the ``month`` form field.
        :param v: ``files`` argument handed to ``requests.post`` (presumably a
            dict of field name -> file-like object or ``None``; verify
            against the caller).
        :param seed: mapping with ``u_key`` (required) and ``account`` (used
            only in the success log line).
        :return: ``True`` once the upload is accepted.
        """
        import requests as rq
        url = "http://wx.yswg.com.cn:8000/microservice-finance/finance/cdcAccount/upload"

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
        }
        data = {
            'token': "dacce869-0471-4ec7-ac50-3b3b1ec22c87",
            'u_key': seed['u_key'],
            'month': k,
        }
        logging.info(f"文件上传 时传递参数为 {data}")

        # Remember where every file-like payload starts. A multipart upload
        # reads each file to EOF, so without rewinding, a retry would send
        # empty files.
        rewindable = []
        if isinstance(v, dict):
            for fobj in v.values():
                if hasattr(fobj, "seek") and hasattr(fobj, "tell"):
                    rewindable.append((fobj, fobj.tell()))

        while True:
            for fobj, start in rewindable:
                fobj.seek(start)
            try:
                msg = rq.post(url, data=data, headers=headers, files=v, timeout=60).json()
                time.sleep(3)
                # NOTE(review): 400 is accepted alongside 200; presumably it
                # means the file was already stored — confirm with the service.
                if msg.get('code') in [400, 200]:
                    logging.info(f"文件上传成功 {seed.get('account')} {msg}")
                    return True
                logging.info(f"文件上传失败重试")
            except curl.CurlError as e:
                if e.code == const.CurlECode.OPERATION_TIMEDOUT:
                    logging.info(f"timeout error -->{e}")
                else:
                    logging.info(f"CurlError error -->{e}")
            except rq.exceptions.Timeout as e:
                logging.info(f"timeout error -->{e}")
            except rq.exceptions.ConnectionError as e:
                # The exception must be bound here: the log line below uses it.
                logging.info(f"timeout error -->{e}")

    def push_html_api(self, c79, c88, seed):
        """Upload the two listing pages as HTML files to the ``saveHtml`` endpoint.

        The request is repeated without an upper bound until the service
        answers with ``code`` 200 or 400. Errors other than the ones caught
        below (for example a response body that is not JSON) propagate to
        the caller.

        :param c79: response object whose ``content`` bytes become ``file_c79``.
        :param c88: response object whose ``content`` bytes become ``file_c88``.
        :param seed: mapping with ``u_key`` (required) and ``account`` (used
            only in the success log line).
        :return: ``True`` once the upload is accepted.
        """
        import requests as rq

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
        }

        data = {
            'token': "dacce869-0471-4ec7-ac50-3b3b1ec22c87",
            'u_key': seed['u_key'],
        }
        # In-memory files named with the current Unix timestamp.
        c79_html = BytesIO(c79.content)
        c79_html.name = f'c79_{int(time.time())}.html'

        c88_html = BytesIO(c88.content)
        c88_html.name = f'c88_{int(time.time())}.html'
        files = {
            'file_c79': c79_html,
            'file_c88': c88_html,
        }
        url = "http://wx.yswg.com.cn:8000/microservice-finance/finance/cdcAccount/saveHtml"
        logging.info(f"文件上传 时传递参数为 {data}")
        while True:
            # A multipart upload reads each buffer to EOF; rewind so that a
            # retry sends the full pages again instead of empty files.
            c79_html.seek(0)
            c88_html.seek(0)
            try:
                msg = rq.post(url, data=data, headers=headers, files=files, timeout=60).json()
                time.sleep(3)
                # NOTE(review): 400 is accepted alongside 200; presumably it
                # means the pages were already stored — confirm with the service.
                if msg.get('code') in [400, 200]:
                    logging.info(f"html文件上传成功 {seed.get('account')} {msg}")
                    return True
                logging.info(f"html文件上传失败重试")
            except curl.CurlError as e:
                if e.code == const.CurlECode.OPERATION_TIMEDOUT:
                    logging.info(f"timeout error -->{e}")
                else:
                    logging.info(f"CurlError error -->{e}")
            except rq.exceptions.Timeout as e:
                logging.info(f"timeout error -->{e}")
            except rq.exceptions.ConnectionError as e:
                # The exception must be bound here: the log line below uses it.
                logging.info(f"timeout error -->{e}")

    def push_file_new(self, files, seed):
        """Upload every entry of ``files`` through ``push_file_api``.

        :param files: mapping whose keys and values are passed to
            ``push_file_api`` as its ``k`` and ``v`` arguments.
        :param seed: forwarded unchanged to ``push_file_api``.
        :return: always ``True`` once all entries have been handed over.
        """
        for month in files:
            payload = files[month]
            self.push_file_api(month, payload, seed)
        return True


    def get_vat(self, cookies, seed):
        """Fetch the postponed-VAT listing page and download each linked file.

        Every row of the statements list is keyed by the month code produced
        by ``time_to_num`` from the row's ``dt`` text. A row with no link, or
        whose ``dd`` texts contain exactly "No statement", is recorded with
        ``None``; otherwise the link is downloaded into a ``BytesIO`` named
        ``c88_pdf.pdf``.

        :param cookies: cookies sent with the listing request.
        :param seed: account seed; not used by this method.
        :return: two-item list ``[statements, listing_response]`` where
            ``statements`` maps month code -> ``{'file_c88': BytesIO or None}``.
        """
        listing_url = "https://www.tax.service.gov.uk/customs/documents/postponed-vat"
        query = {
            "location": "CDS"
        }
        request_headers = {
            "Host": "www.tax.service.gov.uk",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "referer": "https://www.tax.service.gov.uk/customs/payment-records",
            "accept-language": "zh-CN,zh;q=0.9",
        }
        response = self.request_dis(url=listing_url, method="GET", headers=request_headers, cookies=cookies, params=query, timeout=10)

        page = Selector(text=response.text, type="html")
        rows = page.xpath(".//dl[@class='govuk-summary-list statement-list pVat-statements']/div")

        statements = {}
        for row in rows:
            label = row.xpath(".//dt//text()").get("").strip()
            logging.info(f"打印未处理时间格式{label}")
            month = self.time_to_num(label)
            logging.info(f"打印处理后时间格式{month}")
            link = row.xpath(".//dd/a/@href").get("")
            cell_texts = [piece.strip() for piece in row.xpath(".//dd//text()").getall() if piece.strip()]
            logging.info(f"下载节点 文本{cell_texts}")
            entry = statements.setdefault(month, {})
            if link and "No statement" not in cell_texts:
                logging.info("有文件")
                logging.info(f"c88 文件url {link}")
                download = self.request_dis(url=link, method="GET", timeout=10)
                pdf_buffer = BytesIO(download.content)
                pdf_buffer.name = 'c88_pdf.pdf'
                entry['file_c88'] = pdf_buffer
            else:
                logging.info("无文件")
                entry['file_c88'] = None
        return [statements, response]


    def get_c79(self, cookies, seed):
        """Fetch the import-VAT (C79) listing page and download each linked file.

        Every row of the certificates list is keyed by the month code produced
        by ``time_to_num`` from the row's ``dt`` text. A row with no link, or
        whose ``dd`` texts include a "There were no certificates ..." notice
        for any month, is recorded with ``None``; otherwise the link is
        downloaded into a ``BytesIO`` named ``c79_pdf.pdf``.

        :param cookies: cookies sent with the listing request.
        :param seed: account seed; not used by this method.
        :return: two-item list ``[certificates, listing_response]`` where
            ``certificates`` maps month code -> ``{'file_c79': BytesIO or None}``.
        """
        headers = {
            "Host": "www.tax.service.gov.uk",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "referer": "https://www.tax.service.gov.uk/customs/payment-records",
            "accept-language": "zh-CN,zh;q=0.9",
        }

        url = "https://www.tax.service.gov.uk/customs/documents/import-vat"
        response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, timeout=10)
        sel = Selector(text=response.text, type="html")

        # The notice names the month ("... in August."), so match on the
        # month-independent prefix instead of one specific month.
        no_certificate_prefix = 'There were no certificates'

        c79_dict = {}
        for c79 in sel.xpath(".//dl[@class='govuk-summary-list statement-list c79-statements']/div"):
            t = c79.xpath(".//dt//text()").get("").strip()
            logging.info(f"打印未处理时间格式{t}")
            month = self.time_to_num(t)
            logging.info(f"打印处理后时间格式{month}")
            download_link = c79.xpath(".//a/@href").get("")
            text_C79 = [i.strip() for i in c79.xpath(".//dd//text()").getall() if i.strip()]
            logging.info(f"下载节点 文本{text_C79}")
            month_entry = c79_dict.setdefault(month, {})
            has_notice = any(text.startswith(no_certificate_prefix) for text in text_C79)
            if has_notice or not download_link:
                logging.info("无文件")
                month_entry['file_c79'] = None
            else:
                logging.info("有文件")
                logging.info(f"C79 文件url {download_link}")

                c79_response = self.request_dis(url=download_link, method="GET", timeout=10)
                c79_pdf = BytesIO(c79_response.content)
                c79_pdf.name = "c79_pdf.pdf"
                month_entry['file_c79'] = c79_pdf
        return [c79_dict, response]


    def request_dis(self, url="", method="GET", headers=None, params=None, cookies=None, data=None, timeout=10, allow_redirects=True):
        """Send one HTTP request through a proxy, retrying on request errors.

        Up to ten attempts are made. Each attempt asks ``self.get_proxies()``
        for a proxy mapping, except that the attempt with index 6 replaces it
        with an empty mapping.

        :param url: target URL.
        :param method: ``"GET"`` issues a GET with ``params``; any other value
            issues a POST with ``data`` (``params`` is not sent for POST).
        :param headers: request headers.
        :param params: query parameters (GET only).
        :param cookies: cookies to send.
        :param data: request body (POST only).
        :param timeout: per-request timeout in seconds.
        :param allow_redirects: forwarded to the HTTP client.
        :return: the response of the first attempt that completes.
        :raises requests.errors.RequestsError: the error of the tenth attempt,
            when every attempt failed.
        """
        max_attempts = 10
        for i in range(max_attempts):
            proxies = self.get_proxies()
            if i == 6:
                proxies = {}
            try:
                # NOTE(review): verify=False turns off TLS certificate checks.
                if method == "GET":
                    response = requests.get(url, headers=headers, cookies=cookies, params=params, proxies=proxies, allow_redirects=allow_redirects, timeout=timeout, verify=False)
                else:
                    response = requests.post(url, headers=headers, cookies=cookies, proxies=proxies, data=data, allow_redirects=allow_redirects, timeout=timeout, verify=False)
            except requests.errors.RequestsError as e:
                logging.info(f"代理错误重试:{e}")
                if i == max_attempts - 1:
                    logging.info(f"代理错误 失败次数为9:{e}")
                    # Re-raise the caught error itself so the caller sees the
                    # real cause, message and traceback.
                    raise
            else:
                logging.info(f'{response.status_code}, {url}')
                return response

    def api_login(self, cookies, seed):
        logging.info(f"准备发送验证码 账号为：{seed.get('account')}")
        headers = {
            "Host": "www.access.service.gov.uk",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Referer": "https://www.gov.uk/",
            "Accept-Language": "zh-CN,zh;q=0.9"
        }

        url = "https://www.access.service.gov.uk/login/signin/creds"
        response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, timeout=10)
        # response = requests.get(url, headers=headers, cookies=cookies, proxies=proxies, timeout=10)
        # ---------------------7

        cookies = dict(response.cookies)
        s = '{"version":1,"userAgent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36","language":"zh-CN","colorDepth":24,"resolution":"1080x1920","timezone":-480,"clientSideTimestamp":%s,"sessionStorage":true,"localStorage":true,"indexedDB":true,"platform":"Win32","doNotTrack":false,"numberOfPlugins":5,"plugins":["PDF Viewer","Chrome PDF Viewer","Chromium PDF Viewer","Microsoft Edge PDF Viewer","WebKit built-in PDF"]}' % int(time.time()*1000)
        sel = Selector(text=response.text, type="html")
        tes = sel.xpath(".//form[@id='loginForm']/input[@name='tes']/@value").get()
        data = {
            "tes": tes,
            "csrfToken": cookies.get('CSRF-Token'),
            "user_id": seed.get("account"),
            "password": seed.get("pwd"),
            "profile": str(base64.b64encode(s.encode('utf-8')), 'utf-8'),
        }
        headers = {
            "Host": "www.access.service.gov.uk",
            "Origin": "https://www.access.service.gov.uk",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Referer": "https://www.access.service.gov.uk/login/signin/creds",
            "Accept-Language": "zh-CN,zh;q=0.9"
        }
        url = "https://www.access.service.gov.uk/login/signin/creds"
        response = self.request_dis(url=url, method="POST", headers=headers, cookies=cookies, data=data, timeout=10)
        # response = requests.post(url, headers=headers, cookies=cookies, data=data, proxies=proxies, timeout=10)
        sel = Selector(text=response.text, type="html")
        logging.info(f"验证码接口请求状态码为:{response.status_code} 账号：{seed.get('account')}")
        if response.status_code != 200:
            # 账号登录失败错误 返回400
            if response.status_code == 400:
                # 返回400有异常信息 返回账号异常
                if sel.xpath(".//span[@id='user_id-error']").get():
                    self.error_msg_seed("账号异常", seed)
                    logging.info(f"账号异常{seed.get('account')}")
                    return False
            self.error_msg_seed("代理ip请求失败", seed)
            logging.info(f"代理ip请求失败{seed.get('account')}")
            return False
        if "you want to get an access code" in sel.xpath(".//h1[@class='govuk-fieldset__heading']//text()").get(""):
            phone_text = sel.xpath(".//b[contains(.//text(), 'Text message')]/parent::*").getall()
            if len(phone_text) > 1:
                logging.info('绑定手机号过多')
                from_phone = [re.findall('\d+', i) for i in phone_text if re.findall('\d+', i)[1] == seed.get('phoneSuffix')[-4::]]
                if not from_phone:
                    self.error_msg_seed("手机号错误", seed)
                    logging.info(f"手机号错误 {seed.get('account')}")
                    return False
                factor = from_phone[0][0]
            else:
                factor = "0"
            logging.info(f"需要请求其他接口{seed.get('account')}")
            headers = {
                "Host": "www.access.service.gov.uk",
                "Origin": "https://www.access.service.gov.uk",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "Referer": "https://www.access.service.gov.uk/multi-factor/choose-factor/ayp/tbf/26e022b5-64f6-49f2-890b-853e17d5ab17",
                "Accept-Language": "zh-CN,zh;q=0.9"
            }
            url = sel.xpath(".//form[@method='POST']/@action").get()
            tes = sel.xpath(".//form/input[@name='tes']/@value").get()
            csrfToken = sel.xpath(".//form/input[@name='csrfToken']/@value").get()

            url = f"https://www.access.service.gov.uk{url}"
            data = {
                "tes": tes,
                "csrfToken": csrfToken,
                "factor": factor
            }
            response = self.request_dis(url=url, method="POST", headers=headers, cookies=cookies, data=data, allow_redirects=False, timeout=10)

            # response = requests.post(url, headers=headers, cookies=cookies, proxies=proxies, data=data, allow_redirects=False)
            cookies = dict(response.cookies)
            url = response.headers['location']
            url = f"https://www.access.service.gov.uk{url}" if "http" not in url else url
            headers = {
                "Host": "www.access.service.gov.uk",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "Referer": "https://www.access.service.gov.uk/multi-factor/choose-factor/ayp/tbf/26e022b5-64f6-49f2-890b-853e17d5ab17",
                "Accept-Language": "zh-CN,zh;q=0.9"
            }
            # response = requests.get(url, headers=headers, proxies=proxies, cookies=cookies)
            response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, timeout=10)
        sel = Selector(text=response.text, type="html")
        url = sel.xpath(".//form[@method='POST']/@action").get()
        tes = sel.xpath(".//form/input[@name='tes']/@value").get()
        csrfToken = sel.xpath(".//form/input[@name='csrfToken']/@value").get()
        cookies = {}
        for hzc in response.cookies.jar:
            cookies[hzc.name] = hzc.value
        # cookies = dict(response.cookies)
        self.error_msg_seed("已发送验证码", seed)
        code = self.wait_yzm(seed)
        if code != "已失效":
        # if True:
            url = f"https://www.access.service.gov.uk{url}"
            headers['Referer'] = "https://www.access.service.gov.uk/multi-factor/challenge/ayp/tbf/1c7f3861-1a3f-4435-9a24-720d86cbac79"
            headers['content-type'] = "application/x-www-form-urlencoded"
            # code = input("请输入验证码:")  # 验证码
            data = {
                "tes": tes,
                "csrfToken": csrfToken,
                "oneTimePassword": code.replace('"', "").replace("'", ""),  # 验证码
                "rememberMe": "true",
            }
            response = self.request_dis(url=url, method="POST", headers=headers, cookies=cookies, data=data, allow_redirects=False, timeout=10)

            # response = requests.post(url, headers=headers, cookies=cookies, data=data, proxies=proxies, allow_redirects=False, timeout=10)

            cookies = dict(response.cookies)

            sel = Selector(text=response.text, type="html")
            oneTimePassword = sel.xpath(".//li/a[@href='#oneTimePassword']").get()
            if oneTimePassword:
                logging.info(f"验证码异常{seed.get('account')}")
                self.error_msg_seed("验证码错误", seed)
                return False
            logging.info(f"验证码正确 账号：{seed.get('account')}")
            url = response.headers['location']
            url = f"https://www.access.service.gov.uk{url}" if "http" not in url else url
            headers = {
                "Host": "www.tax.service.gov.uk",
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "referer": "https://www.access.service.gov.uk/",
                "accept-language": "zh-CN,zh;q=0.9",
            }
            response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, allow_redirects=False, timeout=10)

            # response = requests.get(url, headers=headers, cookies=cookies, proxies=proxies, allow_redirects=False, timeout=10)

            cookies = dict(response.cookies)

            url = "https://www.tax.service.gov.uk/customs/payment-records"

            response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, allow_redirects=True, timeout=10)
            cookies = dict(response.cookies)
            # response = requests.get(url, headers=headers, cookies=cookies, proxies=proxies, allow_redirects=False, timeout=10)
            # 检测是否邮箱异常
            sel = Selector(text=response.text, type="html")
            email_error = sel.xpath("//h1[contains(text(), 'email address')]/text()").get()
            if email_error:
                logging.info(f"邮箱异常 需要验证邮箱 {seed.get('account')}")
                verify_url = sel.xpath("//a[contains(text(), 'Verify or change email')]/@href").get()
                response = self.request_dis(url=verify_url, method="GET",  headers=headers, cookies=cookies, timeout=10)
                cookies = dict(response.cookies)
                sel = Selector(text=response.text, type="html")
                csrfToken = sel.xpath(".//form/input[@name='csrfToken']/@value").get()
                data = {
                    "csrfToken": csrfToken,
                    "isVerify": "true"
                }
                url = "https://www.tax.service.gov.uk/manage-email-cds/verify-change-email"
                headers = {
                    "Host": "www.tax.service.gov.uk",
                    "origin": "https://www.tax.service.gov.uk",
                    "content-type": "application/x-www-form-urlencoded",
                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
                    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                    "referer": "https://www.tax.service.gov.uk/manage-email-cds/verify-change-email/create",
                    "accept-language": "zh-CN,zh;q=0.9",
                }
                response = self.request_dis(url=url, method="POST", headers=headers, cookies=cookies, data=data, timeout=10)
                cookies = dict(response.cookies)
                url = "https://www.tax.service.gov.uk/customs/payment-records"
                response = self.request_dis(url=url, method="GET", headers=headers, cookies=cookies, timeout=10)
                cookies = dict(response.cookies)
            sel = Selector(text=response.text, type="html")
            user_name_error = sel.xpath(".//h2[@class='govuk-heading-m']//text()").get("")
            if "subscribed to" in user_name_error:
                logging.info(f"账号订阅异常{seed.get('account')}")
                self.error_msg_seed("账号订阅异常", seed)
                return False
            # for i in range(5):
            #     try:
            c78_dict, c78_response = self.get_c79(cookies, seed)

            c88_dict, c88_response = self.get_vat(cookies, seed)

            self.push_html_api(c78_response, c88_response, seed)
                #     break
                # except InvalidURL as e:
                #     logging.info(f"请求文件失败 重试{e}")
                #     if i == 4:
                #         self.error_msg_seed("代理ip请求失败", seed)
                #         logging.info(f"代理ip请求失败{seed.get('account')}")
                #         return False
                #     continue
            all_keys = set(c88_dict.keys()).union(c78_dict.keys())
            merged = {}
            for key in all_keys:
                merged[key] = {
                    'file_c88': c88_dict.get(key, {}).get('file_c88', None),
                    'file_c79': c78_dict.get(key, {}).get('file_c79', None)
                }
            logging.info(f"长度 {len(c78_dict)} {c78_dict}")
            if self.push_file_new(c78_dict, seed):
                self.error_msg_seed("下载完成", seed)
                logging.info(f"CDS上传成功{seed}")
                return True
            else:
                self.error_msg_seed("已失效", seed)
                logging.info(f"已失效{seed.get('account')}")
                return False
        else:
            self.error_msg_seed("已失效", seed)
            logging.info(f"已失效{seed.get('account')}")
            return False

    def get_proxies(self):
        """Build a ``proxies`` mapping from one entry fetched under ``ip_lists``.

        Asks ``srandmembers`` for a single entry of ``'ip_lists'``
        (presumably a Redis set of proxy hosts -- confirm against the helper).

        :return: an empty dict when nothing was fetched; otherwise a dict whose
            ``http`` and ``https`` keys both hold ``http://<ip>:3389``.
        """
        picked = srandmembers('ip_lists', 1)
        if not picked:
            return {}

        # The entry is decoded with str(value, 'utf-8'), so it is expected to be bytes.
        address = str(picked[0], 'utf-8')
        print(f"ip --> {address}")

        # Both schemes are routed through the same plain-HTTP proxy endpoint.
        endpoint = f'http://{address}:3389'
        return {'http': endpoint, 'https': endpoint}

    def run(self, seed, max_retries=5, retry_delay=1):
        """Process one seed: request a verification code, wait for it, log in.

        The login step is retried when it raises, but only ``max_retries``
        times and with a pause between attempts; the previous unbounded
        ``while True`` loop could spin forever on a persistent error and keep
        the worker thread (and the ``join`` in ``main``) blocked.

        :param seed: task dict handed through to the helper methods.
        :param max_retries: maximum number of login attempts before giving up.
        :param retry_delay: seconds to sleep between failed login attempts.
        :return: ``False`` when the code request fails, the wait result is
            empty or unrecognised, or every login attempt raised; ``True`` when
            the wait step reports ``'已完成'``; otherwise whatever
            ``api_login`` returned.
        """
        if not self.get_mail(seed):
            self.error_msg_seed("向用户发送请求验证码请求失败", seed)
            return False

        wait_msg = self.wait_yzm(seed)
        if not wait_msg:
            self.error_msg_seed("已失效", seed)
            logging.info("已失效")
            return False

        if wait_msg == '已完成':
            self.error_msg_seed("已完成", seed)
            logging.info("已完成")
            return True

        if wait_msg != '待获取验证码':
            # Unknown status: nothing to do, but report it instead of silently
            # falling through with an implicit None.
            logging.warning(f"unexpected wait_yzm status {wait_msg!r} {seed}")
            return False

        for attempt in range(1, max_retries + 1):
            try:
                cookies = self.get_cookies()
                result = self.api_login(cookies, seed)
            except Exception as e:
                # Boundary of the worker thread: log with traceback and retry.
                logging.exception(f"重试异常退出 error {str(e)} {seed}")
                # NOTE(review): status text kept verbatim in case consumers of
                # error_msg_seed match on it; it does not include the exception.
                self.error_msg_seed("重试异常退出 稍后重试 异常为: error", seed)
                if attempt < max_retries:
                    time.sleep(retry_delay)
                continue
            logging.info(f"任务完成 {seed}")
            return result

        # Every attempt raised.
        return False

    def main(self):
        """
        UK CDS account download -- API call handling.

        Polls ``get_seed`` forever. Outside the working window (Saturday or
        Sunday, or a local hour not in 10, 11, 15, 16, 17) it sleeps 30 seconds
        and checks again. Each fetched seed is JSON-decoded and handed to
        ``run`` in its own thread; the batch is joined before the next poll.

        :return: never returns normally.
        """
        while True:
            if datetime.today().weekday() in [5, 6] or time.localtime().tm_hour not in [10, 11, 15, 16, 17]:
                logging.info('周六 周日 休息时间, 停止发送 Rest time no task sleep 30s')
                time.sleep(30)
                continue

            try:
                seeds = self.get_seed()
            except rd.exceptions.ConnectionError as e:
                logging.info(f"ConnectionError error {e}")
                # Back off before retrying so a Redis outage does not turn
                # this loop into a busy spin that floods the log.
                time.sleep(3)
                continue

            if not seeds:
                logging.info("sleep 3 -->")
                time.sleep(3)
                continue

            threads = []
            for raw_seed in seeds:
                try:
                    seed = json.loads(raw_seed)
                except (TypeError, ValueError) as e:
                    # One malformed seed must not abort the whole polling loop.
                    logging.error(f"invalid seed skipped {raw_seed!r} error {e}")
                    continue
                thread = threading.Thread(target=self.run, args=(seed,))
                threads.append(thread)
                thread.start()

            # Wait for the whole batch before polling for more seeds.
            for thread in threads:
                thread.join()
            logging.info("线程处理完成")


if __name__ == "__main__":
    # Start the polling loop only when the file is executed as a script, so
    # importing the module does not block on the infinite loop in main().
    start_time = time.time()
    # AccessApi().get_cookies()
    try:
        AccessApi().main()
    finally:
        # main() loops forever, so this line is reached only when it is
        # interrupted or raises; log the elapsed time in either case.
        logging.info(f"耗时 {time.time()-start_time}")

# nohup python access_api.py  > access_api.log 2>&1 &
#         url = "https://www.tax.service.gov.uk/customs/payment-records"
# url = "https://www.gov.uk/guidance/get-your-postponed-import-vat-statement"
#             {'username': '51 75 03 31 03 12', 'password': '1%0!15y5^tUp9qeo'},
#             {'username': '99 16 23 79 60 67', 'password': 'GeodWJfk2026H2r8'},
# 97 39 74 74 18 25  芭球 BQ  CDS10542021
# 账管 李梦
# 电话 17796365443




