# -*- coding: utf-8 -*-
import time
import random
import scrapy
import sys, os
import asyncio
import logging
import platform
from time import time
from scrapy import signals
from scrapy.http import HtmlResponse as Response
from twisted.internet.threads import deferToThread
from scrapy.downloadermiddlewares.retry import RetryMiddleware, get_retry_request
sys.path.append(os.path.dirname(os.path.dirname(sys.path[0])))  # 上级目录
from amazon_spider.utils.random_ssl import DESAdapter
from amazon_spider.utils.common import is_internet_available


class AddAntiMiddleware:
    """Downloader middleware that attaches anti-bot tokens to outgoing requests.

    Requests opt in through ``request.meta``:

    * ``anti`` - add an ``anti-content`` header computed by ``js/anti_args.js``.
    * ``bee``  - make sure the ``_bee`` / ``api_uid`` cookies are present,
      fetching them through :meth:`get_r_bee` when they are not.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and subscribe it to the ``spider_opened`` signal."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    @staticmethod
    def _js_path(name):
        """Return the path of script *name* in the ``js`` directory next to this file."""
        return os.path.join(os.path.dirname(__file__), 'js', name)

    def get_anti(self, url, port, ua="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/122.0.1927.82 Safari/537.36", whiteList="Cm1olmXuh1pnjADIkQlnAg=="):
        """Return the value produced by the ``anti`` function of ``js/anti_args.js``.

        Args:
            url: URL the token is generated for.
            port: Forwarded unchanged to the JS function.
            ua: User-Agent string forwarded to the JS function.
            whiteList: Forwarded unchanged to the JS function.
        """
        import execjs

        with open(self._js_path('anti_args.js'), 'r') as f:
            ctx = execjs.compile(f.read())
        return ctx.call('anti', url, ua, port, whiteList)

    def get_bee(self):
        """Run ``js/get_bee.js`` with node and return the values it prints.

        Every stdout line containing ``:`` contributes the text between the
        first and second colon, stripped of quotes, commas and whitespace.

        Returns:
            list[str]: the extracted values, in output order.
        """
        import subprocess

        path = self._js_path('get_bee.js')
        cmd = f'node  {path}'
        if "Windows" == platform.system():
            logging.info(f"----{cmd}")
        else:
            logging.info(f"js path is {cmd}")
        # An argument list (no shell) keeps paths containing spaces intact, and
        # run() waits for the process and closes the pipe for us.
        completed = subprocess.run(['node', path], stdout=subprocess.PIPE, text=True)
        return [
            line.split(":")[1].replace("'", "").replace(",", "").strip()
            for line in completed.stdout.split("\n")
            if ":" in line
        ]

    def get_r_bee(self, data, request):
        """Fetch the ``_bee`` token for *request*, trying up to ten times.

        Args:
            data: Request body posted to the token endpoint.
            request: The Scrapy request being prepared; its proxy, cookies,
                headers and meta are read, and for plain GET requests
                ``meta['bee']`` / ``meta['api_uid']`` are written.

        Returns:
            * ``dict`` with ``_bee`` and ``api_uid`` for a GET request flagged
              ``meta['curlcffi']`` and for a POST request without ``meta['bee']``;
            * an ``HtmlResponse`` for any other GET request (the page is
              downloaded here with the fresh cookies);
            * ``None`` when every attempt failed or the request matches neither
              case.
        """
        import requests as r
        # The module-level name ``time`` is rebound to the ``time.time`` function
        # by ``from time import time``, so ``time.sleep`` does not exist here.
        from time import sleep

        is_get = request.method == "GET"
        # NOTE(review): process_request only calls this method when meta['bee']
        # is truthy, so this condition is never met from there - confirm intent.
        is_first_post = request.method == "POST" and not request.meta.get("bee")
        if not (is_get or is_first_post):
            # Nothing below would run; skip ten empty iterations.
            return None

        url = 'https://www.temu.com/api/phantom/xg/pfb/a4'
        headers = {
            'authority': 'www.temu.com',
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'zh-CN,zh;q=0.9',
            'content-type': 'application/json;charset=UTF-8',
            'origin': 'https://www.temu.com',
            'referer': 'https://www.temu.com/login.html?from=https^%^3A^%^2F^%^2Fwww.temu.com^%^2Fgoods.html^%^3Fgoods_id^%^3D601099517597329&login_scene=2&refer_page_name=goods&refer_page_id=10032_1696822261092_nbpqgqtfln&refer_page_sn=10032&_x_sessn_id=g6m9tej9xm',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        }
        proxies = {
            'http': request.meta.get('proxy'),
            'https': request.meta.get('proxy')
        }

        for _ in range(10):
            try:
                # The context manager closes the session's connections on every
                # exit path (return, continue or exception).
                with r.Session() as session:
                    session.headers.update(headers)
                    session.proxies = proxies
                    session.mount('https://', DESAdapter())

                    if not is_internet_available():
                        logging.info("network break")
                        sleep(3)
                        continue

                    if is_get:
                        # NOTE(review): this goes through requests.post, not the
                        # session, so the DESAdapter mounted above is not used
                        # for this call - confirm that is intended.
                        response = r.post(url, headers=headers, cookies=request.cookies, verify=False,
                                          data=data, proxies=proxies, timeout=20)
                        if not is_internet_available():
                            logging.info("network break")
                            sleep(1)
                            continue
                        bee = response.json()["result"]["a"]
                        logging.info(f"get request return response bee {bee}")
                        api_uid = dict(response.cookies).get("dilx")
                        if request.meta.get("curlcffi"):
                            # The page itself is fetched by another middleware;
                            # only hand back the cookies.
                            return {
                                '_bee': bee,
                                'api_uid': api_uid
                            }
                        rs = session.get(
                            request.url,
                            headers=dict(request.headers.to_unicode_dict()),
                            proxies=proxies,
                            cookies={
                                "region": '211',
                                'currency': 'USD',
                                'language': 'en',
                                '_bee': bee,
                                'api_uid': api_uid
                            },
                            timeout=10,
                        )
                        request.meta["bee"] = bee
                        request.meta["api_uid"] = api_uid
                        return Response(url=str(rs.url), status=rs.status_code, body=rs.content,
                                        encoding=request.encoding, request=request, headers=request.headers)

                    session.post(url, headers=headers, cookies=request.cookies, verify=False,
                                 data=data, proxies=proxies, timeout=20)
                    logging.info("set post cookie bee")
                    jar = dict(session.cookies)
                    return {
                        '_bee': jar.get("_bee"),
                        'api_uid': jar.get("dilx"),
                    }
            except Exception as exc:
                # Best-effort retry: report the failure instead of hiding it.
                logging.warning(f"bee request failed, retrying: {exc!r}")
                sleep(1)
                continue
        return None

    def process_request(self, request, spider):
        """Add the ``anti-content`` header and/or bee cookies requested via meta.

        Returns:
            ``None`` to let the request continue through the downloader, or the
            response produced by :meth:`get_r_bee` for plain GET requests.
        """
        meta = request.meta
        if meta.get('anti'):
            request.headers['anti-content'] = self.get_anti(request.url, "", ua=str(request.headers['user-agent'], encoding='utf-8'), whiteList="CmyieWSMKx2DDwBxEOiOAg==")
            logging.info(f"add anti :{request.headers['anti-content']}")
        if not meta.get("bee"):
            return None
        if request.cookies.get("_bee"):
            # Cookie already present: just mirror it into meta.
            request.meta["bee"] = request.cookies["_bee"]
            return None

        ddd = self.get_bee()
        data = '{"data":"%s","timestamp":"%s","appKey":"fe","sign":"%s"}' % (ddd[0], ddd[1], ddd[3])
        r = self.get_r_bee(data, request)

        wants_cookies = request.method == "POST" or (
            request.method == "GET" and request.meta.get("curlcffi")
        )
        if not wants_cookies:
            return r
        if r is None:
            # get_r_bee gave up; updating cookies with None would raise TypeError.
            logging.warning("bee cookies unavailable, sending request without them")
            return None
        request.cookies.update(r)
        logging.info(request.cookies)
        return None

    def process_response(self, request, response, spider):
        """Pass the downloader's response through unchanged."""
        return response

    def process_exception(self, request, exception, spider):
        """Return ``None`` so other middlewares keep processing the exception."""
        return None

    def spider_opened(self, spider):
        """Log that *spider* has been opened."""
        spider.logger.info('Spider opened: %s' % spider.name)


class ForeignProxyMiddleware:
    """Assign the fixed proxy endpoint to requests flagged with ``meta['amazon_proxy']``."""

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` when the request asks for it; always return ``None``."""
        if not request.meta.get('amazon_proxy'):
            return

        # NOTE(review): the proxy credentials live in source and are written to
        # the log below - consider moving them to settings.
        endpoint = "http://customer-806df0-country-DE:4d57660b@proxy.ipipgo.com:31212"
        request.meta['proxy'] = endpoint
        logging.info(f'{request.meta["proxy"]} set proxy')


class TemuIsPageError(RetryMiddleware):
    """Retry GET pages whose HTML lacks the expected markers or is not priced in USD."""

    def process_response(self, request, response, spider):
        """Validate *response* in a worker thread and return the resulting Deferred."""
        return deferToThread(self.down, request, response, spider)

    @staticmethod
    def _retry_or_pass(request, response, spider, reason):
        """Return a retry request for *request*, or *response* once retries run out."""
        retry = get_retry_request(request, reason=reason, spider=spider)
        # get_retry_request() returns None when the retry budget is exhausted;
        # process_response must not yield None, so hand the response through,
        # as RetryMiddleware itself does.
        return response if retry is None else retry

    def down(self, request, response, spider):
        """Check the page and decide between passing it on and retrying.

        Only requests with ``meta['method'] == "GET"`` are inspected. A page is
        retried with reason ``"page error"`` when the ``window.rawData`` script
        or the Sign/Hello marker is missing or the title is exactly ``"Temu"``,
        and with reason ``"site error"`` when the script does not contain
        ``"currency":"USD"``.

        Returns:
            The original response, or a new request scheduled for retry.
        """
        if request.meta.get("method") != "GET":
            return response

        # NOTE(review): in XPath ``and`` binds tighter than ``or``, so the
        # "Hello" test is not restricted to @tabindex/@role - confirm intent.
        log_str = response.xpath(
            '//div[@tabindex and @role="button" and contains(.//text(), "Sign") or contains(.//text(), "Hello")]//text()').getall()
        logging.info(f"page identification {log_str}")
        zip_code = response.xpath("//script[contains(text(), 'window.rawData')]//text()").get()
        title = response.xpath("//title//text()").get()

        if not zip_code or not log_str or title == "Temu":
            return self._retry_or_pass(request, response, spider, "page error")
        if '"currency":"USD"' in zip_code:
            return response
        return self._retry_or_pass(request, response, spider, "site error")


def as_deferred(f):
    """Schedule awaitable *f* with asyncio and expose it as a Twisted ``Deferred``."""
    from twisted.internet.defer import Deferred

    future = asyncio.ensure_future(f)
    return Deferred.fromFuture(future)


class CurlCffiRequests:
    """Downloader middleware that downloads ``meta['curlcffi']`` requests with curl_cffi.

    Requests without that flag are left to the regular Scrapy downloader.
    """

    def __init__(self, delay=0):
        # Value of the DOWNLOAD_DELAY setting; stored but not read in this class.
        self.delay = delay

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware from the crawler's ``DOWNLOAD_DELAY`` setting."""
        return cls(crawler.settings.get('DOWNLOAD_DELAY', 0))

    def process_request(self, request, spider):
        """Start the curl_cffi download for flagged requests.

        Returns:
            ``None`` for requests without ``meta['curlcffi']``; otherwise a
            Deferred that fires with the downloaded response.
        """
        if not request.meta.get('curlcffi'):
            # Not ours: do not create a future or count the request in the
            # downloader stats for something this middleware does not download.
            return None

        start_time = time()
        d = as_deferred(self.down(request))
        d.addCallback(self._cb_latency, request, start_time)
        stats = spider.crawler.stats
        stats.inc_value("downloader/request_count")
        stats.inc_value(f"downloader/request_method_count/{request.method}")
        return d

    @staticmethod
    def _cb_latency(response: "Response", request: "scrapy.Request",
                    start_time: float) -> "Response":
        """Record the elapsed time in ``meta['download_latency']`` and pass *response* on."""
        request.meta["download_latency"] = time() - start_time
        return response

    async def down(self, request):
        """Download *request* with curl_cffi and wrap the result in an ``HtmlResponse``.

        Returns:
            ``None`` when the request is not flagged with ``meta['curlcffi']``,
            otherwise the ``HtmlResponse``.

        Raises:
            TimeoutError: when curl_cffi raises ``CurlError`` (any curl failure,
                chained as the cause).
        """
        if not request.meta.get('curlcffi'):
            return None

        from curl_cffi.requests import AsyncSession
        from curl_cffi import curl, CurlHttpVersion
        from scrapy.http.headers import Headers
        from scrapy.http import HtmlResponse

        proxies = {
            'http': request.meta.get('proxy'),
            'https': request.meta.get('proxy')
        }
        logging.info("Start to set curlcffi")
        headers = Headers(request.headers or {}, encoding='utf-8').to_unicode_dict()
        start_time = time()
        async with AsyncSession() as s:
            # Use the requested browser fingerprint, or pick one at random.
            impersonate = request.meta.get("impersonate") or random.choice([
                "chrome99", "chrome101", "chrome110", "edge99", "edge101",
                "chrome107"
            ])
            timeout = request.meta.get("download_timeout") or 10
            try:
                response = await s.request(
                    request.method,
                    request.url,
                    data=request.body,
                    headers=headers,
                    proxies=proxies,
                    verify=False,
                    cookies=request.cookies,
                    timeout=timeout,
                    http_version=CurlHttpVersion.V2TLS,
                    impersonate=impersonate
                )
            except curl.CurlError as e:
                raise TimeoutError(
                    f"curlcffi Getting {request.meta.get('asin')} took longer than {timeout} seconds."
                ) from e
            response_time = time() - start_time  # seconds spent on the download
            logging.info(f"curlcffi Response time: {response_time} seconds")
            return HtmlResponse(
                request.url,
                encoding=response.encoding,
                status=response.status_code,
                body=response.content,
                request=request
            )

    def process_exception(self, request, exception, spider):
        """Log the failure and return ``None`` so other middlewares can handle it."""
        logging.info(f"middleware error {spider.r_utils(request)} {exception}")
        return None


class RandomUserAgentMiddleware(object):
    """Give every request a Chrome-on-Windows User-Agent with a random version number."""

    def process_request(self, request, spider):
        """Overwrite the ``User-Agent`` header of *request*; returns ``None``."""
        major = random.randint(50, 102)
        build = random.randint(1000, 5000)
        patch = random.randint(1, 181)
        request.headers['User-Agent'] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            f"(KHTML, like Gecko) Chrome/{major}.0.{build}.{patch} Safari/537.36"
        )