import hashlib
import os
import time
import uuid
import random
import re
import json
import io
from io import BytesIO
from typing import Optional, Dict, Any, List
# 引入线程池
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import requests
from PIL import Image,UnidentifiedImageError
import pillow_avif
from scrapy import Selector
from requests.exceptions import RequestException, JSONDecodeError, Timeout, ConnectionError, HTTPError
from loguru import logger
from  cookie_manager import cookie_manager
# 导入配置

from amazon_configs import (
    site_name_secret_dict,
    us_devices_list,
    uk_devices_list,
    de_devices_list,
     us_cookie_dict, uk_cookie_dict, de_cookie_dict
)


# Per-site settings keyed by site code: the device profiles to pick from,
# the locally configured cookies and the storefront base URL.
SITE_CONFIG_MAPPER = {
    site_code: {"devices": device_pool, "cookies": local_cookies, "base_url": storefront}
    for site_code, device_pool, local_cookies, storefront in (
        ("us", us_devices_list, us_cookie_dict, "https://www.amazon.com"),
        ("uk", uk_devices_list, uk_cookie_dict, "https://www.amazon.co.uk"),
        ("de", de_devices_list, de_cookie_dict, "https://www.amazon.de"),
    )
}

# Base URL templates.
# Search-results URL; formatted with the site base URL and a "|"-joined ASIN list.
AMAZON_SEARCH_BASE_URL = "{base_url}/s?rh=p_78:{bbx_asin_list}&rank=asin-scores-asc-rank&searchMethod=CameraSearch"
# StyleSnap page URL; fetched to read the upload token and also sent as the
# Referer header of the web upload request.
WEB_REFERER_URL = "{base_url}/stylesnap?pd_rd_w=wvP9K&content-id=amzn1.sym.d26e24db-d6a0-41ff-bb8a-bf1969aea086%3Aamzn1.sym.d26e24db-d6a0-41ff-bb8a-bf1969aea086&pf_rd_p=d26e24db-d6a0-41ff-bb8a-bf1969aea086&pf_rd_r=3EMK072JNCNA4Z81M3GS&pd_rd_wg=So1ZB&pd_rd_r=99eed0de-82e8-4168-a76d-55fe6451ac50&qid=1732271359&ref_=sxts_snpl_1_0_d26e24db-d6a0-41ff-bb8a-bf1969aea086&dplnk=Y&dplnkCustom=Y&q=local"

# === Retry configuration ===
GLOBAL_RETRY_TIMES = 5  # attempts of the whole search flow (loop in AmazonImageSearch.search)
STEP_RETRY_TIMES = 5  # attempts per single HTTP request (loop in _retry_request)
RETRY_DELAY = 1  # seconds slept between attempts


# === 修改：接收 bytes 数据 ===
def get_image_size(image_data: bytes) -> Optional[Dict[str, int]]:
    """Return the pixel dimensions of an encoded image held in memory.

    Args:
        image_data: Raw bytes of an image file.

    Returns:
        ``{"width": w, "height": h}``, or ``None`` when the bytes cannot be
        opened as an image (the error is logged).
    """
    try:
        stream = io.BytesIO(image_data)
        with Image.open(stream) as picture:
            pixel_w, pixel_h = picture.size
            dimensions = {"width": pixel_w, "height": pixel_h}
        return dimensions
    except Exception as exc:
        logger.error(f"获取图片尺寸失败: {exc}")
        return None


def get_page_num(total: int | None, count_per_page: int | None) -> int:
    if not isinstance(total, int) or not isinstance(count_per_page, int):
        return 0
    if total <= 0 or count_per_page <= 0:
        return 0
    page_count = total // count_per_page
    return page_count + 1 if total % count_per_page > 0 else page_count


class AmazonImageSearch:
    def __init__(self, site_name: str):
        """Bind the client to one site and build its first request context.

        Args:
            site_name: Site code; must be a key of both
                ``site_name_secret_dict`` and ``SITE_CONFIG_MAPPER``.

        Raises:
            ValueError: If ``site_name`` is missing from either mapping.
        """
        # Both mappings are indexed below, so check both up front instead of
        # surfacing a bare KeyError for a half-configured site.
        if site_name not in site_name_secret_dict or site_name not in SITE_CONFIG_MAPPER:
            raise ValueError(f"不支持的站点: {site_name}")

        self.site_name = site_name
        self.site_config = site_name_secret_dict[site_name]
        self.site_specific = SITE_CONFIG_MAPPER[site_name]
        self.base_url = self.site_specific["base_url"]
        self.snap_url = f"{self.site_config['snap_url']}/style-snap/2.0"

        # Per-session state; filled in by _refresh_client_context().
        self.cookies = {}
        self.session_id = ""
        self.device_info = {}
        self.client_device_id = ""
        self.headers = {}

        self._refresh_client_context()

    def _refresh_client_context(self) -> None:
        """Rebuild cookies, device profile, device id and App request headers.

        Cookies come from ``cookie_manager`` when it returns any; otherwise a
        copy of the site's locally configured cookies is used as a fallback.
        """
        pooled = cookie_manager.get_cookie(self.site_name)
        if not pooled:
            # Fallback: nothing usable came back from the cookie pool.
            self.cookies = self.site_specific["cookies"].copy()
            logger.warning(f"[{self.site_name}] Cookie池耗尽或数据库异常，使用本地默认配置")
        else:
            self.cookies = pooled
            logger.info(f"[{self.site_name}] 成功获取Cookie池cookie  (SID: {self.cookies.get('session-id', '')[:30]}...)")

        jar = self.cookies
        self.session_id = jar.get("session-id", "")

        # A fresh device profile and device id accompany every refresh.
        device_pool = self.site_specific["devices"]
        self.device_info = random.choice(device_pool) if device_pool else {}
        self.client_device_id = str(uuid.uuid4())

        self.headers = {
            "x-amz-access-token": "",
            "x-amz-lens-session-auth-token": jar.get("session-token", ""),
            "x-amz-lens-session-id": self.session_id,
            "x-amz-lens-ubid": jar.get("ubid-main", ""),
            "accept-encoding": "gzip",
            "user-agent": "okhttp/4.9.1",
        }

    def _generate_auth_params(self) -> Dict[str, str]:
        """Build the timestamp / auth-token pair sent with an App request.

        Returns:
            ``{"ts": <unix seconds as str>, "authtoken": <hex digest>}`` where
            the token is the SHA-512 of secret + username + application + ts,
            all read from ``self.site_config``.
        """
        timestamp = str(int(time.time()))
        cfg = self.site_config
        pieces = (cfg['secret'], cfg['username'], cfg['application'], timestamp)
        material = "".join(f"{piece}" for piece in pieces)
        digest = hashlib.sha512(material.encode("utf-8"))
        return {"ts": timestamp, "authtoken": digest.hexdigest()}

    def _build_query_metadata(self, extra_params: Optional[Dict[str, str]] = None) -> str:
        """Serialize the ``query_metadata`` form field as a JSON string.

        Args:
            extra_params: Optional entries merged last, so they override both
                the fixed defaults and the device-profile fields.

        Returns:
            JSON text containing the session/device identifiers, a fresh
            random ``clientId`` and the fields of ``self.device_info``.
        """
        payload = dict(
            amznSessionId=self.session_id,
            clientVersion="30.20.2.100",
            cardsVersion="1.0",
            clientMessageVersion="1.0",
            amznDirectedCustomerId="",
            clientDeviceId=self.client_device_id,
            clientId=str(uuid.uuid4()),
            sourceType="Photo",
            ingressSource="ctp",
            uiMode="stylesnap",
        )
        # Device-profile fields go after the defaults, caller extras last.
        payload.update(self.device_info)
        payload.update(extra_params or {})
        return json.dumps(payload)

    def _get_new_proxy(self) -> Dict[str, str]:
        """Fetch one SOCKS5 proxy address from the proxy-provider API.

        Returns:
            A ``requests``-style proxies mapping for ``http`` and ``https``,
            or an empty dict when the API call fails or its reply is not a
            ``host:port`` address.
        """
        # NOTE(review): the uid / vkey query parameters look like account
        # credentials hard-coded in source; consider moving them to config.
        proxy_url = 'http://api.xiequ.cn/VAD/GetIp.aspx?act=getturn51&uid=83353&vkey=6FEB79CD7E8700AFCDC44CDBC3889B9D&num=1&time=6&plat=1&re=0&type=7&so=3&group=51&ow=1&spl=1&addr=&db=1'
        try:
            reply = requests.get(url=proxy_url, timeout=5)
            reply.raise_for_status()
            proxy_res = reply.text.strip()
        except Exception as e:
            logger.warning(f"获取代理失败: {e}")
            return {}

        # The reply text is used verbatim as the proxy address, so anything
        # that is not host:port (e.g. an error message) must be rejected
        # rather than turned into a bogus proxy URL.
        if not re.fullmatch(r"[^\s:/@]+:\d{1,5}", proxy_res):
            logger.warning(f"获取代理失败: {proxy_res[:100]}")
            return {}

        address = f"socks5://{proxy_res}"
        return {"http": address, "https": address}

    def _retry_request(self, method: str, url: str,
                       **kwargs) -> requests.Response:
        """Send a GET or POST with up to ``STEP_RETRY_TIMES`` attempts.

        Args:
            method: ``"POST"`` (case-insensitive) sends a POST; any other
                value sends a GET.
            url: Target URL.
            **kwargs: Forwarded to ``requests.get`` / ``requests.post``.
                When ``headers`` is given without a user-agent, a desktop
                Chrome one is added to a copy of it. ``timeout`` defaults
                to 20 seconds and may be overridden.

        Returns:
            The first response whose status passes ``raise_for_status()``.

        Raises:
            RequestException: When every attempt failed; chained to the last
                underlying error.
            Exception: Any non-``requests`` error is logged and re-raised
                immediately without further attempts.
        """
        headers = kwargs.get("headers")
        if headers is not None:
            # Work on a copy so the caller's dict is never mutated.
            headers = dict(headers)
            if not any(str(name).lower() == "user-agent" for name in headers):
                headers["user-agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
            kwargs["headers"] = headers
        # A caller-supplied timeout wins instead of colliding with a
        # hard-coded keyword argument.
        kwargs.setdefault("timeout", 20)

        send = requests.post if method.upper() == "POST" else requests.get
        last_error = None
        for attempt in range(STEP_RETRY_TIMES):
            try:
                response = send(url, **kwargs)
                response.raise_for_status()
                return response
            except RequestException as e:
                last_error = e
                logger.warning(f"请求异常 ({method} {url}): {e} - 第 {attempt + 1}/{STEP_RETRY_TIMES} 次重试")
                # No point sleeping after the final attempt.
                if attempt < STEP_RETRY_TIMES - 1:
                    time.sleep(RETRY_DELAY)
            except Exception as e:
                logger.error(f"发生未预期错误: {e}")
                raise

        raise RequestException(f"请求在 {STEP_RETRY_TIMES} 次尝试后最终失败: {url}") from last_error

    def _parse_app_asin_list(self, response_json: Dict[str, Any]) -> List[str]:
        """Pull the ASIN list out of an App-side StyleSnap response.

        Args:
            response_json: Decoded JSON body of the App response.

        Returns:
            The ``bbxAsinList`` of the first search result.

        Raises:
            ValueError: If ``style-snap`` / ``searchResult`` is missing, or
                if no ASIN was found. Callers rely on the latter to trigger
                a retry.
        """
        has_snap = "style-snap" in response_json
        if not (has_snap and "searchResult" in response_json["style-snap"]):
            raise ValueError("App端响应结构异常: 缺少 style-snap 或 searchResult")

        search_results = response_json["style-snap"]["searchResult"]
        asin_list = search_results[0].get("bbxAsinList", []) if search_results else []

        # An empty list is treated as a failure so the caller retries.
        if asin_list:
            return asin_list
        raise ValueError("App端解析结果为空 (bbxAsinList)，强制触发重试")

    # === 修改：接收 image_data: bytes ===
    def _run_app_search(self, image_data: bytes, mode: str) -> List[str]:
        """Run the App-side StyleSnap search and return the matched ASINs.

        The image is uploaded first. In ``"full_image"`` mode a second
        request re-queries the returned ``queryId`` with a bounding box that
        covers the image minus a random 0-2 pixel inset, and that second
        response is the one parsed.

        Args:
            image_data: Encoded image bytes to upload.
            mode: ``"full_image"`` enables the bounding-box re-query; any
                other value parses the upload response directly.

        Raises:
            ValueError: If no ``queryId`` comes back in full-image mode, the
                image size cannot be read, or the response holds no ASINs.
        """
        upload_auth = self._generate_auth_params()
        upload_meta = self._build_query_metadata({"orientation": "-1"})
        app_name = self.site_config['application']
        user_name = self.site_config['username']

        upload_form = {
            "application": (None, app_name),
            "query_metadata": (None, upload_meta),
            "authtoken": (None, upload_auth['authtoken']),
            "lang": (None, "en_US"),
            "username": (None, user_name),
            "ts": (None, upload_auth['ts']),
            "file": ("image.jpg", image_data, "image/jpeg"),
        }
        upload_resp = self._retry_request("POST", self.snap_url, files=upload_form,
                                          headers=self.headers, cookies=self.cookies)
        payload = upload_resp.json()

        if mode != "full_image":
            return self._parse_app_asin_list(payload)

        query_id = payload.get("queryId")
        if not query_id:
            raise ValueError("全图模式失败：未获取到 queryId (可能需要更换Cookie或环境)")

        dims = get_image_size(image_data)
        if not dims:
            raise ValueError("无法读取图片尺寸")
        img_w, img_h = dims["width"], dims["height"]

        # Box spans the whole picture, shrunk by a small random inset; the
        # bottom-right corner is kept strictly beyond the top-left one.
        inset = random.randint(0, 2)
        bbox = {
            "tlx": inset,
            "tly": inset,
            "brx": max(img_w - inset, inset + 1),
            "bry": max(img_h - inset, inset + 1),
            "imh": img_h,
            "imw": img_w,
        }

        requery_auth = self._generate_auth_params()
        requery_meta = self._build_query_metadata()
        requery_form = {
            "mainQueryId": (None, query_id),
            "uiMode": (None, "stl_bbx_reformulation"),
            "application": (None, self.site_config['application']),
            "query_metadata": (None, requery_meta),
            "authtoken": (None, requery_auth['authtoken']),
            "inputBoundingBox": (None, json.dumps(bbox)),
            "imageHash": (None, ""),
            "lang": (None, "en_US"),
            "username": (None, self.site_config['username']),
            "ts": (None, requery_auth['ts']),
        }
        requery_resp = self._retry_request("POST", self.snap_url, files=requery_form,
                                           headers=self.headers, cookies=self.cookies)
        return self._parse_app_asin_list(requery_resp.json())

    # ========================================================================
    #  Web 端
    # ========================================================================

    def _web_get_token(self, strict_cookies: Dict[str, str]) -> str:
        """Load the StyleSnap web page and return its ``stylesnap`` token.

        Args:
            strict_cookies: Cookies sent with the page request.

        Returns:
            The ``value`` of the page's ``<input name="stylesnap">`` element.

        Raises:
            ValueError: If the page contains no such token.
        """
        page_url = WEB_REFERER_URL.format(base_url=self.base_url)
        browser_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "accept-language": "zh-CN,zh;q=0.9",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
        }
        page = self._retry_request("GET", page_url, headers=browser_headers, cookies=strict_cookies)

        token_value = Selector(text=page.text).xpath(".//input[@name='stylesnap']/@value").get()
        if token_value:
            return token_value
        raise ValueError("Web页面未找到 stylesnap token")

    # === 修改：接收 image_data: bytes ===
    def _web_upload_img(self, image_data: bytes, token: str,
                        strict_cookies: Dict[str, str]) -> List[str]:
        """Upload the image to the web StyleSnap endpoint and return ASINs.

        Args:
            image_data: Encoded image bytes to upload.
            token: StyleSnap token read from the web page.
            strict_cookies: Cookies sent with the upload request.

        Returns:
            The non-empty ``asin`` values of the first search result's
            ``bbxAsinMetadataList``.

        Raises:
            ValueError: If the response lacks ``searchResults`` or yields no
                ASIN. Callers rely on the latter to trigger a retry.
        """
        site_root = self.base_url
        upload_headers = {
            "origin": site_root,
            "referer": WEB_REFERER_URL.format(base_url=site_root),
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
        }

        # The image bytes are sent as-is under a fixed file name.
        upload = self._retry_request(
            "POST", f"{site_root}/stylesnap/upload",
            headers=upload_headers, cookies=strict_cookies,
            files={"explore-looks.jpg": ("explore-looks.jpg", image_data, "image/jpeg")},
            params={'stylesnapToken': token},
        )

        body = upload.json()
        if "searchResults" not in body:
            raise ValueError("Web响应缺少 searchResults")

        found = []
        result_groups = body["searchResults"]
        if result_groups:
            for entry in result_groups[0].get("bbxAsinMetadataList", []):
                candidate = entry.get("asin")
                if candidate:
                    found.append(candidate)
        if not found:
            raise ValueError("Web端解析结果为空 (可能被风控/IP问题)，强制触发重试")
        return found

    # === 修改：接收 image_data: bytes ===
    def _run_web_search(self, image_data: bytes) -> List[str]:
        """Run the web-side StyleSnap search: fetch a token, then upload.

        Both requests use the same snapshot of ``self.cookies`` taken at the
        start of the call.

        Args:
            image_data: Encoded image bytes to upload.

        Returns:
            The ASINs recognised by the web endpoint.
        """
        cookie_snapshot = self.cookies.copy()
        asins = self._web_upload_img(
            image_data,
            self._web_get_token(cookie_snapshot),
            cookie_snapshot,
        )

        logger.info(f"Web端识别成功，ASIN数: {len(asins)}")
        return asins

    # ========================================================================
    #  解析结果页
    # ========================================================================

    def _parse_items(self, html: str) -> List[Dict[str, Any]]:
        """Extract product cards from a search-results page.

        Args:
            html: Page HTML.

        Returns:
            One dict per ``<div>`` carrying a non-empty ``data-asin``, with
            keys ``asin``, ``price``, ``rating``, ``img_url``, ``title``,
            ``brand``, ``bought`` and ``total_comments``. ``title`` and
            ``brand`` are ``None`` when the card has no title text; price and
            rating are ``""`` when the element is absent and ``None`` when it
            holds no number.
        """
        # Compiled once per page rather than once per product card.
        re_float = re.compile(r'[\d,]+\.?\d*')
        skipped_img_exts = ("gif", "svg")

        items = []
        res = Selector(text=html)
        for div in res.xpath('//div[@data-asin and string-length(@data-asin) > 0]'):
            asin = div.xpath('./@data-asin').get()
            # Skip empty placeholder divs.
            if not asin:
                continue

            # 1. Price text.
            asin_price = div.xpath(
                './/div[contains(@data-cy, "price-recipe")]//span[contains(@class, "a-offscreen")]/text()').get("")
            # 2. Rating text.
            asin_rating = div.xpath(
                './/div[@data-cy="reviews-block"]//span[contains(@class, "a-icon-alt")]/text()').get("")
            # 3. Image: prefer the s-image inside the image container, fall
            #    back to any <img> in the card; gif/svg sources are ignored.
            asin_img_list = div.xpath(
                './/div[@data-cy="image-container"]//img[contains(@class, "s-image")]/@src').getall()
            if not asin_img_list:
                asin_img_list = div.xpath('.//img/@src').getall()
            valid_imgs = [i for i in asin_img_list if i and i.split(".")[-1] not in skipped_img_exts]
            asin_img = valid_imgs[0] if valid_imgs else None

            # 4. Title and brand from the text fragments under <h2>.
            h2_texts = div.xpath('.//div[@data-cy="title-recipe"]//h2//text()').getall()
            h2_texts = [t.strip() for t in h2_texts if t.strip()]
            # Reset for every card: without a title block these names would
            # otherwise be unbound on the first card, or silently carry over
            # the previous card's values.
            asin_brand = None
            asin_title = None
            if len(h2_texts) >= 2:
                asin_brand, asin_title = h2_texts[0], h2_texts[1]
            elif h2_texts:
                asin_title = h2_texts[0]

            # 5. Total rating count.
            asin_total_comments = div.xpath(
                './/div[@data-cy="reviews-block"]//a[contains(@aria-label, "ratings")]//text() ').get("")
            # 6. Purchase-volume text: first secondary-colour span in the
            #    reviews block.
            asin_bought = div.xpath(
                './/div[@data-cy="reviews-block"]//span[contains(@class, "a-color-secondary")]//text()').get("")

            # --- Cleaning ---
            if asin_price:
                match = re_float.search(asin_price)
                asin_price = match.group() if match else None
            if asin_rating:
                match = re_float.search(asin_rating)
                asin_rating = match.group() if match else None
            if asin_total_comments:
                # Drop the surrounding parentheses only.
                asin_total_comments = asin_total_comments.replace("(", "").replace(")", "")

            items.append({
                "asin": asin,
                "price": asin_price,
                "rating": asin_rating,
                "img_url": asin_img,
                "title": asin_title,
                "brand": asin_brand,
                "bought": asin_bought.strip() if asin_bought else None,
                "total_comments": asin_total_comments,
            })
        return items

    def _fetch_single_page(self, url: str, page: int, headers: Dict[str, str]) -> List[Dict[str, Any]]:
        """Fetch and parse one result page, never raising.

        Args:
            url: Search URL without the ``page`` parameter.
            page: Page number appended as ``&page=<n>``.
            headers: Request headers.

        Returns:
            The parsed items, or ``[]`` if fetching or parsing failed (the
            failure is logged).
        """
        try:
            page_url = f"{url}&page={page}"
            html = self._retry_request("GET", page_url, headers=headers, cookies=self.cookies).text
            parsed = self._parse_items(html)
        except Exception as exc:
            logger.warning(f"第{page}页失败: {exc}")
            parsed = []
        return parsed

    def _fetch_results(self, url: str) -> Dict[str, Any]:
        """Crawl every page of a search URL and return de-duplicated items.

        Page 1 is fetched first to read the ``totalResultCount`` and
        ``asinOnPageCount`` counters embedded in the HTML; the remaining
        pages are then fetched concurrently.

        Args:
            url: Search-results URL without a ``page`` parameter.

        Returns:
            ``{"total_items": <int>, "items": <list of item dicts>}`` with
            items in page order and only the first occurrence of each ASIN.

        Raises:
            RequestException: If the first page cannot be fetched. Failures
                on later pages are logged and yield no items.
        """
        headers = {
            "accept": "text/html,application/xhtml+xml,*/*",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
            "viewport-width": "1343"
        }

        resp = self._retry_request("GET", url, headers=headers, cookies=self.cookies)
        first_html = resp.text
        all_items = list(self._parse_items(first_html))

        total_res = re.search(r'"totalResultCount":(\d+)', first_html)
        count_res = re.search(r'"asinOnPageCount":(\d+)', first_html)
        total = int(total_res.group(1)) if total_res else None
        per_page = int(count_res.group(1)) if count_res else None
        pages = get_page_num(total, per_page)

        logger.info(f"找到商品 {total} 个，共 {pages} 页")

        if pages > 1:
            logger.info(f"开始并发爬取第 2 到 {pages} 页...")
            with ThreadPoolExecutor(max_workers=4) as executor:
                # executor.map yields results in submission order, so items
                # stay in page order however the downloads interleave.
                page_batches = executor.map(
                    lambda p: self._fetch_single_page(url, p, headers),
                    range(2, pages + 1),
                )
                for batch in page_batches:
                    all_items.extend(batch)

        # Keep the first occurrence of each ASIN; dicts preserve insertion
        # order, so the page ordering survives de-duplication.
        unique_by_asin = {}
        for item in all_items:
            unique_by_asin.setdefault(item["asin"], item)
        final_items = list(unique_by_asin.values())
        return {"total_items": len(final_items), "items": final_items}

    # === 图片下载方法 ===
    def _download_image(self, img_url: str) -> bytes:
        """Download an image and shrink it to at most 1 MB when possible.

        Up to three download attempts are made. A payload over 1 MB is
        re-encoded as JPEG with decreasing quality and, once quality bottoms
        out, decreasing resolution.

        Args:
            img_url: Image URL.

        Returns:
            The original bytes when they are already within the limit, when
            the URL path ends in ``.svg``, or when Pillow cannot decode them;
            otherwise the re-encoded JPEG bytes. The JPEG may still exceed
            the limit if the image shrinks below 100 px on a side first.

        Raises:
            ValueError: If every attempt failed; chained to the last error.
        """
        logger.info(f"正在下载图片: {img_url}")
        target_size_bytes = 1 * 1024 * 1024  # 1 MB upper bound
        max_attempts = 3
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'no-cache',
            'ect': '3g',
            'pragma': 'no-cache',
            'priority': 'u=0, i',
            'sec-ch-dpr': '1',
            'sec-ch-ua': '"Chromium";v="142", "Google Chrome";v="142", "Not_A Brand";v="99"',
            'sec-ch-ua-form-factors': '"Desktop"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-ch-viewport-height': '376',
            'sec-ch-viewport-width': '1920',
            'sec-fetch-dest': 'document',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-site': 'none',
            'sec-fetch-user': '?1',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36',
        }
        # Look only at the URL path so "x.svg?token=..." is still recognised.
        url_path = img_url.lower().split("?", 1)[0].split("#", 1)[0]
        is_svg = url_path.endswith('.svg')

        last_error = None
        for i in range(max_attempts):
            try:
                resp = requests.get(img_url, headers=headers, timeout=15)
                resp.raise_for_status()
                img_bytes = resp.content
                # Small enough already, or SVG (which Pillow cannot process):
                # hand back the download untouched.
                if len(img_bytes) <= target_size_bytes or is_svg:
                    return img_bytes
                try:
                    with Image.open(BytesIO(img_bytes)) as img:
                        logger.info(f"图片大小 ({len(img_bytes) / 1024 / 1024:.2f}MB) 超过限制，开始压缩...")
                        # JPEG cannot store other modes such as RGBA.
                        if img.mode != "RGB":
                            img = img.convert("RGB")
                        buffer = BytesIO()
                        quality = 90  # starting quality
                        min_quality = 20  # quality floor
                        step = 10  # quality decrement per round
                        while True:
                            # Reuse the buffer: rewind and clear before each save.
                            buffer.seek(0)
                            buffer.truncate()
                            img.save(buffer, format="JPEG", quality=quality, optimize=True)
                            current_size = buffer.tell()
                            if current_size <= target_size_bytes:
                                logger.info(f"压缩成功: {current_size / 1024 / 1024:.2f}MB (Quality={quality})")
                                return buffer.getvalue()
                            if quality > min_quality:
                                quality -= step
                                continue
                            # Quality is at its floor: shrink the resolution.
                            w, h = img.size
                            # Stop once a side is under 100 px so the loop
                            # is guaranteed to terminate.
                            if w < 100 or h < 100:
                                logger.warning(f"图片已缩至 {w}x{h} 仍无法满足大小限制，强制返回")
                                return buffer.getvalue()
                            img = img.resize((int(w * 0.9), int(h * 0.9)), Image.Resampling.LANCZOS)
                            logger.info(f"质量极限，缩小分辨率至: {img.size}")
                            # Restore some quality after downscaling.
                            quality = 60
                except UnidentifiedImageError:
                    # Format Pillow does not recognise: return the download as-is.
                    logger.warning(f"Pillow 无法识别图片格式: {img_url}，直接返回原图")
                    return img_bytes
                except OSError:
                    # Decoding/encoding failed (e.g. a damaged file): return as-is.
                    logger.warning(f"图片文件似乎已损坏: {img_url}")
                    return img_bytes
            except Exception as e:
                last_error = e
                logger.warning(f"图片下载失败/压缩失败(第{i + 1}次): {e}")
                # No point waiting after the final attempt.
                if i < max_attempts - 1:
                    time.sleep(1)
        raise ValueError(f"图片下载最终失败: {img_url}") from last_error

    # === 修改：入口参数改为 image_url ===
    def search(self, image_url: str, search_mode: str = "default") -> Dict[str, Any]:
        """Run an image search end to end and return a result record.

        The image is downloaded once, then up to ``GLOBAL_RETRY_TIMES``
        attempts are made. Each attempt runs the App and Web searches in
        parallel, merges their ASINs, crawls the resulting search pages and
        tags every item with the side(s) that reported its ASIN. After a
        failed attempt the current cookies are passed to
        ``cookie_manager.mark_invalid`` and the client context is refreshed.

        Args:
            image_url: URL of the query image.
            search_mode: Passed to the App search as its ``mode``.

        Returns:
            A dict that always has ``is_web``, ``is_app``, ``site_name``,
            ``mode``, ``input_image_url``, ``success``, ``save_time``,
            ``items`` and ``total_items``. On success it also has
            ``app_asin``, ``web_asin``, ``combined_asin``, ``search_url``,
            ``duration`` and ``device_info``; on failure it has ``error``
            (plus ``duration`` and ``device_info`` when the retry loop ran).
            This method does not raise for search failures.
        """
        start_time = time.time()
        result = {
            "is_web": 0, "is_app": 0,
            "site_name": self.site_name, "mode": search_mode,
            "input_image_url": image_url,
            "success": 0, "save_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "items": [], "total_items": 0
        }

        # 1. Download the image once, up front; without it nothing can run.
        try:
            image_data = self._download_image(image_url)
        except Exception as e:
            logger.error(f"图片资源获取失败: {e}")
            result["error"] = str(e)
            return result

        # 2. Retry loop over the whole App + Web + crawl flow.
        for attempt in range(GLOBAL_RETRY_TIMES):
            try:
                app_asins = []
                web_asins = []

                with ThreadPoolExecutor(max_workers=2) as executor:
                    # Both searches receive the already-downloaded bytes.
                    future_app = executor.submit(self._run_app_search, image_data, search_mode)
                    future_web = executor.submit(self._run_web_search, image_data)

                    # A failure on either side is logged and re-raised, so it
                    # fails the whole attempt.
                    # NOTE(review): is_app / is_web set here are not reset when
                    # a later attempt fails — confirm that is intended.
                    try:
                        app_asins = future_app.result()
                        app_asins = list(set(app_asins))
                        result["is_app"] = 1 if app_asins else 0
                        logger.info(f"App ({search_mode}) 获取 ASIN: {len(app_asins)}")
                    except Exception as e:
                        logger.warning(f"App端识别非关键错误: {e}")
                        raise e

                    try:
                        web_asins = future_web.result()
                        web_asins = list(set(web_asins))
                        result["is_web"] = 1 if web_asins else 0
                    except Exception as e:
                        logger.warning(f"{image_url} Web端识别错误: {e}")
                        raise e

                combined = list(set(app_asins + web_asins))
                if not combined:
                    raise ValueError("App和Web端均未获取到有效ASIN")
                logger.info(
                    f"双端识别完成 - App ASIN数: {len(app_asins)}, Web ASIN数: {len(web_asins)}, 合并去重后: {len(combined)}")

                url = AMAZON_SEARCH_BASE_URL.format(base_url=self.base_url, bbx_asin_list="|".join(combined))
                data = self._fetch_results(url)
                # Tag each crawled item with the side(s) that reported its ASIN.
                processed_items = data["items"]
                for item in processed_items:
                    asin = item.get("asin")
                    in_app = asin in app_asins
                    in_web = asin in web_asins

                    if in_app and in_web:
                        item["source"] = "app_web"
                    elif in_app:
                        item["source"] = "app"
                    elif in_web:
                        item["source"] = "web"
                    else:
                        # The crawled ASIN is in neither seed list (for
                        # example an ad or recommendation slot).
                        item["source"] = "other"
                result.update({
                    "app_asin": "|".join(app_asins),
                    "web_asin": "|".join(web_asins),
                    "combined_asin": "|".join(combined),
                    "search_url": url,
                    "duration": round(time.time() - start_time, 2),
                    "success": 1,
                    "device_info": self.device_info,
                    "total_items": data["total_items"], "items": data["items"]
                })
                return result

            except Exception as e:
                logger.error(f"尝试 {attempt + 1} 失败: {e}")
                # Report the cookies used by this failed attempt to the
                # cookie manager as invalid.
                if self.cookies:
                    cookie_manager.mark_invalid(self.site_name, self.cookies)
                if attempt < GLOBAL_RETRY_TIMES - 1:
                    # More attempts remain: wait, then pick new cookies/device.
                    logger.info("刷新环境(Cookie/设备)中...")
                    time.sleep(RETRY_DELAY)
                    self._refresh_client_context()
                else:
                    # Final attempt failed: record the last error.
                    result.update({
                        "error": str(e),
                        "duration": round(time.time() - start_time, 2),
                        "success": 0,
                        "device_info": self.device_info
                    })

        return result


if __name__ == "__main__":
    # Manual smoke test. Alternative sample image URLs are kept commented out;
    # exactly one assignment should be active.
    # test_img_url = "https://soundasia.oss-cn-shenzhen.aliyuncs.com/yswg-img/SoundasiaAmazon/file/2025/1125/bdb9b06102184048b6eb9db3b39bb97e.png"
    test_img_url = "https://yswg-private-test.oss-cn-shenzhen.aliyuncs.com/SoundasiaAmazon/competitor_image/2025/1127/88e90bbd317a42ea80cc9128ea333e6c.svg"  # image permission problem
    # test_img_url = "https://m.media-amazon.com/images/I/71IFE6W6THL._AC_UL320_.jpg"
    # test_img_url = "https://m.media-amazon.com/images/I/71IFE6W6THL._AC_SY550_.jpg"
    # test_img_url = "https://m.media-amazon.com/images/I/71G1BAeYlNL._AC_SX300_SY300_QL70_FMwebp_.jpg"

    try:
        client = AmazonImageSearch(site_name="uk")
        logger.info("\n=== 测试默认模式 ===")
        data = client.search(test_img_url, search_mode="default")
        print(f"识别成功: {data.get('success')}")
        print(f"找到商品数: {data.get('total_items')}")
        print(f"耗时: {data.get('duration')}")
        print(f"搜索url: {data.get('search_url')}")
        # Print every returned item.
        items = data.get('items', [])
        if items:
            print("\n--- 商品示例 ---")
            for item in items:
                # Slice defensively so an item without a title string does
                # not abort the whole printout.
                title = str(item.get('title') or '')
                print(f"ASIN: {item['asin']} | 价格: {item['price']} | 评分：{item['rating']}  | img_url：{item['img_url']}  | 品牌名：{item['brand']}  | 销量：{item['bought']}  | 评论数：{item['total_comments']}  | asin来源：{item['source']}  | 标题: {title[:50]}...")
    except Exception as e:
        logger.error(f"测试失败: {e}")