亚马逊新版 App 端以图识物:支持默认识别和全图识别,增加了随机设备获取,并适配 us、uk、de 站点(uk、de 站点未经过抓包测试)。

parent 9dcae35d
import hashlib
import time
import uuid
import random
from typing import Optional, Dict, Any
from loguru import logger
import requests
import json
from PIL import Image
from requests.exceptions import RequestException, JSONDecodeError
# 导入所有站点相关配置
from amazon_configs import (
site_name_secret_dict,
us_devices_list,
uk_devices_list,
de_devices_list,
us_cookie_dict,
uk_cookie_dict,
de_cookie_dict
)
# Logging configuration (the file sink below is currently disabled).
# logger.add("amazon_search_optimized.log", rotation="10 MB", level="INFO")

# Per-site configuration: maps each site code to the device list and cookie
# dict imported from amazon_configs.
SITE_CONFIG_MAPPER = {
    site_code: {"devices": device_list, "cookies": cookie_dict}
    for site_code, device_list, cookie_dict in (
        ("us", us_devices_list, us_cookie_dict),
        ("uk", uk_devices_list, uk_cookie_dict),
        ("de", de_devices_list, de_cookie_dict),
    )
}

# URL template for opening the matched ASINs on the website.
AMAZON_SEARCH_BASE_URL = (
    "https://www.amazon.com/s?rh=p_78:{bbx_asin_list}"
    "&rank=asin-scores-asc-rank&searchMethod=CameraSearch"
)

# Retry policy: number of attempts and the pause (in seconds) between them.
RETRY_TIMES = 5
RETRY_DELAY = 1
def get_image_size(image_path: str) -> Optional[Dict[str, int]]:
    """Return the pixel dimensions of the image stored at *image_path*.

    :param image_path: path of the image file to open.
    :return: ``{"width": ..., "height": ...}`` on success, or ``None`` when
        the file is missing or cannot be read as an image (the failure is
        logged in both cases).
    """
    try:
        with Image.open(image_path) as picture:
            pixel_width, pixel_height = picture.size
    except FileNotFoundError:
        logger.error(f"图片文件未找到: {image_path}")
        return None
    except Exception as err:
        # Any other failure (unreadable or unsupported file, ...) is reported
        # the same way: log it and signal "no size" to the caller.
        logger.error(f"获取图片尺寸失败 ({image_path}): {err}")
        return None
    else:
        return {"width": pixel_width, "height": pixel_height}
class AmazonImageSearch:
    """Client for the Amazon app "style-snap" image-search endpoint.

    Two search modes are supported by :meth:`search`:

    * ``"default"``    - a single upload of the image.
    * ``"full_image"`` - the default upload followed by a second request that
      re-submits the returned ``queryId`` with a bounding box covering
      (almost) the whole image.
    """

    # Search modes accepted by search().
    _SEARCH_MODES = ("default", "full_image")
    # Largest random inset (in pixels) applied to the full-image bounding box.
    _MAX_BBOX_INSET = 2

    def __init__(self, site_name: str):
        """Load the configuration of *site_name* and prepare request headers.

        :param site_name: site code; must be a key of both
            ``site_name_secret_dict`` and ``SITE_CONFIG_MAPPER``.
        :raises ValueError: if the site is unknown, lacks device/cookie
            configuration, or has an empty device list.
        """
        # Validate the site before touching any configuration.
        if site_name not in site_name_secret_dict:
            raise ValueError(f"不支持的站点: {site_name},支持站点:{list(site_name_secret_dict.keys())}")
        if site_name not in SITE_CONFIG_MAPPER:
            raise ValueError(f"站点 {site_name} 缺少设备或Cookie配置")
        self.site_name = site_name
        # Base site configuration (secret, username, application, snap_url).
        self.site_config = site_name_secret_dict[site_name]
        # Site-specific device list and cookies.
        self.site_specific = SITE_CONFIG_MAPPER[site_name]
        self.cookies = self.site_specific["cookies"]
        self.session_id = self.cookies.get("session-id", '')
        # One device is picked at random per client; its fields are merged
        # into every query_metadata payload.
        self.device_info = self._get_random_device()
        # Fresh client device id for this client instance.
        self.client_device_id = str(uuid.uuid4())
        logger.info(
            f"客户端初始化完成 - 站点: {self.site_name}, "
            f"随机设备: {self.device_info.get('clientDevice')}, "
            f"clientDeviceId: {self.client_device_id}"
        )
        # Request headers shared by every call.
        # NOTE(review): the "ubid-main" cookie key is read for every site;
        # confirm the uk/de cookie dicts expose the ubid under this name.
        self.headers = {
            "x-amz-access-token": "",
            "x-amz-lens-session-auth-token": self.cookies.get("session-token", ""),
            "x-amz-lens-session-id": self.session_id,
            "x-amz-lens-ubid": self.cookies.get("ubid-main", ""),
            "accept-encoding": "gzip",
            "user-agent": "okhttp/4.9.1",
        }
        self.snap_url = f"{self.site_config['snap_url']}/style-snap/2.0"

    def _get_random_device(self) -> Dict[str, str]:
        """Return one randomly chosen entry of the site's device list.

        :raises ValueError: if the device list is empty.
        """
        devices = self.site_specific["devices"]
        if not devices:
            raise ValueError(f"站点 {self.site_name} 的设备列表为空")
        return random.choice(devices)

    def _generate_auth_params(self) -> Dict[str, str]:
        """Build the ``ts`` / ``authtoken`` pair sent with each request.

        ``authtoken`` is the SHA-512 hex digest of
        ``secret + username + application + ts``, where ``ts`` is the current
        Unix time in whole seconds.
        """
        ts = str(int(time.time()))
        combined = (
            f"{self.site_config['secret']}{self.site_config['username']}"
            f"{self.site_config['application']}{ts}"
        )
        authtoken = hashlib.sha512(combined.encode("utf-8")).hexdigest()
        return {"ts": ts, "authtoken": authtoken}

    def _build_query_metadata(self, extra_params: Optional[Dict[str, str]] = None) -> str:
        """Serialize the ``query_metadata`` form field as a JSON string.

        :param extra_params: optional entries merged last, so they override
            both the base fields and the device fields.
        """
        base_params = {
            "amznSessionId": self.session_id,
            "clientVersion": "30.20.2.100",
            "cardsVersion": "1.0",
            "clientMessageVersion": "1.0",
            "amznDirectedCustomerId": "",
            "clientDeviceId": self.client_device_id,
            "clientId": str(uuid.uuid4()),
            "sourceType": "Photo",
            "ingressSource": "ctp",
            "uiMode": "stylesnap",
            # Fields of the randomly selected device.
            **self.device_info,
        }
        if extra_params:
            base_params.update(extra_params)
        return json.dumps(base_params)

    def _parse_response(self, response_json: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Extract the ASIN list from an API response.

        :param response_json: decoded JSON body of a style-snap response.
        :return: dict with ``is_app``, ``asin_list_app`` (ASINs joined by
            ``|``) and ``search_url``; ``None`` if the list is empty or the
            response does not have the expected shape.
        """
        try:
            search_result = response_json["style-snap"]["searchResult"][0]
            bbx_asin_list = search_result.get("bbxAsinList", [])
            if not bbx_asin_list:
                logger.warning("响应中 bbxAsinList 为空")
                return None
            # De-duplicate while keeping the order returned by the API; a
            # plain set() would make the output order vary between runs.
            unique_asin_list = list(dict.fromkeys(bbx_asin_list))
            joined_asins = "|".join(unique_asin_list)
            # NOTE(review): AMAZON_SEARCH_BASE_URL is used for every site;
            # confirm whether uk/de results should link elsewhere.
            return {
                "is_app": 1,
                "asin_list_app": joined_asins,
                "search_url": AMAZON_SEARCH_BASE_URL.format(bbx_asin_list=joined_asins),
            }
        except (KeyError, IndexError, TypeError, AttributeError) as e:
            # AttributeError covers a searchResult entry that is not a dict.
            logger.error(f"解析响应失败: {e}. 响应内容: {response_json}")
            return None

    def _perform_request(self, **kwargs) -> Dict[str, Any]:
        """POST with retries and return the decoded JSON body.

        :param kwargs: forwarded to ``requests.post``; ``timeout`` defaults
            to 10 seconds when the caller does not supply one.
        :raises RequestException: after ``RETRY_TIMES`` failed attempts; the
            last underlying error is attached as ``__cause__``.
        """
        # setdefault avoids a "multiple values for keyword" TypeError when a
        # caller passes its own timeout.
        kwargs.setdefault("timeout", 10)
        last_error: Optional[Exception] = None
        for attempt in range(RETRY_TIMES):
            try:
                response = requests.post(**kwargs)
                response.raise_for_status()  # raise HTTPError on non-2xx
                return response.json()
            except JSONDecodeError as e:
                last_error = e
                logger.error(f"请求失败 (第 {attempt + 1}/{RETRY_TIMES} 次): 响应不是有效的JSON格式。")
            except RequestException as e:
                last_error = e
                logger.error(f"请求失败 (第 {attempt + 1}/{RETRY_TIMES} 次): {e}")
            # No pause after the final attempt.
            if attempt < RETRY_TIMES - 1:
                logger.warning(f"将在 {RETRY_DELAY} 秒后重试...")
                time.sleep(RETRY_DELAY)
        raise RequestException(f"API请求在 {RETRY_TIMES} 次尝试后最终失败。") from last_error

    def _default_search(self, image_path: str) -> Dict[str, Any]:
        """Run the default recognition request (step 1): upload the image.

        :raises FileNotFoundError: if *image_path* does not exist.
        :raises RequestException: if the request fails after all retries.
        """
        logger.info(f"开始默认识别 (站点: {self.site_name}, 图片: {image_path}),站点链接:{self.snap_url}")
        try:
            with open(image_path, "rb") as f:
                image_data = f.read()
        except FileNotFoundError:
            logger.error(f"无法读取图片文件: {image_path}")
            raise
        auth_params = self._generate_auth_params()
        # The default search additionally sends "orientation".
        query_metadata = self._build_query_metadata({"orientation": "-1"})
        files = {
            "application": (None, self.site_config['application']),
            "query_metadata": (None, query_metadata),
            "authtoken": (None, auth_params['authtoken']),
            "lang": (None, "en_US"),
            "username": (None, self.site_config['username']),
            "ts": (None, auth_params['ts']),
            "file": ("image.jpg", image_data, "image/jpeg"),
        }
        return self._perform_request(url=self.snap_url, files=files, headers=self.headers)

    @classmethod
    def _random_bounding_box(cls, image_size: Dict[str, int]) -> Dict[str, int]:
        """Build a bounding box covering the image minus a small random inset.

        The inset is capped so the box always stays inside the image: an
        inset of ``k`` needs at least ``2k + 1`` pixels on the shorter side.
        Images of 5 px or more per side get the usual 0..2 px inset.
        """
        width, height = image_size["width"], image_size["height"]
        largest_inset = max(0, min(cls._MAX_BBOX_INSET, (min(width, height) - 1) // 2))
        inset = random.randint(0, largest_inset)
        return {
            "tlx": inset,
            "tly": inset,
            # The floor of inset + 1 keeps a non-empty box for degenerate sizes.
            "brx": max(width - inset, inset + 1),
            "bry": max(height - inset, inset + 1),
            "imh": height,
            "imw": width,
        }

    def _full_image_search(self, query_id: str, image_path: str) -> Dict[str, Any]:
        """Run the full-image recognition request (step 2).

        :param query_id: ``queryId`` returned by the default search.
        :param image_path: local image path, read only for its dimensions.
        :raises ValueError: if the image dimensions cannot be determined.
        :raises RequestException: if the request fails after all retries.
        """
        logger.info(f"开始全图识别 (Query ID: {query_id[:10]}...)")
        image_size = get_image_size(image_path)
        if not image_size:
            raise ValueError("无法获取图片尺寸,无法进行全图搜索。")
        bounding_box = self._random_bounding_box(image_size)
        auth_params = self._generate_auth_params()
        query_metadata = self._build_query_metadata()
        form_data = {
            "mainQueryId": (None, query_id),
            "uiMode": (None, "stl_bbx_reformulation"),
            "application": (None, self.site_config['application']),
            "query_metadata": (None, query_metadata),
            "authtoken": (None, auth_params['authtoken']),
            "inputBoundingBox": (None, json.dumps(bounding_box)),
            "imageHash": (None, ""),
            "lang": (None, "en_US"),
            "username": (None, self.site_config['username']),
            "ts": (None, auth_params['ts']),
        }
        return self._perform_request(url=self.snap_url, files=form_data, headers=self.headers)

    def search(self, image_path: str, search_mode: str = "default") -> Dict[str, Any]:
        """Run an image search and return a result dict.

        :param image_path: path of the local image file.
        :param search_mode: ``'default'`` or ``'full_image'``.
        :return: result dict; when nothing was found, the mode is unsupported,
            the session id is missing, or any error occurs, the default
            (empty) result is returned and the problem is logged.
        """
        # Default result returned on every failure path.
        base_result = {
            "is_web": 0, "is_app": 0, "asin_list_web": "", "asin_list_app": "",
            "asin_list_join": "", "site_name": self.site_name,
            "search_url": None, "mode": search_mode
        }
        if not self.session_id:
            logger.error("Cookies中缺少'session-id',无法继续。")
            return base_result
        # Reject unknown modes before spending a network request on them.
        if search_mode not in self._SEARCH_MODES:
            logger.error(f"不支持的搜索模式: {search_mode}")
            return base_result
        try:
            # Step 1: default search (full-image mode needs its queryId).
            response = self._default_search(image_path)
            if search_mode == "full_image":
                query_id = response.get("queryId")
                if not query_id:
                    logger.error("默认识别未返回 queryId,无法进行全图搜索。")
                    return base_result
                # Step 2: second request carrying the queryId.
                response = self._full_image_search(query_id, image_path)
            parsed_result = self._parse_response(response)
            if parsed_result:
                base_result.update(parsed_result)
                base_result["asin_list_join"] = parsed_result["asin_list_app"]
        except Exception as e:
            # Best-effort API: any failure yields the default result.
            logger.error(f"处理 '{search_mode}' 模式搜索时发生异常: {e}")
        return base_result
if __name__ == "__main__":
    # Sample image used for the manual smoke run.
    sample_image = "temp_img/B0BYNB2J6W.jpg"
    # (log heading, search mode) pairs exercised for every site, in order.
    demo_runs = (
        ("\n--- 默认识别模式 ---", "default"),
        ("\n--- 全图识别模式 ---", "full_image"),
    )
    try:
        # Try each supported site in turn ("us", "uk", "de").
        for site in ("us", "uk", "de"):
            logger.info(f"\n{'=' * 20} 测试站点: {site} {'=' * 20}")
            searcher = AmazonImageSearch(site_name=site)
            for heading, mode in demo_runs:
                logger.info(heading)
                outcome = searcher.search(sample_image, search_mode=mode)
                logger.info(f"结果: {outcome}")
    except ValueError as init_error:
        logger.error(f"初始化失败: {init_error}")
    except Exception as run_error:
        logger.error(f"执行过程中发生错误: {run_error}")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment