no message

parent dfdb4f08
import hashlib
import json
import time
import uuid
from typing import Optional, List

import requests
from loguru import logger
from requests.exceptions import RequestException, ConnectionError, Timeout
# Logging: add a file sink that receives INFO-and-above records and rotates
# once the file reaches 10 MB.
logger.add("amazon_search.log", level="INFO", rotation="10 MB")

# Module-wide state: the client device id is generated once at import time and
# stays fixed for the life of the process (cookies, by contrast, are supplied
# by the caller and should be refreshed when they expire).
GLOBAL_CLIENT_DEVICE_ID = str(uuid.uuid4())
logger.info(f"初始化全局 clientDeviceId: {GLOBAL_CLIENT_DEVICE_ID}")

# Endpoints. The search URL is a template: ``{bbx_asin_list}`` is filled in
# with the "|"-joined ASINs returned by the style-snap endpoint.
AMAZON_STYLE_SNAP_URL = "https://match-visualsearch.amazon.com/style-snap/2.0"
AMAZON_SEARCH_BASE_URL = (
    "https://www.amazon.com/s"
    "?rh=p_78:{bbx_asin_list}"
    "&rank=asin-scores-asc-rank"
    "&searchMethod=CameraSearch"
)

# Inputs to the request auth token (SHA-512 of SECRET + USERNAME + APPLICATION
# + timestamp) and the matching form fields.
SECRET = "5b6874d3a20417591bd5464a25a37bc6"
APPLICATION = "amzn-mbl-cscan-us"
USERNAME = "amzn-mbl-cscan-us"

# Retry policy.
RETRY_TIMES = 5  # upper bound of the request loop (total attempts)
RETRY_DELAY = 1  # pause between attempts, in seconds
def _wait_before_retry(attempt: int) -> None:
    """Sleep RETRY_DELAY seconds unless ``attempt`` was the last allowed one."""
    if attempt < RETRY_TIMES - 1:
        logger.info(f"等待 {RETRY_DELAY} 秒后重试...")
        time.sleep(RETRY_DELAY)


def _build_query_metadata(session_id: str, client_id: str) -> str:
    """Serialize the ``query_metadata`` form field as compact JSON.

    ``json.dumps`` is used instead of string interpolation so that any quote
    or backslash in the values is escaped and the payload stays valid JSON.
    Key order and the compact separators match the wire format the request
    has always used.
    """
    return json.dumps(
        {
            "amznSessionId": session_id,
            "clientVersion": "30.20.2.100",
            "cardsVersion": "1.0",
            "clientMessageVersion": "1.0",
            "amznDirectedCustomerId": "",
            "clientDeviceId": GLOBAL_CLIENT_DEVICE_ID,
            "clientDevice": "Android - Pixel 2",
            "deviceManufacturer": "Google",
            "clientDeviceVersion": "10",
            "clientId": client_id,
            "orientation": "-1",
            "sourceType": "Photo",
            "ingressSource": "ctp",
            "uiMode": "stylesnap",
        },
        separators=(",", ":"),
    )


def get_amazon_search_url(image_path: str, cookies: dict) -> Optional[str]:
    """Run an Amazon image search and build the resulting search-page URL.

    The image is uploaded to the style-snap endpoint; the ``bbxAsinList`` of
    the first entry in ``style-snap.searchResult`` is joined with ``|`` and
    substituted into ``AMAZON_SEARCH_BASE_URL``.

    The request is attempted up to ``RETRY_TIMES`` times with ``RETRY_DELAY``
    seconds between attempts. Network errors, non-auth HTTP errors, invalid
    JSON and unexpected response structure are retried. A missing session id,
    an unreadable image, an HTTP 401/403 response, or an empty result list
    end the call immediately.

    :param image_path: Local path of the image to upload.
    :param cookies: Amazon cookies; ``session-id`` is required, and
        ``session-token`` / ``ubid-main`` are forwarded as headers when present.
    :return: The search URL, or ``None`` on any failure (errors are logged,
        not raised).
    """
    # --- Loop-invariant preparation (done once, not per attempt) ------------
    session_id = cookies.get("session-id", "") if cookies else ""
    if not session_id:
        logger.error("Cookies中缺少session-id")
        return None

    try:
        with open(image_path, "rb") as f:
            image_data = f.read()
    except Exception as e:
        # Broad on purpose: this function reports failures by returning None.
        # A bad image will not get better on retry, so stop here.
        logger.exception("读取图片失败: {}", e)
        return None
    logger.info(f"成功读取图片: {image_path} (大小: {len(image_data)} bytes)")

    headers = {
        "x-amz-access-token": "",
        "x-amz-lens-session-auth-token": cookies.get("session-token", ""),
        "x-amz-lens-session-id": session_id,
        "x-amz-lens-ubid": cookies.get("ubid-main", ""),
        "accept-encoding": "gzip",
        "user-agent": "okhttp/4.9.1",
    }
    text_part_type = "multipart/form-data; charset=utf-8"

    for retry in range(RETRY_TIMES):
        try:
            logger.info(f"开始第 {retry + 1}/{RETRY_TIMES} 次请求")

            # Per-attempt values: a fresh client id and a timestamp-bound token.
            client_id = str(uuid.uuid4())
            ts = str(int(time.time()))
            combined = SECRET + USERNAME + APPLICATION + ts
            authtoken = hashlib.sha512(combined.encode("utf-8")).hexdigest()
            # Only the first 10 characters are logged to keep the line short.
            logger.debug(f"生成authtoken: {authtoken[:10]}...")

            query_metadata = _build_query_metadata(session_id, client_id)

            # Multipart form: text fields carry no filename, the image part
            # carries an empty one.
            files = [
                ("application", (None, APPLICATION, text_part_type)),
                ("query_metadata", (None, query_metadata, text_part_type)),
                ("authtoken", (None, authtoken, text_part_type)),
                ("lang", (None, "en_US", text_part_type)),
                ("username", (None, USERNAME, text_part_type)),
                ("ts", (None, ts, text_part_type)),
                ("file", ("", image_data, "image/jpeg")),
            ]

            logger.info("发送POST请求到亚马逊以图搜物接口")
            response = requests.post(
                url=AMAZON_STYLE_SNAP_URL,
                headers=headers,
                files=files,
                cookies=cookies,
                timeout=10,  # seconds
            )
            response.raise_for_status()  # HTTP error statuses become exceptions
            logger.info(f"请求成功,状态码: {response.status_code}")

            try:
                response_json = response.json()
            except ValueError:
                logger.exception("响应不是有效的JSON格式")
                _wait_before_retry(retry)
                continue

            try:
                search_result = response_json.get("style-snap", {}).get("searchResult", [])
                if not search_result:
                    logger.warning("searchResult列表为空")
                    return None
                first_result = search_result[0]
                bbx_asin_list = first_result.get("bbxAsinList", [])
                if not bbx_asin_list:
                    logger.warning("bbxAsinList列表为空")
                    return None
                logger.info(f"成功提取bbxAsinList,共 {len(bbx_asin_list)} 个元素,注意 实际有些不可用 打开网页数据少几个为正常现象")
                joined_asin = "|".join(bbx_asin_list)
                final_url = AMAZON_SEARCH_BASE_URL.format(bbx_asin_list=joined_asin)
                logger.success(f"生成最终搜索URL: {final_url}")
                return final_url
            except (IndexError, KeyError) as e:
                logger.exception("解析响应结构失败: {}", e)
                _wait_before_retry(retry)
                continue

        # NOTE: loguru has no ``exc_info`` option; extra keyword arguments are
        # fed to ``str.format``. ``logger.exception`` attaches the traceback,
        # and passing the error as a format argument keeps braces in its text
        # from being interpreted as placeholders.
        except (ConnectionError, Timeout) as e:
            logger.exception("网络错误: {}", e)
        except RequestException as e:
            # Covers HTTP errors raised by raise_for_status().
            status_code = getattr(e.response, "status_code", None)
            logger.exception("请求异常 (状态码: {}): {}", status_code, e)
            # 401/403 usually mean the cookies have expired; retrying with the
            # same cookies cannot help, so give up immediately.
            if status_code in (401, 403):
                logger.warning("检测到认证失效,终止重试")
                return None
        except Exception as e:
            logger.exception("未知错误: {}", e)

        _wait_before_retry(retry)

    logger.error(f"经过 {RETRY_TIMES} 次重试后仍失败")
    return None
if __name__ == "__main__":
    # Manual smoke test: run one image search with a sample cookie set.
    # NOTE(review): these are hard-coded session credentials committed to
    # source; consider loading them from outside the repository.
    demo_cookies = {
        "i18n-prefs": "USD",
        "lc-main": "en_US",
        "session-id": "131-0347800-4175077",
        "session-id-time": "2082787201l",
        "session-token": "Jo+AthxsQrcFH8qeii+sHhoo7puFd/cpJUEsjnWXtCLhr8ycF9TQSAv9zuyoAvFjfmXZuACFNa/D+i5et63EafMMPDK/825m8TUtNtlO88KmmEsm94fiyoPL0UakTyZsUBv/CzndcKB7h0K3NkbeFws9gZSdwYRGJFVeX+pPQ9ceN0WkE+XLwCt0plIIxG3BC+VtdFJWxPKxH+R5dlnbtxPso2S5zlrOf1FTEdGNNhNZvVq25XeydshrSp7AKG6VUOicnipgfAY0Qle3Y4bw72N1IqY9i3rXVZlkrkGePamBxew+Vel7U8ccVsEIT/vtOtLHPfsTljTgltlJU0bzk0YeoJ1LwI9S",
        "skin": "noskin",
        "ubid-main": "134-9889499-1876667",
    }

    result_url = get_amazon_search_url("test_image/amazon3.png", demo_cookies)

    # Report the outcome; a falsy result means no URL could be produced.
    if not result_url:
        logger.error("未能生成搜索URL")
    else:
        logger.success(f"最终搜索URL: {result_url}")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment