Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
spider
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
selection-new
spider
Commits
d7f6ddab
Commit
d7f6ddab
authored
Nov 18, 2025
by
PC-20230618BYKI\Administrator
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
dfdb4f08
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
0 additions
and
175 deletions
+0
-175
amazon_configs.py
...pider_code/app_amazon_image_recognition/amazon_configs.py
+0
-0
app_amazon_image_recognition.py
..._amazon_image_recognition/app_amazon_image_recognition.py
+0
-175
app_img_search_api_new.py
...de/app_amazon_image_recognition/app_img_search_api_new.py
+0
-0
threading_image_demo.py
...code/app_amazon_image_recognition/threading_image_demo.py
+0
-0
No files found.
hjm_spider_code/app_amazon_image_recognition/amazon_configs.py
0 → 100644
View file @
d7f6ddab
This diff is collapsed.
Click to expand it.
hjm_spider_code/app_amazon_image_recognition/app_amazon_image_recognition.py
deleted
100644 → 0
View file @
dfdb4f08
import
hashlib
import
time
import
uuid
from
typing
import
Optional
,
List
from
loguru
import
logger
import
requests
from
requests.exceptions
import
RequestException
,
ConnectionError
,
Timeout
# Logging setup: add a file sink named "amazon_search.log" with rotation="10 MB"
# and a minimum level of INFO.
logger.add("amazon_search.log", rotation="10 MB", level="INFO")

# Module-level state: clientDeviceId is generated once at import time and reused
# by every request (the cookies, not this id, are what get refreshed on expiry).
GLOBAL_CLIENT_DEVICE_ID = str(uuid.uuid4())
logger.info(f"初始化全局 clientDeviceId: {GLOBAL_CLIENT_DEVICE_ID}")

# Constants
# Endpoint that receives the multipart image upload.
AMAZON_STYLE_SNAP_URL = "https://match-visualsearch.amazon.com/style-snap/2.0"
# Template for the final search URL; {bbx_asin_list} is filled with "|"-joined ASINs.
AMAZON_SEARCH_BASE_URL = "https://www.amazon.com/s?rh=p_78:{bbx_asin_list}&rank=asin-scores-asc-rank&searchMethod=CameraSearch"
# SECRET, USERNAME and APPLICATION are concatenated with a timestamp and hashed
# (SHA-512) to form the request authtoken.
# NOTE(review): hard-coded secret committed to source — consider loading it from
# configuration or the environment instead.
SECRET = "5b6874d3a20417591bd5464a25a37bc6"
APPLICATION = "amzn-mbl-cscan-us"
USERNAME = "amzn-mbl-cscan-us"
RETRY_TIMES = 5  # retry count
RETRY_DELAY = 1  # delay between retries (seconds)
def _wait_before_retry(retry: int) -> bool:
    """Sleep RETRY_DELAY seconds if another attempt remains.

    :param retry: zero-based index of the attempt that just failed
    :return: True if the caller should try again, False if attempts are exhausted
    """
    if retry >= RETRY_TIMES - 1:
        return False
    logger.info(f"等待 {RETRY_DELAY} 秒后重试...")
    time.sleep(RETRY_DELAY)
    return True


def _build_query_metadata(session_id: str, client_id: str) -> str:
    """Render the ``query_metadata`` form field for a single request.

    The template is kept as a literal string (rather than ``json.dumps``) so the
    bytes sent on the wire stay exactly as before.
    """
    return (
        '{"amznSessionId":"%s","clientVersion":"30.20.2.100","cardsVersion":"1.0",'
        '"clientMessageVersion":"1.0","amznDirectedCustomerId":"","clientDeviceId":"%s",'
        '"clientDevice":"Android - Pixel 2","deviceManufacturer":"Google","clientDeviceVersion":"10",'
        '"clientId":"%s","orientation":"-1","sourceType":"Photo","ingressSource":"ctp","uiMode":"stylesnap"}'
        % (session_id, GLOBAL_CLIENT_DEVICE_ID, client_id)
    )


def get_amazon_search_url(image_path: str, cookies: dict) -> Optional[str]:
    """
    Upload an image to the Amazon style-snap endpoint and build a search URL
    from the ASINs in the first search result.

    The request is attempted up to RETRY_TIMES times, sleeping RETRY_DELAY
    seconds between attempts. Retries are skipped when the failure cannot be
    fixed by retrying: missing ``session-id`` cookie, unreadable image file,
    HTTP 401/403, or an empty ``searchResult`` / ``bbxAsinList``.

    :param image_path: local path of the image to upload
    :param cookies: Amazon cookies; ``session-id`` is required, ``session-token``
        and ``ubid-main`` are forwarded as headers when present
    :return: the assembled search URL, or None on any failure
    """
    # --- Loop-invariant preparation: done once, not on every retry. ---
    session_id = cookies.get("session-id", "")
    if not session_id:
        logger.error("Cookies中缺少session-id")
        return None

    # A broken image will not fix itself, so a read failure aborts immediately.
    # Broad catch is deliberate: this function's contract is "return None on failure".
    try:
        with open(image_path, "rb") as image_file:
            image_data = image_file.read()
    except Exception as e:
        # logger.exception records the traceback; loguru does not honour the
        # stdlib-style ``exc_info=True`` keyword.
        logger.exception(f"读取图片失败: {e}")
        return None
    logger.info(f"成功读取图片: {image_path} (大小: {len(image_data)} bytes)")

    headers = {
        "x-amz-access-token": "",
        "x-amz-lens-session-auth-token": cookies.get("session-token", ""),
        "x-amz-lens-session-id": session_id,
        "x-amz-lens-ubid": cookies.get("ubid-main", ""),
        "accept-encoding": "gzip",
        "user-agent": "okhttp/4.9.1",
    }
    text_part_type = "multipart/form-data; charset=utf-8"

    for retry in range(RETRY_TIMES):
        try:
            logger.info(f"开始第 {retry + 1}/{RETRY_TIMES} 次请求")

            # Per-attempt values: a fresh clientId and a fresh timestamp, since
            # the authtoken is derived from the timestamp.
            client_id = str(uuid.uuid4())
            ts = str(int(time.time()))
            combined = SECRET + USERNAME + APPLICATION + ts
            authtoken = hashlib.sha512(combined.encode("utf-8")).hexdigest()
            # Only the first 10 characters are logged to keep the line short.
            logger.debug(f"生成authtoken: {authtoken[:10]}...")

            query_metadata = _build_query_metadata(session_id, client_id)

            # Multipart form: text parts use a None filename, the image is last.
            files = [
                ("application", (None, APPLICATION, text_part_type)),
                ("query_metadata", (None, query_metadata, text_part_type)),
                ("authtoken", (None, authtoken, text_part_type)),
                ("lang", (None, "en_US", text_part_type)),
                ("username", (None, USERNAME, text_part_type)),
                ("ts", (None, ts, text_part_type)),
                ("file", ("", image_data, "image/jpeg")),
            ]

            logger.info("发送POST请求到亚马逊以图搜物接口")
            response = requests.post(
                url=AMAZON_STYLE_SNAP_URL,
                headers=headers,
                files=files,
                cookies=cookies,
                timeout=10,  # seconds
            )
            response.raise_for_status()  # turn HTTP error statuses into exceptions
            logger.info(f"请求成功,状态码: {response.status_code}")

            try:
                response_json = response.json()
            except ValueError:
                logger.exception("响应不是有效的JSON格式")
                if _wait_before_retry(retry):
                    continue
                return None

            try:
                search_result = response_json.get("style-snap", {}).get("searchResult", [])
                if not search_result:
                    logger.warning("searchResult列表为空")
                    return None

                bbx_asin_list = search_result[0].get("bbxAsinList", [])
                if not bbx_asin_list:
                    logger.warning("bbxAsinList列表为空")
                    return None

                logger.info(
                    f"成功提取bbxAsinList,共 {len(bbx_asin_list)} 个元素,注意 实际有些不可用 打开网页数据少几个为正常现象"
                )
                joined_asin = "|".join(bbx_asin_list)
                final_url = AMAZON_SEARCH_BASE_URL.format(bbx_asin_list=joined_asin)
                logger.success(f"生成最终搜索URL: {final_url}")
                return final_url
            except (AttributeError, IndexError, KeyError, TypeError) as e:
                # AttributeError/TypeError are what the .get() chain and join()
                # actually raise when the payload has an unexpected shape.
                logger.exception(f"解析响应结构失败: {e}")
                if _wait_before_retry(retry):
                    continue
                return None

        except (ConnectionError, Timeout) as e:
            logger.exception(f"网络错误: {e}")
        except RequestException as e:
            # HTTP errors; 401/403 usually mean the cookies have expired.
            status_code = getattr(e.response, "status_code", None)
            logger.exception(f"请求异常 (状态码: {status_code}): {e}")
            if status_code in (401, 403):
                # Retrying cannot help until the caller supplies fresh cookies.
                logger.warning("检测到认证失效,终止重试")
                return None
        except Exception as e:
            # Top-level boundary: log and fall through to the retry wait.
            logger.exception(f"未知错误: {e}")

        _wait_before_retry(retry)

    logger.error(f"经过 {RETRY_TIMES} 次重试后仍失败")
    return None
if __name__ == "__main__":
    # Manual smoke test: run one image search with a fixed cookie set and log
    # the outcome.
    # NOTE(review): these look like real session cookies committed to source —
    # confirm they are expired or revoked.
    demo_cookies = {
        "i18n-prefs": "USD",
        "lc-main": "en_US",
        "session-id": "131-0347800-4175077",
        "session-id-time": "2082787201l",
        "session-token": "Jo+AthxsQrcFH8qeii+sHhoo7puFd/cpJUEsjnWXtCLhr8ycF9TQSAv9zuyoAvFjfmXZuACFNa/D+i5et63EafMMPDK/825m8TUtNtlO88KmmEsm94fiyoPL0UakTyZsUBv/CzndcKB7h0K3NkbeFws9gZSdwYRGJFVeX+pPQ9ceN0WkE+XLwCt0plIIxG3BC+VtdFJWxPKxH+R5dlnbtxPso2S5zlrOf1FTEdGNNhNZvVq25XeydshrSp7AKG6VUOicnipgfAY0Qle3Y4bw72N1IqY9i3rXVZlkrkGePamBxew+Vel7U8ccVsEIT/vtOtLHPfsTljTgltlJU0bzk0YeoJ1LwI9S",
        "skin": "noskin",
        "ubid-main": "134-9889499-1876667",
    }

    # Invoke the search and report the result.
    search_url = get_amazon_search_url("test_image/amazon3.png", demo_cookies)
    if not search_url:
        logger.error("未能生成搜索URL")
    else:
        logger.success(f"最终搜索URL: {search_url}")
\ No newline at end of file
hjm_spider_code/app_amazon_image_recognition/app_img_search_api.py
→
hjm_spider_code/app_amazon_image_recognition/app_img_search_api
_new
.py
View file @
d7f6ddab
This diff is collapsed.
Click to expand it.
hjm_spider_code/app_amazon_image_recognition/threading_image_demo.py
View file @
d7f6ddab
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment