Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
spider
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
selection-new
spider
Commits
b441e323
Commit
b441e323
authored
Jun 15, 2026
by
hejiangming
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
更新 tls ua 请求头关系
parent
477a5fac
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
52 additions
and
35 deletions
+52
-35
chrome_us_list.py
...pider_code/app_amazon_image_recognition/chrome_us_list.py
+0
-0
inv_img_double_search.py
...ode/app_amazon_image_recognition/inv_img_double_search.py
+3
-35
ua_tls_profiles.py
...ider_code/app_amazon_image_recognition/ua_tls_profiles.py
+49
-0
No files found.
hjm_spider_code/app_amazon_image_recognition/chrome_us_list.py
deleted
100644 → 0
View file @
477a5fac
This diff is collapsed.
Click to expand it.
hjm_spider_code/app_amazon_image_recognition/inv_img_double_search.py
View file @
b441e323
...
@@ -18,7 +18,7 @@ from scrapy import Selector
...
@@ -18,7 +18,7 @@ from scrapy import Selector
from
requests.exceptions
import
RequestException
,
JSONDecodeError
,
Timeout
,
ConnectionError
,
HTTPError
from
requests.exceptions
import
RequestException
,
JSONDecodeError
,
Timeout
,
ConnectionError
,
HTTPError
from
loguru
import
logger
from
loguru
import
logger
from
cookie_manager
import
cookie_manager
from
cookie_manager
import
cookie_manager
from
chrome_us_list
import
get_random_ua
from
ua_tls_profiles
import
build_browser_headers
# 导入配置
# 导入配置
from
curl_cffi
import
requests
as
curl_cffi_requests
from
curl_cffi
import
requests
as
curl_cffi_requests
from
amazon_configs
import
(
from
amazon_configs
import
(
...
@@ -418,41 +418,9 @@ class AmazonImageSearch:
...
@@ -418,41 +418,9 @@ class AmazonImageSearch:
# logger.info(f'解析成功 当前页{len(items)}条数据')
# logger.info(f'解析成功 当前页{len(items)}条数据')
return
items
return
items
@staticmethod
def extract_chrome_version_from_ua(ua: str) -> Optional[int]:
    """Return the Chrome major version found in a User-Agent string.

    The version is the run of digits between ``Chrome/`` and the first dot,
    e.g. ``Chrome/136.0.7015.93`` yields ``136``.

    Args:
        ua: User-Agent string to inspect.

    Returns:
        The major version as an int, or ``None`` when the string contains
        no ``Chrome/<digits>.`` token.
    """
    found = re.search(r"Chrome/(\d+)\.", ua)
    if found is None:
        return None
    return int(found.group(1))
@staticmethod
def get_tls_fingerprint_by_chrome_version(version: Optional[int]) -> str:
    """Map a Chrome major version to a curl_cffi ``impersonate`` fingerprint.

    Args:
        version: Chrome major version, or ``None`` when it is unknown.

    Returns:
        ``"chrome120"`` when ``version`` is ``None``; otherwise the target
        of the first threshold that ``version`` reaches, or ``"chrome104"``
        when it is below every threshold.
    """
    if version is None:
        return "chrome120"

    # (minimum major version, impersonate target), highest threshold first.
    thresholds = (
        (130, "chrome131"),
        (120, "chrome120"),
        (110, "chrome110"),
        (100, "chrome101"),
        (95, "chrome99"),
    )
    for floor, target in thresholds:
        if version >= floor:
            return target

    # Default fallback for anything below the lowest threshold.
    return "chrome104"
def
_build_browser_headers
(
self
):
def
_build_browser_headers
(
self
):
"""随机生成 UA + 匹配的 TLS 指纹,每次请求调用一次"""
"""随机选一条 UA/TLS 精确配对 profile,返回 (headers, impersonate)"""
ua
=
get_random_ua
()
return
build_browser_headers
()
tls
=
self
.
get_tls_fingerprint_by_chrome_version
(
self
.
extract_chrome_version_from_ua
(
ua
))
headers
=
{
"accept"
:
"text/html,application/xhtml+xml,*/*"
,
"Accept-Language"
:
"en-US,en;q=0.9"
,
"Accept-Encoding"
:
"gzip, deflate"
,
"user-agent"
:
ua
,
}
return
headers
,
tls
def
_fetch_single_page
(
self
,
url
:
str
,
page
:
int
)
->
List
[
Dict
[
str
,
Any
]]:
def
_fetch_single_page
(
self
,
url
:
str
,
page
:
int
)
->
List
[
Dict
[
str
,
Any
]]:
try
:
try
:
...
...
hjm_spider_code/app_amazon_image_recognition/ua_tls_profiles.py
0 → 100644
View file @
b441e323
"""
UA / TLS 指纹精确配对池
- 每条 profile 内部自洽:UA 版本 == impersonate 版本,sec-ch-ua 品牌版本与之一致,
操作系统仅 Windows / macOS。
- impersonate 取值均为 curl_cffi 0.13 原生支持的较新桌面 Chrome 目标,
避免出现「UA 说 136、TLS 指纹是 131」这种错位。
- 真实 Chrome on Mac 的系统号冻结在 10_15_7。
"""
import
random
# Pool of paired User-Agent / TLS-fingerprint profiles.
# Each entry carries four string fields:
#   impersonate - value returned to the caller as the impersonate target
#   version     - Chrome major version, interpolated into the sec-ch-ua header
#   platform    - value placed (quoted) in the sec-ch-ua-platform header
#   ua          - full User-Agent string
UA_TLS_PROFILES = [
    # ---- Windows ----
    {
        "impersonate": "chrome136",
        "version": "136",
        "platform": "Windows",
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
    },
    {
        "impersonate": "chrome131",
        "version": "131",
        "platform": "Windows",
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    },
    {
        "impersonate": "chrome124",
        "version": "124",
        "platform": "Windows",
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    },
    {
        "impersonate": "chrome120",
        "version": "120",
        "platform": "Windows",
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    },
    # ---- macOS ----
    {
        "impersonate": "chrome136",
        "version": "136",
        "platform": "macOS",
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
    },
    {
        "impersonate": "chrome131",
        "version": "131",
        "platform": "macOS",
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    },
    {
        "impersonate": "chrome123",
        "version": "123",
        "platform": "macOS",
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    },
]
def build_browser_headers():
    """Pick one random profile and return ``(headers, impersonate)``.

    The ``user-agent``, ``sec-ch-ua`` and ``sec-ch-ua-platform`` headers are
    all derived from the same entry of ``UA_TLS_PROFILES`` as the returned
    ``impersonate`` value, so the four stay consistent with each other.
    Call once per request.

    Returns:
        A 2-tuple of the header dict and the profile's ``impersonate`` string.
    """
    chosen = random.choice(UA_TLS_PROFILES)
    major = chosen["version"]
    platform = chosen["platform"]

    # Client-hint values built from the chosen profile.
    brand_list = f'"Chromium";v="{major}", "Google Chrome";v="{major}", "Not.A/Brand";v="99"'
    quoted_platform = f'"{platform}"'

    headers = dict()
    headers["accept"] = "text/html,application/xhtml+xml,*/*"
    headers["Accept-Language"] = "en-US,en;q=0.9"
    headers["Accept-Encoding"] = "gzip, deflate"
    headers["user-agent"] = chosen["ua"]
    headers["sec-ch-ua"] = brand_list
    headers["sec-ch-ua-mobile"] = "?0"
    headers["sec-ch-ua-platform"] = quoted_platform

    return headers, chosen["impersonate"]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment