Commit b441e323 by hejiangming

更新 tls ua 请求头关系

parent 477a5fac
...@@ -18,7 +18,7 @@ from scrapy import Selector ...@@ -18,7 +18,7 @@ from scrapy import Selector
from requests.exceptions import RequestException, JSONDecodeError, Timeout, ConnectionError, HTTPError from requests.exceptions import RequestException, JSONDecodeError, Timeout, ConnectionError, HTTPError
from loguru import logger from loguru import logger
from cookie_manager import cookie_manager from cookie_manager import cookie_manager
from chrome_us_list import get_random_ua from ua_tls_profiles import build_browser_headers
# 导入配置 # 导入配置
from curl_cffi import requests as curl_cffi_requests from curl_cffi import requests as curl_cffi_requests
from amazon_configs import ( from amazon_configs import (
...@@ -418,41 +418,9 @@ class AmazonImageSearch: ...@@ -418,41 +418,9 @@ class AmazonImageSearch:
# logger.info(f'解析成功 当前页{len(items)}条数据') # logger.info(f'解析成功 当前页{len(items)}条数据')
return items return items
@staticmethod
def extract_chrome_version_from_ua(ua: str) -> Optional[int]:
"""从UA字符串中提取Chrome主版本号(如从Chrome/136.0.7015.93提取136)"""
match = re.search(r"Chrome/(\d+)\.", ua)
return int(match.group(1)) if match else None
@staticmethod
def get_tls_fingerprint_by_chrome_version(version: Optional[int]) -> str:
"""根据Chrome主版本号映射 curl_cffi 的 impersonate 指纹"""
if version is None:
return "chrome120"
elif version >= 130:
return "chrome131"
elif version >= 120:
return "chrome120"
elif version >= 110:
return "chrome110"
elif version >= 100:
return "chrome101"
elif version >= 95:
return "chrome99"
else:
return "chrome104" # 默认兜底
def _build_browser_headers(self): def _build_browser_headers(self):
"""随机生成 UA + 匹配的 TLS 指纹,每次请求调用一次""" """随机选一条 UA/TLS 精确配对 profile,返回 (headers, impersonate)"""
ua = get_random_ua() return build_browser_headers()
tls = self.get_tls_fingerprint_by_chrome_version(self.extract_chrome_version_from_ua(ua))
headers = {
"accept": "text/html,application/xhtml+xml,*/*",
"Accept-Language": "en-US,en;q=0.9",
"Accept-Encoding": "gzip, deflate",
"user-agent": ua,
}
return headers, tls
def _fetch_single_page(self, url: str, page: int) -> List[Dict[str, Any]]: def _fetch_single_page(self, url: str, page: int) -> List[Dict[str, Any]]:
try: try:
......
"""
UA / TLS 指纹精确配对池
- 每条 profile 内部自洽:UA 版本 == impersonate 版本,sec-ch-ua 品牌版本与之一致,
操作系统仅 Windows / macOS。
- impersonate 取值均为 curl_cffi 0.13 原生支持的较新桌面 Chrome 目标,
避免出现「UA 说 136、TLS 指纹是 131」这种错位。
- 真实 Chrome on Mac 的系统号冻结在 10_15_7。
"""
import random
# Every profile's UA follows this shape; only the OS token and the Chrome major
# version vary between entries.
_UA_TEMPLATE = (
    "Mozilla/5.0 ({os_token}) AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/{version}.0.0.0 Safari/537.36"
)

# (platform label, OS token inside the UA, Chrome major versions offered)
_PLATFORM_VERSIONS = (
    ("Windows", "Windows NT 10.0; Win64; x64", ("136", "131", "124", "120")),
    ("macOS", "Macintosh; Intel Mac OS X 10_15_7", ("136", "131", "123")),
)

# One dict per (platform, version) pair: Windows entries first, then macOS.
# Each entry's "impersonate" value is "chrome" + the same major version used in its UA.
UA_TLS_PROFILES = [
    {
        "impersonate": f"chrome{version}",
        "version": version,
        "platform": platform,
        "ua": _UA_TEMPLATE.format(os_token=os_token, version=version),
    }
    for platform, os_token, versions in _PLATFORM_VERSIONS
    for version in versions
]
def build_browser_headers():
    """Pick one random UA/TLS profile and return ``(headers, impersonate)``.

    The ``user-agent``, ``sec-ch-ua`` and ``sec-ch-ua-platform`` headers are all
    derived from the same randomly chosen ``UA_TLS_PROFILES`` entry as the
    returned ``impersonate`` value. Call once per request.

    Returns:
        tuple: ``(headers, impersonate)`` where ``headers`` is a dict of request
        headers and ``impersonate`` is the chosen profile's ``"impersonate"`` string.
    """
    chosen = random.choice(UA_TLS_PROFILES)
    major = chosen["version"]
    platform = chosen["platform"]

    # Profile-independent headers come first, so the dict's insertion order is
    # accept, Accept-Language, Accept-Encoding, then the profile-derived ones.
    headers = {
        "accept": "text/html,application/xhtml+xml,*/*",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate",
    }
    headers["user-agent"] = chosen["ua"]
    # NOTE(review): the third brand entry is fixed at "Not.A/Brand";v="99" for
    # every version; confirm this matches what each real Chrome release sends.
    headers["sec-ch-ua"] = (
        f'"Chromium";v="{major}", "Google Chrome";v="{major}", "Not.A/Brand";v="99"'
    )
    headers["sec-ch-ua-mobile"] = "?0"
    headers["sec-ch-ua-platform"] = f'"{platform}"'
    return headers, chosen["impersonate"]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment