Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
spider
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
selection-new
spider
Commits
000d315d
Commit
000d315d
authored
Jul 28, 2025
by
Peng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
8dd9963f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
58 additions
and
18 deletions
+58
-18
get_junglescout_rank.py
py_spider/amazon_every_day_spider/get_junglescout_rank.py
+4
-4
junglescout_spider.py
py_spider/amazon_every_day_spider/junglescout_spider.py
+3
-3
save_all_syn_st_minid_maxid.py
py_spider/amazon_save_db/save_all_syn_st_minid_maxid.py
+1
-1
asin_detail_pg.py
py_spider/amazon_spider/asin_detail_pg.py
+3
-4
recall_cases_spider.py
py_spider/amazon_spider/recall_cases_spider.py
+0
-0
pares_html.py
py_spider/amzon_parse_db_html/pares_html.py
+3
-3
asin_parse.py
py_spider/utils/asin_parse.py
+44
-3
No files found.
py_spider/amazon_every_day_spider/get_junglescout_rank.py
View file @
000d315d
...
@@ -350,7 +350,7 @@ def junglescout_spider(db_base):
...
@@ -350,7 +350,7 @@ def junglescout_spider(db_base):
"Accept-Encoding"
:
"gzip, deflate, br, zstd"
,
"Accept-Encoding"
:
"gzip, deflate, br, zstd"
,
"Accept-Language"
:
"zh-CN,zh-TW;q=0.9,zh;q=0.8"
,
"Accept-Language"
:
"zh-CN,zh-TW;q=0.9,zh;q=0.8"
,
"Cache-Control"
:
"no-cache"
,
"Cache-Control"
:
"no-cache"
,
'Cookie'
:
'_ga=GA1.1.522737765.1749119222; _fp=65dbbe41a37f8f9fbe702eba96328267; MEIQIA_TRACK_ID=2y5KvHOzkFTlJAhOLENKAKWsOeb; MEIQIA_VISIT_ID=2y5KvGrMsL4O61rUcCdsLjChlRa; current_guest=r0hgXGqjbSw0_250605-186810; ecookie=xOHgcnYmcZIZKG0z_CN;
x-hng=lang=zh-CN&domain=www.sellersprite.com; 8f00639f9c446a2d0213=54fb71d3f2c9e8acb7878e0f73abbf33; _gcl_au=1.1.420472597.1749119222.719336435.1751886424.1751886424; Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1751516385,1751886410,1751973053,1752031904; HMACCOUNT=800EBCCFB4C6BBFB; 65722c3d8208b58d42f9=7dc2ebaa5e4a51182da4ade1aacd8dc4; rank-guest-user=6159802571t3e3obe8rwmCywrH0Xq28vOMfd8Q+siSpAi1WiGPGuuMcYrYhXyf/QpgeBCBdgCT; rank-login-user=6159802571t3e3obe8rwmCywrH0Xq28mIqu6gO0eXYPrSqY9RlSIznMsavLuIJkOkjELzcr/d1; rank-login-user-info="eyJuaWNrbmFtZSI6Iuilv+mXqOWQuembqiIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTMzKioqKjU0MDciLCJ0b2tlbiI6IjYxNTk4MDI1NzF0M2Uzb2JlOHJ3bUN5d3JIMFhxMjhtSXF1NmdPMGVYWVByU3FZOVJsU0l6bk1zYXZMdUlKa09rakVMemNyL2QxIn0="; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJJUGFLc3VqMkZsUmpPR1NRQnIxYkJRIiwiaWF0IjoxNzUyMDMxOTE2LCJleHAiOjE3NTIxMTgzMTYsIm5iZiI6MTc1MjAzMTg1Niwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTMzNDkzLCJwaSI6bnVsbCwibm4iOiLopb_pl6jlkLnpm6oiLCJzeXMiOiJTU19DTiIsImVkIjoiTiIsInBobiI6IjEzMzkyNDE1NDA3IiwiZW0iOiJxcTE2NTMxMjE4NjUzQDE2My5jb20iLCJtbCI6IkcifQ.mLIjN_qO4K8w18IDVa0GCRY3MODTmJhZlQaPbgBjeYJRPDwteHfkfqFS_GFyLu4svoahzyFRxkdnKhxs1x90QxQ-7QCwjwypbk8On6gMarKl8jopo9sJbZITvk8mrqtoT6N34LZ1ash35iAkIuPZONPMH8_cp5NxiSC70J12fvIT9ZXp-9zvEk6WV8qQ3pRr0yRuGnSsuWjVvDE9WRNpE3ZmYS_EUBroA51yBEPdS8aBThRuuVGt4HuqrPXp9ZwHoiOcRYu1VcQu-wpIAhLfXcnY1vJA3FXm7w_H00DOGZuM9HRcxdg6Fj-2WP5FvCxbE8z5n1-zbQMs_J8JVaVXgQ; ao_lo_to_n="6159802571t3e3obe8rwmCywrH0Xq28osFyhyxlRsfXXDx9AUjMD2qAFgWUPkLF84KewBkZoL5OL21x5jznuxdPNdiJfglPNE7YH03Vk5CofaP+MGH3y8="; _gaf_fp=01fef3c14bfcaf5a01438f74a677e95a; _ga_38NCVF2XST=GS2.1.s1752031904$o47$g1$t1752031923$j41$l0$h1543227925; _ga_CN0F80S6GL=GS2.1.s1752031906$o46$g1$t1752031924$j42$l0$h0; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1752035308; JSESSIONID=165F9BAA752FE5B22CCD7C5BB7B62F2F
'
,
'Cookie'
:
'_ga=GA1.1.522737765.1749119222; _fp=65dbbe41a37f8f9fbe702eba96328267; MEIQIA_TRACK_ID=2y5KvHOzkFTlJAhOLENKAKWsOeb; MEIQIA_VISIT_ID=2y5KvGrMsL4O61rUcCdsLjChlRa; current_guest=r0hgXGqjbSw0_250605-186810; ecookie=xOHgcnYmcZIZKG0z_CN;
Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1752031904,1752460043,1752653436,1753353401; HMACCOUNT=800EBCCFB4C6BBFB; 894cdd1d9741ce0c9757=827b7d3d13ed7bd6b4b1b24d0246b3dc; 3d854e1bcd61963fdf05=38fcb3b742a48aa345ddfd7136bc60ee; _gaf_fp=f297033bfe53aa9891ffe2842271566b; _gcl_au=1.1.420472597.1749119222.1054917286.1753685435.1753685437; rank-guest-user=6303473571KK6FnhfedvWg9tSSyk3xj0WOO7cLm/YtvwwmR8H9lihUCQIaVmrHXjbpSRP/Ca0F; rank-login-user=6303473571KK6FnhfedvWg9tSSyk3xj2GRIc/8HSm4vuPYVHI5vKLXnssgei5ccK1dG8fkQSFI; rank-login-user-info=eyJuaWNrbmFtZSI6IuW4heWTpSIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTgzKioqKjczNDciLCJ0b2tlbiI6IjYzMDM0NzM1NzFLSzZGbmhmZWR2V2c5dFNTeWszeGoyR1JJYy84SFNtNHZ1UFlWSEk1dktMWG5zc2dlaTVjY0sxZEc4ZmtRU0ZJIn0=; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJKc2pZSlZWeFZzTVptVWFvMzgtZ3RRIiwiaWF0IjoxNzUzNjg1NDM2LCJleHAiOjE3NTM3NzE4MzYsIm5iZiI6MTc1MzY4NTM3Niwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTQ2NjIxNSwicGkiOm51bGwsIm5uIjoi5biF5ZOlIiwic3lzIjoiU1NfQ04iLCJlZCI6Ik4iLCJwaG4iOiIxODMwNzk2NzM0NyIsImVtIjoiMzE1OTk4MDg5MkBxcS5jb20iLCJtbCI6IkcifQ.EaQ7Md7iVOpjZDogkiS2DlndhFPt3GzL2t33LXnh9Z5Itr3A8scFM_tzrYuzXqF6a-BDIMFe90SdDtU18zs9WTTl6_Phv3AEqcDe6WDfPAhB_KMa15VYAE5-b9d3lgIukKR8ZZyAMpiJzcmIWShmqxrhCNQD0ER3b7idaJpSrJiKnwV-tj6La52WJ6BmVRAk8gst0p5h-SYVnNz9iNaSXLc2Dx-hHZvMVNU27yfbJgKPpzRxgh7TOD7O-cT0WrEoKvTSw9e81gG9bgvKuA_bD-z3ePhgM6prUfceWszD88KH8PcXua9s_8ZM4bgrMyKMHswLtwyLhWePcvtHUp6yyQ; ao_lo_to_n=6303473571KK6FnhfedvWg9tSSyk3xj0WOO7cLm/YtvwwmR8H9liibP9br/hwQ1Dlb4xDZyVPrTQIst5JCVz4PpnUIlDMGE07YVPYBWOm3Hrx4PaVkgaQ=; _ga_38NCVF2XST=GS2.1.s1753685428$o61$g1$t1753685444$j44$l0$h984121357; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1753685445; _ga_CN0F80S6GL=GS2.1.s1753685429$o59$g1$t1753685445$j44$l0$h0; JSESSIONID=F09543D3A3D6F890BAD0F422FCA49942
'
,
"User-agent"
:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
,
"User-agent"
:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
,
}
}
url
=
"https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
url
=
"https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
...
@@ -368,7 +368,7 @@ def junglescout_spider(db_base):
...
@@ -368,7 +368,7 @@ def junglescout_spider(db_base):
response
=
json
.
loads
(
response
.
text
)
response
=
json
.
loads
(
response
.
text
)
break
break
except
:
except
:
time
.
sleep
(
random
.
uniform
(
15
,
3
5
.75
))
time
.
sleep
(
random
.
uniform
(
15
,
3
0
.75
))
response_data
=
response
[
'data'
]
response_data
=
response
[
'data'
]
print
(
'code::'
,
response
[
'code'
])
print
(
'code::'
,
response
[
'code'
])
...
@@ -387,7 +387,7 @@ def junglescout_spider(db_base):
...
@@ -387,7 +387,7 @@ def junglescout_spider(db_base):
print
(
'获取数据:'
,
category_name
[
'name'
],
i
,
est
,
year_month
)
print
(
'获取数据:'
,
category_name
[
'name'
],
i
,
est
,
year_month
)
sales
=
int
(
est
)
sales
=
int
(
est
)
name_rnak_list
.
append
((
category_name
[
'name'
],
i
,
sales
,
year_month
))
name_rnak_list
.
append
((
category_name
[
'name'
],
i
,
sales
,
year_month
))
time
.
sleep
(
random
.
uniform
(
20
,
7
5.75
))
time
.
sleep
(
random
.
uniform
(
20
,
6
5.75
))
# break
# break
for
i
in
range
(
4
):
for
i
in
range
(
4
):
try
:
try
:
...
@@ -408,7 +408,7 @@ def junglescout_spider(db_base):
...
@@ -408,7 +408,7 @@ def junglescout_spider(db_base):
cursor_us_mysql_db
,
db_us
=
db_class_us
.
us_mysql_db
()
# us 站点 mysql
cursor_us_mysql_db
,
db_us
=
db_class_us
.
us_mysql_db
()
# us 站点 mysql
time
.
sleep
(
20
)
time
.
sleep
(
20
)
print
(
'当前完成。获取下一个分类销量'
)
print
(
'当前完成。获取下一个分类销量'
)
time
.
sleep
(
random
.
uniform
(
120
,
24
0.5
))
time
.
sleep
(
random
.
uniform
(
90
,
20
0.5
))
def
save_site_category
(
site_bsr_dict
=
None
):
def
save_site_category
(
site_bsr_dict
=
None
):
...
...
py_spider/amazon_every_day_spider/junglescout_spider.py
View file @
000d315d
...
@@ -85,12 +85,12 @@ def junglescout_spider(db_base):
...
@@ -85,12 +85,12 @@ def junglescout_spider(db_base):
"Accept-Encoding"
:
"gzip, deflate, br, zstd"
,
"Accept-Encoding"
:
"gzip, deflate, br, zstd"
,
"Accept-Language"
:
"zh-CN,zh-TW;q=0.9,zh;q=0.8"
,
"Accept-Language"
:
"zh-CN,zh-TW;q=0.9,zh;q=0.8"
,
"Cache-Control"
:
"no-cache"
,
"Cache-Control"
:
"no-cache"
,
'Cookie'
:
'_ga=GA1.1.
19240078.1751854600; Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1751854601; HMACCOUNT=28ABEEABEFA97E4A; _gcl_au=1.1.536675967.1751854601; MEIQIA_TRACK_ID=2zWlEnsYAqnZRdhJqJ5txX7tpXm; MEIQIA_VISIT_ID=2zWlEmUkBQV745rliAtXEdAk0CJ; ecookie=ZyZ05gxOxlDTPkM1_CN; 8f00639f9c446a2d0213=54fb71d3f2c9e8acb7878e0f73abbf33; _fp=65dbbe41a37f8f9fbe702eba96328267; _gaf_fp=e03eac62da4f8988dc796341e1bd822c; current_guest=jsxcNvsgBJO1_250707-100340; rank-login-user=502219157192wVgAJpdturGN5Im+nPDQqTtoVYwVNo1oWP9MD0mtMHFwS3LrhtAUhuCnvMHsCl; rank-login-user-info="eyJuaWNrbmFtZSI6IuWViuWTiOWTiOWTiCIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTUzKioqKjEyNzAiLCJ0b2tlbiI6IjUwMjIxOTE1NzE5MndWZ0FKcGR0dXJHTjVJbStuUERRcVR0b1ZZd1ZObzFvV1A5TUQwbXRNSEZ3UzNMcmh0QVVodUNudk1Ic0NsIn0="; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiIwZ01FdlJuNWJ1dlZhVW5IZ1lKSDFRIiwiaWF0IjoxNzUxODU0NjA1LCJleHAiOjE3NTE5NDEwMDUsIm5iZiI6MTc1MTg1NDU0NSwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTQ2NjIwMSwicGkiOm51bGwsIm5uIjoi5ZWK5ZOI5ZOI5ZOIIiwic3lzIjoiU1NfQ04iLCJlZCI6Ik4iLCJwaG4iOiIxNTM2ODA1MTI3MCIsImVtIjoibWVpeW91bGFAbWVpeW91bGEuY29tIiwibWwiOiJHIn0.Ujr6_K3vHIQRw3x52QAQdTftMy6GbZ_TunmFMgW76onCy3EkBzx7uxEv-42zRRXgKLMUfJz2t0ierqXV6Evh9i-o5F0ZUBREzm48LHpGSw6Iupjx4Udc3VQwVqgiUOmYBvnTAQqmaj6iA5l06zAZcVNHQASZ5xe5QFUCllIOL0m8tf3Xad6T8u5oLHRHTTuyy5nDAqLu6ZxVOqUYYXsIzq9H2qAsPhqIgRy_5Av1zyoAcQErddadCe25H_ILmKO0Az9ANIFg4o1r_is_VFVZpGvbz8nCN0JLuY3uajAjf2JXoEzhHT9YbMP0o2TrZDRPdORV3HVK1N5uvghRaRyJvw; ao_lo_to_n="502219157192wVgAJpdturGN5Im+nPDfbd9htCMUGF/tdMS8/gmBNzv9/utYT5ucwmHHPC71S6i4RnT3fLUZW/nDI61eZx1uqLqr+hBy0X/aeJ6c/sSSc="; rank-guest-user=502219157192wVgAJpdturGN5Im+nPDYsyQgRxjbXtKYdDjju8ax0OkcsNUNGWP3xY6uiwKVVO; JSESSIONID=96FF611DCBDF20B9C6C921EAD2A55205; _ga_38NCVF2XST=GS2.1.s1751854600$o1$g1$t1751854612$j48$l0$h1855838417; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1751854612; _ga_CN0F80S6GL=GS2.1.s1751854600$o1$g1$t1751854613$j47
$l0$h0'
,
'Cookie'
:
'_ga=GA1.1.
522737765.1749119222; _fp=65dbbe41a37f8f9fbe702eba96328267; MEIQIA_TRACK_ID=2y5KvHOzkFTlJAhOLENKAKWsOeb; MEIQIA_VISIT_ID=2y5KvGrMsL4O61rUcCdsLjChlRa; current_guest=r0hgXGqjbSw0_250605-186810; ecookie=xOHgcnYmcZIZKG0z_CN; x-hng=lang=zh-CN&domain=www.sellersprite.com; a40ac813159995d028ba=3d9b7c15f5787387e62acd734f598f23; Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1751973053,1752031904,1752460043,1752653436; HMACCOUNT=800EBCCFB4C6BBFB; rank-guest-user=8301172571YFpPM/DhYDVQzRAgRu7tcQTFTi48nSnOk/TNMkep2gdtR77QXyNfDPmFlYbdSsdL; rank-login-user=8301172571YFpPM/DhYDVQzRAgRu7tcWqD2KCbe1WiKcOarfxTCdls3AJ9lNFy+VA8a+RTm195; rank-login-user-info=eyJuaWNrbmFtZSI6Iuilv+mXqOWQuembqiIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTMzKioqKjU0MDciLCJ0b2tlbiI6IjgzMDExNzI1NzFZRnBQTS9EaFlEVlF6UkFnUnU3dGNXcUQyS0NiZTFXaUtjT2FyZnhUQ2RsczNBSjlsTkZ5K1ZBOGErUlRtMTk1In0=; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJLcVRRV2RPbVNNcjlKTU1qYTdXRjFRIiwiaWF0IjoxNzUyNjUzNDM4LCJleHAiOjE3NTI3Mzk4MzgsIm5iZiI6MTc1MjY1MzM3OCwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTMzNDkzLCJwaSI6bnVsbCwibm4iOiLopb_pl6jlkLnpm6oiLCJzeXMiOiJTU19DTiIsImVkIjoiTiIsInBobiI6IjEzMzkyNDE1NDA3IiwiZW0iOiJxcTE2NTMxMjE4NjUzQDE2My5jb20iLCJtbCI6IkcifQ.caY2QxTbtUVg7CQXvNJcmVo1YU0TGy3AD01dIddF76PHjYbbFh5a8zZAdAXnAKM1wNcs39d1MM8Wa-uoXHiitqDlCZsWyF9aXzco9L4wn-yU4xlMYsf7VoquZI6bxaMT2TNeX6vgQBod-NeXHYFpZQWdrH5sfZHQypkpRINb_o1QwaWvZrjufj1UwYdiypryBxTDyCuLfD4djU0PLMRXvifY6Ef86VNjAlsY8gFqDdHiVLixR2GWGdKRtoG74Ak5DX2eMDT6ak-OMrWYOaikthxIXiqdADTq2tvUCmjO4pE0oYnWhSEx9-UABo7jxJ0v_Af8B6AVu7ccC0NUUvWBMA; ao_lo_to_n=8301172571YFpPM/DhYDVQzRAgRu7tca/7vKUOAtDW4w4LhsAzrvlsqk8xCK+opMY27DGtrDKlwUwhqg///+C6QOw12iRKNIq9mCOV5+ORmOA+PwqisF4=; _gaf_fp=0f3f9e0c791b5513d38aa715d0624aab; _gcl_au=1.1.420472597.1749119222.448034571.1752653439.1752653439; JSESSIONID=0F617D64E2FD6DD92F3BB10935E3C846; _ga_38NCVF2XST=GS2.1.s1752653436$o51$g1$t1752653450$j46$l0$h366949276; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1752653451; _ga_CN0F80S6GL=GS2.1.s1752653437$o50$g1$t1752653451$j46
$l0$h0'
,
"User-agent"
:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
,
"User-agent"
:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
,
}
}
url
=
"https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
url
=
"https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
data
=
{
data
=
{
"station"
:
"U
S
"
,
"station"
:
"U
K
"
,
"cid"
:
category_name
[
'c_id'
],
# 分类id
"cid"
:
category_name
[
'c_id'
],
# 分类id
"bsr"
:
f
"{i}"
# 排名
"bsr"
:
f
"{i}"
# 排名
}
}
...
@@ -167,7 +167,7 @@ def save_site_category(site_bsr_dict=None):
...
@@ -167,7 +167,7 @@ def save_site_category(site_bsr_dict=None):
def
run
():
def
run
():
# get_cid()
# get_cid()
junglescout_spider
(
'u
s
'
)
junglescout_spider
(
'u
k
'
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
py_spider/amazon_save_db/save_all_syn_st_minid_maxid.py
View file @
000d315d
...
@@ -158,7 +158,7 @@ if __name__ == '__main__':
...
@@ -158,7 +158,7 @@ if __name__ == '__main__':
month
=
7
month
=
7
engine_db_num
=
14
engine_db_num
=
14
# for site in ['de','uk']:
# for site in ['de','uk']:
for
site
in
[
'u
k
'
]:
for
site
in
[
'u
s
'
]:
time
.
sleep
(
0
)
time
.
sleep
(
0
)
count_all_syn_st_id
(
site_name
=
site
,
month
=
month
)
.
get_minid_maxid
()
count_all_syn_st_id
(
site_name
=
site
,
month
=
month
)
.
get_minid_maxid
()
# count_all_syn_st_id(site_name=site,month=month,engine_db_num=engine_db_num).search_term_syn()
# count_all_syn_st_id(site_name=site,month=month,engine_db_num=engine_db_num).search_term_syn()
...
...
py_spider/amazon_spider/asin_detail_pg.py
View file @
000d315d
...
@@ -214,7 +214,8 @@ class async_asin_pg():
...
@@ -214,7 +214,8 @@ class async_asin_pg():
'created_time'
:
new_date
,
'current_asin'
:
items
[
'current_asin'
],
'created_time'
:
new_date
,
'current_asin'
:
items
[
'current_asin'
],
'parent_asin'
:
items
[
"parentAsin"
],
'div_id_list'
:
items
[
'div_id_list'
],
'parent_asin'
:
items
[
"parentAsin"
],
'div_id_list'
:
items
[
'div_id_list'
],
'bundles_this_asins_json'
:
items
[
'bundles_this_asins_data_json'
],
'bundles_this_asins_json'
:
items
[
'bundles_this_asins_data_json'
],
'video_m3u8_url'
:
items
[
"video_m3u8"
],
'result_list_json'
:
items
[
'result_list_json'
]
'video_m3u8_url'
:
items
[
"video_m3u8"
],
'result_list_json'
:
items
[
'result_list_json'
],
'bundle_asin_component_json'
:
items
[
'bundle_asin_component_json'
]
}
}
if
self
.
site_name
in
[
'uk'
,
'de'
,
'fr'
,
'es'
,
'it'
]:
if
self
.
site_name
in
[
'uk'
,
'de'
,
'fr'
,
'es'
,
'it'
]:
item
[
'five_six_val'
]
=
items
[
'five_six_val'
]
item
[
'five_six_val'
]
=
items
[
'five_six_val'
]
...
@@ -222,8 +223,6 @@ class async_asin_pg():
...
@@ -222,8 +223,6 @@ class async_asin_pg():
item
[
'five_six_val'
]
=
None
item
[
'five_six_val'
]
=
None
# 第二次请求
# 第二次请求
_response_text
=
None
_response_text
=
None
# if (item['weight'] is None and item['volume'] is None and item['rank'] is None and item[
# 'launch_time'] is None) or (item['variat_num'] > 0 and is_variat == '0'):
if
item
[
'variat_num'
]
>
0
and
is_variat
==
'0'
:
if
item
[
'variat_num'
]
>
0
and
is_variat
==
'0'
:
self
.
request_total_count_list
.
append
(
4
)
self
.
request_total_count_list
.
append
(
4
)
if
item
[
'variat_num'
]
>
0
:
if
item
[
'variat_num'
]
>
0
:
...
@@ -478,7 +477,7 @@ class async_asin_pg():
...
@@ -478,7 +477,7 @@ class async_asin_pg():
def
run
(
self
):
def
run
(
self
):
asin_list
=
self
.
save_asin_detail
.
read_db_data
()
asin_list
=
self
.
save_asin_detail
.
read_db_data
()
# asin_list = ['B0
7BXM8RZ3|2025-01|1|1|null|null','B07FM8P1Z1|2025-01|1|1|null|null','B07TWHCK69
|2025-01|1|1|null|null']
# asin_list = ['B0
BPKK2BMN
|2025-01|1|1|null|null']
if
asin_list
:
if
asin_list
:
for
asin
in
asin_list
:
for
asin
in
asin_list
:
self
.
queries_asin_queue
.
put
(
asin
)
self
.
queries_asin_queue
.
put
(
asin
)
...
...
py_spider/amazon_spider/recall_cases_spider.py
View file @
000d315d
This diff is collapsed.
Click to expand it.
py_spider/amzon_parse_db_html/pares_html.py
View file @
000d315d
...
@@ -35,7 +35,7 @@ class Parse_asin_html():
...
@@ -35,7 +35,7 @@ class Parse_asin_html():
print
(
'没有该 asin html'
)
print
(
'没有该 asin html'
)
def
search_term_html
(
self
,
site_name
=
'us'
,
month
=
'04'
):
def
search_term_html
(
self
,
site_name
=
'us'
,
month
=
'04'
):
sql
=
f
"SELECT search_term,page,html FROM search_term_html_2025_{month} WHERE search_term='
lace white tops for women
' and site_name = '{site_name}'"
sql
=
f
"SELECT search_term,page,html FROM search_term_html_2025_{month} WHERE search_term='
resin kit
' and site_name = '{site_name}'"
print
(
sql
)
print
(
sql
)
df
=
pd
.
read_sql
(
sql
,
con
=
engine_strrocks
)
df
=
pd
.
read_sql
(
sql
,
con
=
engine_strrocks
)
print
(
df
.
values
)
print
(
df
.
values
)
...
@@ -52,8 +52,8 @@ class Parse_asin_html():
...
@@ -52,8 +52,8 @@ class Parse_asin_html():
print
(
'没有该 搜索词 html'
)
print
(
'没有该 搜索词 html'
)
def
run
(
self
):
def
run
(
self
):
self
.
asin_html
()
#
self.asin_html()
# self.search_term_html(site_name='us',month='04
')
self
.
search_term_html
(
site_name
=
'uk'
,
month
=
'07
'
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
py_spider/utils/asin_parse.py
View file @
000d315d
...
@@ -402,7 +402,7 @@ class ParseAsinUs(object):
...
@@ -402,7 +402,7 @@ class ParseAsinUs(object):
break
break
if
min_match_asin_data_list
:
if
min_match_asin_data_list
:
min_match_asin_json
=
json
.
dumps
(
min_match_asin_data_list
,
ensure_ascii
=
False
)
min_match_asin_json
=
json
.
dumps
(
min_match_asin_data_list
,
ensure_ascii
=
False
)
# bundles_this_asins ,Bundles with this item B0BPV8R4K8
# bundles_this_asins ,Bundles with this item B0BPV8R4K8
变体下方位置。和五点描述挨着
bundles_this_asins_data_list
=
[]
bundles_this_asins_data_list
=
[]
bundles_this_asins_data_json
=
None
bundles_this_asins_data_json
=
None
for
i
in
ASIN_XPATH
[
'bundles_this_asins'
]:
for
i
in
ASIN_XPATH
[
'bundles_this_asins'
]:
...
@@ -436,7 +436,48 @@ class ParseAsinUs(object):
...
@@ -436,7 +436,48 @@ class ParseAsinUs(object):
break
break
if
bundles_this_asins_data_list
:
if
bundles_this_asins_data_list
:
bundles_this_asins_data_json
=
json
.
dumps
(
bundles_this_asins_data_list
,
ensure_ascii
=
False
)
bundles_this_asins_data_json
=
json
.
dumps
(
bundles_this_asins_data_list
,
ensure_ascii
=
False
)
# 捆绑销售 B0DD8W2DZD This bundle contains 2 items
href_asin_list
=
self
.
response_s
.
xpath
(
"//div[@class='bundle-title']/following-sibling::div//div[@class='bundle-components']//div[contains(@id,'bundle-component-details-component-title')]/a/@href"
)
bundle_asin_component_list
=
[]
if
href_asin_list
:
bundle_component_asin_list
=
[]
for
href_asin
in
href_asin_list
:
i_asin_list
=
re
.
findall
(
r'(?:[A-Z0-9]{10}|[0-9]{10})'
,
href_asin
)
bundle_component_asin_list
.
append
(
i_asin_list
[
0
])
if
bundle_component_asin_list
:
bundle_component_asin_list
=
list
(
set
(
bundle_component_asin_list
))
for
bundle_component_asin
in
bundle_component_asin_list
:
print
(
'bundle_component_asin:'
,
bundle_component_asin
)
bundle_title_list
=
self
.
response_s
.
xpath
(
f
"//a[contains(@href,'{bundle_component_asin}')]/parent::div[contains(@id,'component-details-component-title')]/a/text()"
)
bundle_asin_title
=
bundle_title_list
[
0
]
if
bundle_title_list
else
None
bundle_img_list
=
self
.
response_s
.
xpath
(
f
"//a[contains(@href,'{bundle_component_asin}')]/img/@src"
)
bundle_asin_img
=
bundle_img_list
[
0
]
if
bundle_img_list
else
None
bundle_review_list
=
self
.
response_s
.
xpath
(
rf
"//a[contains(@href,'{bundle_component_asin}')]/i[contains(@class,'component-details-component-review')]//following-sibling::span/text()"
)
bundle_asin_review
=
bundle_review_list
[
0
]
if
bundle_review_list
else
None
bundle_starslist
=
self
.
response_s
.
xpath
(
rf
"//a[contains(@href,'{bundle_component_asin}')]/i[contains(@class,'component-details-component-review-stars')]/@class"
)
bundle_stars
=
bundle_starslist
[
0
]
if
bundle_starslist
else
None
bundle_stars_list
=
re
.
findall
(
r'a-star-(.*?) '
,
bundle_stars
)
bundle_asin_star
=
bundle_stars_list
[
0
]
.
replace
(
'-'
,
'.'
)
if
bundle_stars_list
else
None
bundle_asin_price_list
=
self
.
response_s
.
xpath
(
f
"//a[contains(@href,'{bundle_component_asin}')]/parent::div/following-sibling::div[contains(@class,'component-details-component-prices')]/span/text()"
)
bundle_asin_price
=
bundle_asin_price_list
[
0
]
if
bundle_asin_price_list
else
None
bundle_asin_point_list
=
self
.
response_s
.
xpath
(
f
"//a[contains(@href,'{bundle_component_asin}')]/parent::div/following-sibling::ul/li[contains(@id,'component-details-component-bullet-point')]/span/text()"
)
bundle_asin_point
=
'|-|'
.
join
(
bundle_asin_point_list
)
if
bundle_asin_point_list
else
None
bundle_component_asin_item
=
{
"bundle_component_asin"
:
bundle_component_asin
,
"bundle_asin_title"
:
bundle_asin_title
,
'bundle_asin_img'
:
bundle_asin_img
,
"bundle_asin_review"
:
bundle_asin_review
,
"bundle_asin_star"
:
bundle_asin_star
,
"bundle_asin_price"
:
bundle_asin_price
,
"bundle_asin_point"
:
bundle_asin_point
}
bundle_asin_component_list
.
append
(
bundle_component_asin_item
)
if
bundle_asin_component_list
:
bundle_asin_component_json
=
json
.
dumps
(
bundle_asin_component_list
)
else
:
bundle_asin_component_json
=
None
# 五点描述
# 五点描述
for
i
in
ASIN_XPATH
[
'five_data'
]:
for
i
in
ASIN_XPATH
[
'five_data'
]:
five_text_list
=
self
.
response_s
.
xpath
(
i
)
five_text_list
=
self
.
response_s
.
xpath
(
i
)
...
@@ -2815,7 +2856,7 @@ class ParseAsinUs(object):
...
@@ -2815,7 +2856,7 @@ class ParseAsinUs(object):
'customer_reviews_json'
:
customer_reviews_json
,
'together_asin_json'
:
together_asin_json
,
'customer_reviews_json'
:
customer_reviews_json
,
'together_asin_json'
:
together_asin_json
,
'min_match_asin_json'
:
min_match_asin_json
,
'seller_json'
:
seller_json
,
'current_asin'
:
current_asin
,
'min_match_asin_json'
:
min_match_asin_json
,
'seller_json'
:
seller_json
,
'current_asin'
:
current_asin
,
'div_id_list'
:
div_id_list
,
'bundles_this_asins_data_json'
:
bundles_this_asins_data_json
,
'div_id_list'
:
div_id_list
,
'bundles_this_asins_data_json'
:
bundles_this_asins_data_json
,
'video_m3u8'
:
video_m3u8
,
'result_list_json'
:
result_list_json
}
'video_m3u8'
:
video_m3u8
,
'result_list_json'
:
result_list_json
,
'bundle_asin_component_json'
:
bundle_asin_component_json
}
if
self
.
site_name
==
'us'
:
if
self
.
site_name
==
'us'
:
item
[
'three_four_val'
]
=
Join_Prime_int
item
[
'three_four_val'
]
=
Join_Prime_int
elif
self
.
site_name
in
[
'uk'
,
'fr'
,
'it'
,
'es'
]:
elif
self
.
site_name
in
[
'uk'
,
'fr'
,
'it'
,
'es'
]:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment