Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
spider
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
selection-new
spider
Commits
1f19360c
Commit
1f19360c
authored
Jun 15, 2026
by
PC-20230618BYKI\Administrator
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
be525766
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
71 additions
and
28 deletions
+71
-28
aba_report.py
hjm_spider_code/aba_report-download/aba_report.py
+67
-25
config.py
hjm_spider_code/aba_report-download/config.py
+4
-3
No files found.
hjm_spider_code/aba_report-download/aba_report.py
View file @
1f19360c
...
...
@@ -28,14 +28,24 @@ from apscheduler.triggers.cron import CronTrigger
from
config
import
(
WEEKLY_DAY_OF_WEEK
,
WEEKLY_HOUR
,
WEEKLY_MINUTE
,
MONTHLY_DAY
,
MONTHLY_HOUR
,
MONTHLY_MINUTE
,
MAX_RETRY_DAYS
,
RETRY_INTERVAL_MINUTES
,
DATA_URL
,
SITES
,
SITE_PICKER_MAP
,
SITE_COUNTRY_NAME
,
SITE_DM_COUNTRY
MAX_RETRY_DAYS
,
RETRY_INTERVAL_MIN_MINUTES
,
RETRY_INTERVAL_MAX_MINUTES
,
DATA_URL
,
SITES
,
SITE_PICKER_MAP
,
SITE_COUNTRY_NAME
,
SITE_DM_COUNTRY
)
from
secure_db_client
import
get_remote_engine
import
re
import
os
import
random
from
urllib.parse
import
urlparse
,
parse_qs
,
unquote
class
DataNotReadyError
(
Exception
):
"""
目标周期的报表在 Amazon 端还没生成(下拉框中找不到对应的周/月/年选项)。
与"下载环节失败"区分:这种情况当天重试无意义,应立即终止,交给任务计划次日触发。
"""
pass
# ======================== Chrome 进程管理 ========================
def
is_chrome_running
():
...
...
@@ -421,11 +431,11 @@ def select_filters_and_query(page, week_start, week_end):
# 通过 shadow_root 查找目标周选项
target_option
=
week_dropdown
.
shadow_root
.
ele
(
f
'css:kat-option[value="{target_value}"]'
,
timeout
=
5
)
if
not
target_option
:
logger
.
error
(
f
"未找到目标周选项: {target_value},下拉框中可能没有该周数据"
)
logger
.
error
(
f
"未找到目标周选项: {target_value},下拉框中可能没有该周数据
(报表尚未生成)
"
)
# 点击空白处收起下拉框,避免影响后续操作
page
.
ele
(
'tag:body'
)
.
click
()
time
.
sleep
(
1
)
r
eturn
False
r
aise
DataNotReadyError
(
f
"周报 {target_value} 尚未生成"
)
target_option
.
click
()
time
.
sleep
(
1
)
logger
.
info
(
f
"Select week 已切换为 {target_value}(原值: {current_value})"
)
...
...
@@ -488,10 +498,10 @@ def select_filters_and_query_monthly(page, month_end):
time
.
sleep
(
1
)
year_option
=
year_dropdown
.
shadow_root
.
ele
(
f
'css:kat-option[value="{target_year}"]'
,
timeout
=
5
)
if
not
year_option
:
logger
.
error
(
f
"未找到年份选项: {target_year}"
)
logger
.
error
(
f
"未找到年份选项: {target_year}
(报表尚未生成)
"
)
page
.
ele
(
'tag:body'
)
.
click
()
time
.
sleep
(
1
)
r
eturn
False
r
aise
DataNotReadyError
(
f
"月报年份 {target_year} 尚未生成"
)
year_option
.
click
()
time
.
sleep
(
2
)
# 等待 Select month 下拉框加载
logger
.
info
(
f
"Select year 已切换为 {target_year}"
)
...
...
@@ -509,10 +519,10 @@ def select_filters_and_query_monthly(page, month_end):
time
.
sleep
(
1
)
month_option
=
month_dropdown
.
shadow_root
.
ele
(
f
'css:kat-option[value="{target_value}"]'
,
timeout
=
5
)
if
not
month_option
:
logger
.
error
(
f
"未找到月份选项: {target_value}"
)
logger
.
error
(
f
"未找到月份选项: {target_value}
(报表尚未生成)
"
)
page
.
ele
(
'tag:body'
)
.
click
()
time
.
sleep
(
1
)
r
eturn
False
r
aise
DataNotReadyError
(
f
"月报月份 {target_value} 尚未生成"
)
month_option
.
click
()
time
.
sleep
(
1
)
logger
.
info
(
f
"Select month 已切换为 {target_value}"
)
...
...
@@ -885,8 +895,14 @@ def send_notification(event, **kwargs):
elif
event
==
'task_summary'
:
success_sites
=
kwargs
.
get
(
'success_sites'
,
[])
failed_sites
=
kwargs
.
get
(
'failed_sites'
,
[])
not_ready_sites
=
kwargs
.
get
(
'not_ready_sites'
,
[])
attempt
=
kwargs
.
get
(
'attempt'
,
0
)
status
=
"全部成功"
if
not
failed_sites
else
"部分失败"
if
not
failed_sites
and
not
not_ready_sites
:
status
=
"全部成功"
elif
not_ready_sites
and
not
failed_sites
:
status
=
"报表未生成(待次日)"
else
:
status
=
"部分失败"
retry_info
=
f
"(第 {attempt + 1} 次执行)"
if
attempt
>
0
else
""
msg
=
(
f
"【ABA {period_type} - 任务汇总】{retry_info}
\n
"
...
...
@@ -894,6 +910,7 @@ def send_notification(event, **kwargs):
f
"周期: {date_info} ({week_start} ~ {week_end})
\n
"
f
"成功: {', '.join(success_sites) if success_sites else '无'}
\n
"
f
"失败: {', '.join(failed_sites) if failed_sites else '无'}
\n
"
f
"报表未生成: {', '.join(not_ready_sites) if not_ready_sites else '无'}
\n
"
f
"时间: {now}"
)
else
:
...
...
@@ -1077,7 +1094,9 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
:param week_start: 目标周开始日期
:param week_end: 目标周结束日期
:param date_info: 周期标识,周报如 "2026-11",月报如 "2026-02"
:return: (success_sites, failed_sites, success_download_infos) 或 False(浏览器/登录失败)
:return: (success_sites, failed_sites, success_download_infos, not_ready_sites) 或 False(浏览器/登录失败)
- failed_sites: 下载环节失败的站点(报表已生成,当天可重试)
- not_ready_sites: 报表尚未生成的站点(当天重试无意义,交给任务计划次日触发)
"""
# 打开数据页,失败则关闭浏览器重试,最多 3 次
page
=
None
...
...
@@ -1123,6 +1142,7 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
submitted_sites
=
[]
failed_submit_sites
=
[]
not_ready_sites
=
[]
# 报表尚未生成的站点(当天不重试,交给任务计划次日)
for
site
in
sites
:
try
:
if
submit_download_request
(
page
,
site
,
date_type
,
week_start
,
week_end
):
...
...
@@ -1131,13 +1151,16 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
else
:
failed_submit_sites
.
append
(
site
)
logger
.
error
(
f
"[{site}] 提交失败"
)
except
DataNotReadyError
as
e
:
logger
.
warning
(
f
"[{site}] 报表尚未生成,当天不重试: {e}"
)
not_ready_sites
.
append
(
site
)
except
Exception
as
e
:
logger
.
error
(
f
"[{site}] 提交异常: {e}"
)
failed_submit_sites
.
append
(
site
)
if
not
submitted_sites
:
logger
.
error
(
"所有站点提交均失败,流程终止"
)
return
[],
list
(
sites
),
[]
return
[],
list
(
failed_submit_sites
),
[],
list
(
not_ready_sites
)
logger
.
info
(
f
"提交成功 {len(submitted_sites)} 个: {submitted_sites}"
)
if
failed_submit_sites
:
...
...
@@ -1151,7 +1174,7 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
manager_tab
=
open_download_manager
(
page
)
if
not
manager_tab
:
logger
.
error
(
"未能打开 Download Manager,所有已提交站点视为失败"
)
return
[],
list
(
sites
),
[]
return
[],
list
(
failed_submit_sites
)
+
list
(
submitted_sites
),
[],
list
(
not_ready_sites
)
pending_tasks
=
[
{
...
...
@@ -1196,10 +1219,12 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
if
failed_sites
:
logger
.
warning
(
f
"以下站点下载失败: {', '.join(failed_sites)}"
)
else
:
if
not_ready_sites
:
logger
.
warning
(
f
"以下站点报表尚未生成: {', '.join(not_ready_sites)}"
)
if
not
failed_sites
and
not
not_ready_sites
:
logger
.
success
(
f
"所有站点下载完成: {', '.join(sites)}"
)
return
success_sites
,
failed_sites
,
success_download_infos
return
success_sites
,
failed_sites
,
success_download_infos
,
list
(
not_ready_sites
)
finally
:
try
:
...
...
@@ -1260,27 +1285,41 @@ def weekly_task_with_retry(sites, date_type='week'):
result
=
run_weekly_download
(
remaining_sites
,
date_type
,
week_start
,
week_end
,
date_info
)
# run_weekly_download 返回 (success_sites, failed_sites, success_download_infos) 或 False(浏览器/登录失败)
# run_weekly_download 返回 (success_sites, failed_sites, success_download_infos, not_ready_sites) 或 False(浏览器/登录失败)
not_ready_sites
=
[]
if
result
is
False
:
# 登录/浏览器失败,login_failed 通知已在内部发送,这里不重复发汇总
failed_sites
=
list
(
remaining_sites
)
else
:
success_sites
,
failed_sites
,
success_download_infos
=
result
success_sites
,
failed_sites
,
success_download_infos
,
not_ready_sites
=
result
all_success_sites
.
extend
(
success_sites
)
all_download_infos
.
extend
(
success_download_infos
)
remaining_sites
=
failed_sites
# 下次只重试失败的站点
# 发送任务汇总通知(每次执行都发,不管成功失败)
send_notification
(
'task_summary'
,
date_type
=
date_type
,
date_info
=
date_info
,
week_start
=
week_start
,
week_end
=
week_end
,
success_sites
=
all_success_sites
,
failed_sites
=
failed_sites
,
attempt
=
attempt
)
success_sites
=
all_success_sites
,
failed_sites
=
failed_sites
,
not_ready_sites
=
not_ready_sites
,
attempt
=
attempt
)
# 全部成功 → 推送接口 → 结束
if
not
failed_sites
:
if
not
failed_sites
and
not
not_ready_sites
:
logger
.
success
(
"所有站点数据下载成功"
)
push_to_api
(
all_download_infos
,
date_type
,
date_info
,
week_start
)
return
if
not_ready_sites
:
logger
.
warning
(
f
"以下站点报表尚未生成,稍后一并重试: {', '.join(not_ready_sites)}"
)
# 下载失败 + 报表未生成的站点都纳入下一轮重试
# (报表未生成也再等等看,稍后可能就出来了;影响不大)
remaining_sites
=
list
(
failed_sites
)
+
list
(
not_ready_sites
)
if
not
remaining_sites
:
# 没有需要重试的站点(全部成功)→ 结束
logger
.
success
(
"没有需要重试的站点,结束本次执行"
)
if
all_download_infos
:
push_to_api
(
all_download_infos
,
date_type
,
date_info
,
week_start
)
return
# 最后一次机会也失败了 → 推送已成功的部分 → 放弃
if
attempt
>=
MAX_RETRY_DAYS
:
logger
.
error
(
f
"已重试 {MAX_RETRY_DAYS} 次仍失败,放弃本周期"
)
...
...
@@ -1288,13 +1327,14 @@ def weekly_task_with_retry(sites, date_type='week'):
push_to_api
(
all_download_infos
,
date_type
,
date_info
,
week_start
)
return
# 还有机会,等待指定间隔后重试
sleep_seconds
=
RETRY_INTERVAL_MINUTES
*
60
next_retry_time
=
datetime
.
now
()
+
timedelta
(
minutes
=
RETRY_INTERVAL_MINUTES
)
# 还有机会,等待一个随机间隔后重试(在 [MIN, MAX] 分钟之间随机取,避免固定节奏)
sleep_minutes
=
random
.
randint
(
RETRY_INTERVAL_MIN_MINUTES
,
RETRY_INTERVAL_MAX_MINUTES
)
sleep_seconds
=
sleep_minutes
*
60
next_retry_time
=
datetime
.
now
()
+
timedelta
(
minutes
=
sleep_minutes
)
logger
.
warning
(
f
"本次
失败
站点: {', '.join(remaining_sites)},"
f
"本次
待重试
站点: {', '.join(remaining_sites)},"
f
"将在 {next_retry_time.strftime('
%
Y-
%
m-
%
d
%
H:
%
M')} 重试"
f
"(已尝试 {attempt + 1}/{MAX_RETRY_DAYS + 1} 次)"
f
"(
随机间隔 {sleep_minutes} 分钟,
已尝试 {attempt + 1}/{MAX_RETRY_DAYS + 1} 次)"
)
time
.
sleep
(
sleep_seconds
)
...
...
@@ -1381,18 +1421,20 @@ def manual_download(sites, date_type, start_date_str, end_date_str, date_info):
all_download_infos
=
[]
# 累计所有成功站点的下载信息,用于推送接口
result
=
run_weekly_download
(
remaining_sites
,
date_type
,
week_start
,
week_end
,
date_info
)
not_ready_sites
=
[]
if
result
is
False
:
# 登录/浏览器失败,login_failed 通知已在内部发送
failed_sites
=
list
(
sites
)
else
:
success_sites
,
failed_sites
,
success_download_infos
=
result
success_sites
,
failed_sites
,
success_download_infos
,
not_ready_sites
=
result
all_success_sites
.
extend
(
success_sites
)
all_download_infos
.
extend
(
success_download_infos
)
# 发送任务汇总通知
send_notification
(
'task_summary'
,
date_type
=
date_type
,
date_info
=
date_info
,
week_start
=
week_start
,
week_end
=
week_end
,
success_sites
=
all_success_sites
,
failed_sites
=
failed_sites
,
attempt
=
0
)
success_sites
=
all_success_sites
,
failed_sites
=
failed_sites
,
not_ready_sites
=
not_ready_sites
,
attempt
=
0
)
# 推送已成功的部分到接口
if
all_download_infos
:
...
...
hjm_spider_code/aba_report-download/config.py
View file @
1f19360c
...
...
@@ -16,9 +16,10 @@ MONTHLY_HOUR = 22
MONTHLY_MINUTE
=
0
# 失败后最多重试次数(周和月通用)
MAX_RETRY_DAYS
=
5
# 每次重试间隔(分钟)
RETRY_INTERVAL_MINUTES
=
30
MAX_RETRY_DAYS
=
2
# 每次重试间隔(分钟)——在 [MIN, MAX] 之间随机取一个值,避免固定间隔
RETRY_INTERVAL_MIN_MINUTES
=
30
RETRY_INTERVAL_MAX_MINUTES
=
60
# ======================== 部署配置========================
# 电脑A:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment