no message

parent be525766
...@@ -28,14 +28,24 @@ from apscheduler.triggers.cron import CronTrigger ...@@ -28,14 +28,24 @@ from apscheduler.triggers.cron import CronTrigger
from config import ( from config import (
WEEKLY_DAY_OF_WEEK, WEEKLY_HOUR, WEEKLY_MINUTE, WEEKLY_DAY_OF_WEEK, WEEKLY_HOUR, WEEKLY_MINUTE,
MONTHLY_DAY, MONTHLY_HOUR, MONTHLY_MINUTE, MONTHLY_DAY, MONTHLY_HOUR, MONTHLY_MINUTE,
MAX_RETRY_DAYS, RETRY_INTERVAL_MINUTES, DATA_URL, SITES, SITE_PICKER_MAP, SITE_COUNTRY_NAME, SITE_DM_COUNTRY MAX_RETRY_DAYS, RETRY_INTERVAL_MIN_MINUTES, RETRY_INTERVAL_MAX_MINUTES,
DATA_URL, SITES, SITE_PICKER_MAP, SITE_COUNTRY_NAME, SITE_DM_COUNTRY
) )
from secure_db_client import get_remote_engine from secure_db_client import get_remote_engine
import re import re
import os import os
import random
from urllib.parse import urlparse, parse_qs, unquote from urllib.parse import urlparse, parse_qs, unquote
class DataNotReadyError(Exception):
"""
目标周期的报表在 Amazon 端还没生成(下拉框中找不到对应的周/月/年选项)。
与"下载环节失败"区分:这种情况当天重试无意义,应立即终止,交给任务计划次日触发。
"""
pass
# ======================== Chrome 进程管理 ======================== # ======================== Chrome 进程管理 ========================
def is_chrome_running(): def is_chrome_running():
...@@ -421,11 +431,11 @@ def select_filters_and_query(page, week_start, week_end): ...@@ -421,11 +431,11 @@ def select_filters_and_query(page, week_start, week_end):
# 通过 shadow_root 查找目标周选项 # 通过 shadow_root 查找目标周选项
target_option = week_dropdown.shadow_root.ele(f'css:kat-option[value="{target_value}"]', timeout=5) target_option = week_dropdown.shadow_root.ele(f'css:kat-option[value="{target_value}"]', timeout=5)
if not target_option: if not target_option:
logger.error(f"未找到目标周选项: {target_value},下拉框中可能没有该周数据") logger.error(f"未找到目标周选项: {target_value},下拉框中可能没有该周数据(报表尚未生成)")
# 点击空白处收起下拉框,避免影响后续操作 # 点击空白处收起下拉框,避免影响后续操作
page.ele('tag:body').click() page.ele('tag:body').click()
time.sleep(1) time.sleep(1)
return False raise DataNotReadyError(f"周报 {target_value} 尚未生成")
target_option.click() target_option.click()
time.sleep(1) time.sleep(1)
logger.info(f"Select week 已切换为 {target_value}(原值: {current_value})") logger.info(f"Select week 已切换为 {target_value}(原值: {current_value})")
...@@ -488,10 +498,10 @@ def select_filters_and_query_monthly(page, month_end): ...@@ -488,10 +498,10 @@ def select_filters_and_query_monthly(page, month_end):
time.sleep(1) time.sleep(1)
year_option = year_dropdown.shadow_root.ele(f'css:kat-option[value="{target_year}"]', timeout=5) year_option = year_dropdown.shadow_root.ele(f'css:kat-option[value="{target_year}"]', timeout=5)
if not year_option: if not year_option:
logger.error(f"未找到年份选项: {target_year}") logger.error(f"未找到年份选项: {target_year}(报表尚未生成)")
page.ele('tag:body').click() page.ele('tag:body').click()
time.sleep(1) time.sleep(1)
return False raise DataNotReadyError(f"月报年份 {target_year} 尚未生成")
year_option.click() year_option.click()
time.sleep(2) # 等待 Select month 下拉框加载 time.sleep(2) # 等待 Select month 下拉框加载
logger.info(f"Select year 已切换为 {target_year}") logger.info(f"Select year 已切换为 {target_year}")
...@@ -509,10 +519,10 @@ def select_filters_and_query_monthly(page, month_end): ...@@ -509,10 +519,10 @@ def select_filters_and_query_monthly(page, month_end):
time.sleep(1) time.sleep(1)
month_option = month_dropdown.shadow_root.ele(f'css:kat-option[value="{target_value}"]', timeout=5) month_option = month_dropdown.shadow_root.ele(f'css:kat-option[value="{target_value}"]', timeout=5)
if not month_option: if not month_option:
logger.error(f"未找到月份选项: {target_value}") logger.error(f"未找到月份选项: {target_value}(报表尚未生成)")
page.ele('tag:body').click() page.ele('tag:body').click()
time.sleep(1) time.sleep(1)
return False raise DataNotReadyError(f"月报月份 {target_value} 尚未生成")
month_option.click() month_option.click()
time.sleep(1) time.sleep(1)
logger.info(f"Select month 已切换为 {target_value}") logger.info(f"Select month 已切换为 {target_value}")
...@@ -885,8 +895,14 @@ def send_notification(event, **kwargs): ...@@ -885,8 +895,14 @@ def send_notification(event, **kwargs):
elif event == 'task_summary': elif event == 'task_summary':
success_sites = kwargs.get('success_sites', []) success_sites = kwargs.get('success_sites', [])
failed_sites = kwargs.get('failed_sites', []) failed_sites = kwargs.get('failed_sites', [])
not_ready_sites = kwargs.get('not_ready_sites', [])
attempt = kwargs.get('attempt', 0) attempt = kwargs.get('attempt', 0)
status = "全部成功" if not failed_sites else "部分失败" if not failed_sites and not not_ready_sites:
status = "全部成功"
elif not_ready_sites and not failed_sites:
status = "报表未生成(待次日)"
else:
status = "部分失败"
retry_info = f"(第 {attempt + 1} 次执行)" if attempt > 0 else "" retry_info = f"(第 {attempt + 1} 次执行)" if attempt > 0 else ""
msg = ( msg = (
f"【ABA {period_type} - 任务汇总】{retry_info}\n" f"【ABA {period_type} - 任务汇总】{retry_info}\n"
...@@ -894,6 +910,7 @@ def send_notification(event, **kwargs): ...@@ -894,6 +910,7 @@ def send_notification(event, **kwargs):
f"周期: {date_info} ({week_start} ~ {week_end})\n" f"周期: {date_info} ({week_start} ~ {week_end})\n"
f"成功: {', '.join(success_sites) if success_sites else '无'}\n" f"成功: {', '.join(success_sites) if success_sites else '无'}\n"
f"失败: {', '.join(failed_sites) if failed_sites else '无'}\n" f"失败: {', '.join(failed_sites) if failed_sites else '无'}\n"
f"报表未生成: {', '.join(not_ready_sites) if not_ready_sites else '无'}\n"
f"时间: {now}" f"时间: {now}"
) )
else: else:
...@@ -1077,7 +1094,9 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info): ...@@ -1077,7 +1094,9 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
:param week_start: 目标周开始日期 :param week_start: 目标周开始日期
:param week_end: 目标周结束日期 :param week_end: 目标周结束日期
:param date_info: 周期标识,周报如 "2026-11",月报如 "2026-02" :param date_info: 周期标识,周报如 "2026-11",月报如 "2026-02"
:return: (success_sites, failed_sites, success_download_infos) 或 False(浏览器/登录失败) :return: (success_sites, failed_sites, success_download_infos, not_ready_sites) 或 False(浏览器/登录失败)
- failed_sites: 下载环节失败的站点(报表已生成,当天可重试)
- not_ready_sites: 报表尚未生成的站点(当天重试无意义,交给任务计划次日触发)
""" """
# 打开数据页,失败则关闭浏览器重试,最多 3 次 # 打开数据页,失败则关闭浏览器重试,最多 3 次
page = None page = None
...@@ -1123,6 +1142,7 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info): ...@@ -1123,6 +1142,7 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
submitted_sites = [] submitted_sites = []
failed_submit_sites = [] failed_submit_sites = []
not_ready_sites = [] # 报表尚未生成的站点(当天不重试,交给任务计划次日)
for site in sites: for site in sites:
try: try:
if submit_download_request(page, site, date_type, week_start, week_end): if submit_download_request(page, site, date_type, week_start, week_end):
...@@ -1131,13 +1151,16 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info): ...@@ -1131,13 +1151,16 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
else: else:
failed_submit_sites.append(site) failed_submit_sites.append(site)
logger.error(f"[{site}] 提交失败") logger.error(f"[{site}] 提交失败")
except DataNotReadyError as e:
logger.warning(f"[{site}] 报表尚未生成,当天不重试: {e}")
not_ready_sites.append(site)
except Exception as e: except Exception as e:
logger.error(f"[{site}] 提交异常: {e}") logger.error(f"[{site}] 提交异常: {e}")
failed_submit_sites.append(site) failed_submit_sites.append(site)
if not submitted_sites: if not submitted_sites:
logger.error("所有站点提交均失败,流程终止") logger.error("所有站点提交均失败,流程终止")
return [], list(sites), [] return [], list(failed_submit_sites), [], list(not_ready_sites)
logger.info(f"提交成功 {len(submitted_sites)} 个: {submitted_sites}") logger.info(f"提交成功 {len(submitted_sites)} 个: {submitted_sites}")
if failed_submit_sites: if failed_submit_sites:
...@@ -1151,7 +1174,7 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info): ...@@ -1151,7 +1174,7 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
manager_tab = open_download_manager(page) manager_tab = open_download_manager(page)
if not manager_tab: if not manager_tab:
logger.error("未能打开 Download Manager,所有已提交站点视为失败") logger.error("未能打开 Download Manager,所有已提交站点视为失败")
return [], list(sites), [] return [], list(failed_submit_sites) + list(submitted_sites), [], list(not_ready_sites)
pending_tasks = [ pending_tasks = [
{ {
...@@ -1196,10 +1219,12 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info): ...@@ -1196,10 +1219,12 @@ def run_weekly_download(sites, date_type, week_start, week_end, date_info):
if failed_sites: if failed_sites:
logger.warning(f"以下站点下载失败: {', '.join(failed_sites)}") logger.warning(f"以下站点下载失败: {', '.join(failed_sites)}")
else: if not_ready_sites:
logger.warning(f"以下站点报表尚未生成: {', '.join(not_ready_sites)}")
if not failed_sites and not not_ready_sites:
logger.success(f"所有站点下载完成: {', '.join(sites)}") logger.success(f"所有站点下载完成: {', '.join(sites)}")
return success_sites, failed_sites, success_download_infos return success_sites, failed_sites, success_download_infos, list(not_ready_sites)
finally: finally:
try: try:
...@@ -1260,27 +1285,41 @@ def weekly_task_with_retry(sites, date_type='week'): ...@@ -1260,27 +1285,41 @@ def weekly_task_with_retry(sites, date_type='week'):
result = run_weekly_download(remaining_sites, date_type, week_start, week_end, date_info) result = run_weekly_download(remaining_sites, date_type, week_start, week_end, date_info)
# run_weekly_download 返回 (success_sites, failed_sites, success_download_infos) 或 False(浏览器/登录失败) # run_weekly_download 返回 (success_sites, failed_sites, success_download_infos, not_ready_sites) 或 False(浏览器/登录失败)
not_ready_sites = []
if result is False: if result is False:
# 登录/浏览器失败,login_failed 通知已在内部发送,这里不重复发汇总 # 登录/浏览器失败,login_failed 通知已在内部发送,这里不重复发汇总
failed_sites = list(remaining_sites) failed_sites = list(remaining_sites)
else: else:
success_sites, failed_sites, success_download_infos = result success_sites, failed_sites, success_download_infos, not_ready_sites = result
all_success_sites.extend(success_sites) all_success_sites.extend(success_sites)
all_download_infos.extend(success_download_infos) all_download_infos.extend(success_download_infos)
remaining_sites = failed_sites # 下次只重试失败的站点
# 发送任务汇总通知(每次执行都发,不管成功失败) # 发送任务汇总通知(每次执行都发,不管成功失败)
send_notification('task_summary', date_type=date_type, date_info=date_info, send_notification('task_summary', date_type=date_type, date_info=date_info,
week_start=week_start, week_end=week_end, week_start=week_start, week_end=week_end,
success_sites=all_success_sites, failed_sites=failed_sites, attempt=attempt) success_sites=all_success_sites, failed_sites=failed_sites,
not_ready_sites=not_ready_sites, attempt=attempt)
# 全部成功 → 推送接口 → 结束 # 全部成功 → 推送接口 → 结束
if not failed_sites: if not failed_sites and not not_ready_sites:
logger.success("所有站点数据下载成功") logger.success("所有站点数据下载成功")
push_to_api(all_download_infos, date_type, date_info, week_start) push_to_api(all_download_infos, date_type, date_info, week_start)
return return
if not_ready_sites:
logger.warning(f"以下站点报表尚未生成,稍后一并重试: {', '.join(not_ready_sites)}")
# 下载失败 + 报表未生成的站点都纳入下一轮重试
# (报表未生成也再等等看,稍后可能就出来了;影响不大)
remaining_sites = list(failed_sites) + list(not_ready_sites)
if not remaining_sites:
# 没有需要重试的站点(全部成功)→ 结束
logger.success("没有需要重试的站点,结束本次执行")
if all_download_infos:
push_to_api(all_download_infos, date_type, date_info, week_start)
return
# 最后一次机会也失败了 → 推送已成功的部分 → 放弃 # 最后一次机会也失败了 → 推送已成功的部分 → 放弃
if attempt >= MAX_RETRY_DAYS: if attempt >= MAX_RETRY_DAYS:
logger.error(f"已重试 {MAX_RETRY_DAYS} 次仍失败,放弃本周期") logger.error(f"已重试 {MAX_RETRY_DAYS} 次仍失败,放弃本周期")
...@@ -1288,13 +1327,14 @@ def weekly_task_with_retry(sites, date_type='week'): ...@@ -1288,13 +1327,14 @@ def weekly_task_with_retry(sites, date_type='week'):
push_to_api(all_download_infos, date_type, date_info, week_start) push_to_api(all_download_infos, date_type, date_info, week_start)
return return
# 还有机会,等待指定间隔后重试 # 还有机会,等待一个随机间隔后重试(在 [MIN, MAX] 分钟之间随机取,避免固定节奏)
sleep_seconds = RETRY_INTERVAL_MINUTES * 60 sleep_minutes = random.randint(RETRY_INTERVAL_MIN_MINUTES, RETRY_INTERVAL_MAX_MINUTES)
next_retry_time = datetime.now() + timedelta(minutes=RETRY_INTERVAL_MINUTES) sleep_seconds = sleep_minutes * 60
next_retry_time = datetime.now() + timedelta(minutes=sleep_minutes)
logger.warning( logger.warning(
f"本次失败站点: {', '.join(remaining_sites)}," f"本次待重试站点: {', '.join(remaining_sites)},"
f"将在 {next_retry_time.strftime('%Y-%m-%d %H:%M')} 重试" f"将在 {next_retry_time.strftime('%Y-%m-%d %H:%M')} 重试"
f"(已尝试 {attempt + 1}/{MAX_RETRY_DAYS + 1} 次)" f"(随机间隔 {sleep_minutes} 分钟,已尝试 {attempt + 1}/{MAX_RETRY_DAYS + 1} 次)"
) )
time.sleep(sleep_seconds) time.sleep(sleep_seconds)
...@@ -1381,18 +1421,20 @@ def manual_download(sites, date_type, start_date_str, end_date_str, date_info): ...@@ -1381,18 +1421,20 @@ def manual_download(sites, date_type, start_date_str, end_date_str, date_info):
all_download_infos = [] # 累计所有成功站点的下载信息,用于推送接口 all_download_infos = [] # 累计所有成功站点的下载信息,用于推送接口
result = run_weekly_download(remaining_sites, date_type, week_start, week_end, date_info) result = run_weekly_download(remaining_sites, date_type, week_start, week_end, date_info)
not_ready_sites = []
if result is False: if result is False:
# 登录/浏览器失败,login_failed 通知已在内部发送 # 登录/浏览器失败,login_failed 通知已在内部发送
failed_sites = list(sites) failed_sites = list(sites)
else: else:
success_sites, failed_sites, success_download_infos = result success_sites, failed_sites, success_download_infos, not_ready_sites = result
all_success_sites.extend(success_sites) all_success_sites.extend(success_sites)
all_download_infos.extend(success_download_infos) all_download_infos.extend(success_download_infos)
# 发送任务汇总通知 # 发送任务汇总通知
send_notification('task_summary', date_type=date_type, date_info=date_info, send_notification('task_summary', date_type=date_type, date_info=date_info,
week_start=week_start, week_end=week_end, week_start=week_start, week_end=week_end,
success_sites=all_success_sites, failed_sites=failed_sites, attempt=0) success_sites=all_success_sites, failed_sites=failed_sites,
not_ready_sites=not_ready_sites, attempt=0)
# 推送已成功的部分到接口 # 推送已成功的部分到接口
if all_download_infos: if all_download_infos:
......
...@@ -16,9 +16,10 @@ MONTHLY_HOUR = 22 ...@@ -16,9 +16,10 @@ MONTHLY_HOUR = 22
MONTHLY_MINUTE = 0 MONTHLY_MINUTE = 0
# 失败后最多重试次数(周和月通用) # 失败后最多重试次数(周和月通用)
MAX_RETRY_DAYS = 5 MAX_RETRY_DAYS = 2
# 每次重试间隔(分钟) # 每次重试间隔(分钟)——在 [MIN, MAX] 之间随机取一个值,避免固定间隔
RETRY_INTERVAL_MINUTES = 30 RETRY_INTERVAL_MIN_MINUTES = 30
RETRY_INTERVAL_MAX_MINUTES = 60
# ======================== 部署配置======================== # ======================== 部署配置========================
# 电脑A: # 电脑A:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment