Commit cc57a012 by fangxingjun

no message

parent 1a47b299
...@@ -113,7 +113,7 @@ class DwdNsrBsrKeepaAsin(Templates): ...@@ -113,7 +113,7 @@ class DwdNsrBsrKeepaAsin(Templates):
sql_keepa = f""" sql_keepa = f"""
select distinct(asin), date_info, 3 as asin_cate_flag, null as category_id from ods_keepa_finder_asin where site_name='{self.site_name}' and date_info between '{thirty_days_ago}' and '{self.date_info}' select distinct(asin), date_info, 3 as asin_cate_flag, null as category_id from ods_keepa_finder_asin where site_name='{self.site_name}' and date_info between '{thirty_days_ago}' and '{self.date_info}'
union all union all
select distinct(asin), date_info, 4 as asin_cate_flag, null as category_id from ods_keepa_finder_task_asin where site_name='{self.site_name}' select distinct(asin), date_info, 4 as asin_cate_flag, null as category_id from ods_keepa_finder_task_asin where site_name='{self.site_name}' and date_info between '{thirty_days_ago}' and '{self.date_info}'
""" """
if self.date_type == 'month': if self.date_type == 'month':
sql_keepa = f""" sql_keepa = f"""
......
import os
import sys
import time
import traceback
sys.path.append(os.path.dirname(sys.path[0]))
from utils.secure_db_client import get_remote_engine
def wf_month_control(site_name, date_type, date_info, spider_name, wf_type="spider"):
    """Register or advance one step of the monthly workflow in ``selection.workflow_manager``.

    Args:
        site_name: Site code; must be one of ``us``, ``uk`` or ``de``.
        date_type: Date granularity written to the row (and passed to the
            asin spider script).
        date_info: Period identifier written to the row and passed to the
            spider script.
        spider_name: One of ``{site_name}_spider_st``, ``{site_name}_spider_fd``
            or ``{site_name}_spider_asin``.
        wf_type: ``"spider"`` upserts the row (spider_is_ready='yes',
            spider_state=1, bg_dol_state=1); ``"cal"`` sets bg_dol_state=3 and
            increments finished_count on the matching row. Any other value
            executes no SQL.

    Raises:
        KeyError: if ``site_name`` or ``spider_name`` is not one of the
            supported values listed above.
    """
    print(f"当前执行的参数: {site_name, date_type, date_info, spider_name, wf_type}")

    # The engine is always requested for the 'us' site, whatever site_name is.
    mysql_engine = get_remote_engine(
        site_name='us',
        db_type='mysql'
    )

    # Example of the launch commands that end up in spider_script:
    #   ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_search_term_month.py us 2026-05 >/dev/null 2>&1 &";
    #   ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_seller_account.py us 2026-05 >/dev/null 2>&1 &";
    #   ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_spider_month.py us month 2026-05 >/dev/null 2>&1 &";

    # Per-site priorities, in the order (st, fd, asin).
    site_priorities = {
        "us": [11, 12, 13],
        "uk": [21, 22, 23],
        "de": [31, 32, 33],
    }
    st_priority, fd_priority, asin_priority = site_priorities[site_name]

    # Build the spider launch command from spider_name. Each entry is
    # [script + arguments, priority, bg_name, bg_dol_name].
    spider_catalog = {
        f"{site_name}_spider_st": [
            f"Poll_site_search_term_month.py {site_name} {date_info} ",
            st_priority,
            f"{site_name}_asin_export",
            "ALL站点-asin同步-pg-api",
        ],
        f"{site_name}_spider_fd": [
            f"Poll_site_seller_account.py {site_name} {date_info} ",
            fd_priority,
            f"{site_name}_fd_import",
            "店铺数据导入-月流程",
        ],
        f"{site_name}_spider_asin": [
            f"Poll_site_spider_month.py {site_name} {date_type} {date_info} ",
            asin_priority,
            f"{site_name}_all_cal",
            "ALL-月流程-ABA+反查+流量选品",
        ],
    }

    workflow_name = "月全流程"
    script_param, priority, bg_name, bg_dol_name = spider_catalog[spider_name]
    spider_script = f'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/{script_param} >/dev/null 2>&1 &";'

    # The statement texts below are left unindented on purpose: they are
    # logged and sent to the database verbatim.
    if wf_type == "spider":
        # Insert the workflow row, or reset its progress if it already exists.
        upsert_sql = f"""
INSERT INTO selection.workflow_manager
(
workflow_name,
site_name,
date_type,
date_info,
priority,
spider_name,
spider_script,
spider_is_ready,
spider_state,
bg_name,
bg_dol_name,
bg_dol_state
)
VALUES
(
'{workflow_name}',
'{site_name}',
'{date_type}',
'{date_info}',
{priority},
'{spider_name}',
'{spider_script}',
'yes',
1,
'{bg_name}',
'{bg_dol_name}',
1
)
ON DUPLICATE KEY UPDATE
priority = VALUES(priority),
spider_is_ready = VALUES(spider_is_ready),
spider_script = VALUES(spider_script),
bg_dol_state = VALUES(bg_dol_state),
spider_state = VALUES(spider_state);
"""
        print(f"workflow_manager进度表---重置进度: {upsert_sql}")
        mysql_engine.execute(upsert_sql)
        return

    if wf_type == "cal":
        # Mark the background step finished and bump the completion counter.
        finish_sql = f"""update selection.workflow_manager
set bg_dol_state=3, finished_count=COALESCE(finished_count, 0) + 1
WHERE workflow_name='月全流程' and site_name='{site_name}' and date_type='{date_type}' and date_info='{date_info}' and priority={priority}"""
        print(f"workflow_manager进度表---更新asin导出进度和完成次数: {finish_sql}")
        mysql_engine.execute(finish_sql)
if __name__ == '__main__':
    # Test helper kept for reference: write the asin spider record for every site.
    # for site_name in ['us', 'uk', 'de']:
    #     wf_month_control(site_name=site_name, date_type='month', date_info='2026-06', spider_name=f'{site_name}_spider_asin', wf_type="spider")
    site_name = 'us'

    # Ordered (spider suffix, wf_type) steps of the monthly flow.
    monthly_steps = (
        # Sync the st search terms.
        ('st', 'spider'),
        # Search terms crawled + asin sync: reset the asin crawl, then
        # bump the st finished count by one.
        ('asin', 'spider'),
        ('st', 'cal'),
        # Sync fd: register the fd spider, then bump its finished count.
        ('fd', 'spider'),
        ('fd', 'cal'),
        # asin crawl done + full calculation: bump the asin finished count.
        ('asin', 'cal'),
    )

    for spider_suffix, step_type in monthly_steps:
        wf_month_control(
            site_name=site_name,
            date_type='month',
            date_info='2026-06',
            spider_name=f'{site_name}_spider_{spider_suffix}',
            wf_type=step_type,
        )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment