Commit b1660e04 by fangxingjun

no message

parent e7745fcc
...@@ -173,7 +173,7 @@ class ImportStToPg14(object): ...@@ -173,7 +173,7 @@ class ImportStToPg14(object):
def update_workflow_manager(self): def update_workflow_manager(self):
with self.engine_mysql.begin() as conn: with self.engine_mysql.begin() as conn:
priority = self.site_name_pri_dict[self.site_name] priority = self.site_name_pri_dict[self.site_name]
spider_script = f'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_search_term_month.py {self.site_name} 2026-04 >/dev/null 2>&1 &";' spider_script = f'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_search_term_month.py {self.site_name} {self.date_info} >/dev/null 2>&1 &";'
update_sql_workflow = f""" update_sql_workflow = f"""
INSERT INTO selection.workflow_manager INSERT INTO selection.workflow_manager
( (
......
...@@ -8,6 +8,7 @@ from utils.secure_db_client import get_remote_engine ...@@ -8,6 +8,7 @@ from utils.secure_db_client import get_remote_engine
def update_workflow_manager(site_name, date_type, date_info): def update_workflow_manager(site_name, date_type, date_info):
print(f"当前执行的参数: {site_name, date_type, date_info}")
if date_type == "month": if date_type == "month":
while True: while True:
try: try:
...@@ -21,38 +22,46 @@ def update_workflow_manager(site_name, date_type, date_info): ...@@ -21,38 +22,46 @@ def update_workflow_manager(site_name, date_type, date_info):
db_type='mysql' db_type='mysql'
) )
with engine_mysql.begin() as conn: with engine_mysql.begin() as conn:
spider_script = f'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_spider_month.py {site_name} {date_type} {date_info} >/dev/null 2>&1 &";'
priority = site_name_pri_dict[site_name] priority = site_name_pri_dict[site_name]
update_sql_workflow = f""" update_sql_workflow = f"""
INSERT INTO workflow_manager INSERT INTO selection.workflow_manager
( (
workflow_name, workflow_name,
site_name, site_name,
date_type, date_type,
date_info, date_info,
priority, priority,
spider_name, spider_name,
spider_is_ready, spider_script,
spider_state, spider_is_ready,
bg_name, spider_state,
bg_dol_state bg_name,
) bg_dol_state,
VALUES finished_count
( )
'月全流程', VALUES
'{site_name}', (
'month', '月全流程',
'{date_info}', '{site_name}',
{priority}, 'month',
'{site_name}_spider_asin', '{date_info}',
'yes', {priority},
1, '{site_name}_spider_asin',
'{site_name}_all_cal', '{spider_script}',
1 'yes',
) 1,
ON DUPLICATE KEY UPDATE '{site_name}_all_cal',
spider_is_ready = VALUES(spider_is_ready), 3,
spider_state = VALUES(spider_state); 0
""" )
ON DUPLICATE KEY UPDATE
spider_is_ready = VALUES(spider_is_ready),
spider_script = VALUES(spider_script),
spider_state = VALUES(spider_state),
bg_dol_state = VALUES(bg_dol_state),
finished_count = COALESCE(finished_count, 0) + 1;
"""
print(f"workflow_manager进度表---重置爬虫的asin抓取进度: {update_sql_workflow}") print(f"workflow_manager进度表---重置爬虫的asin抓取进度: {update_sql_workflow}")
conn.execute(update_sql_workflow) conn.execute(update_sql_workflow)
break break
...@@ -99,5 +108,5 @@ if __name__ == '__main__': ...@@ -99,5 +108,5 @@ if __name__ == '__main__':
date_type = sys.argv[2] # 参数2:类型:week/4_week/month/quarter/day date_type = sys.argv[2] # 参数2:类型:week/4_week/month/quarter/day
date_info = sys.argv[3] # 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1 date_info = sys.argv[3] # 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
export_data(site_name, date_type, date_info) export_data(site_name, date_type, date_info)
# if site_name in ["us", "uk", "de"] and date_type == "month": if site_name in ["us", "uk", "de"] and date_type == "month":
# update_workflow_manager(site_name, date_type, date_info) update_workflow_manager(site_name, date_type, date_info)
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment