Commit afffe7d3 by fangxingjun

no message

parent cdd9fab4
......@@ -67,7 +67,7 @@ class ImgHdfsIndex(Templates):
def save_data(self):
with self.engine_doris.begin() as conn:
sql_truncate = f"truncate {self.db_save}"
sql_truncate = f"truncate TABLE {self.db_save};"
print(f"sql_truncate: {sql_truncate}")
conn.execute(sql_truncate)
self.df_save.to_sql(self.db_save, con=self.engine_doris, if_exists="append", index=False)
......
......@@ -38,6 +38,7 @@ class ImportStToPg14(object):
"uk": 3,
"de": 5,
}
self.fetch_year_month_by_week() # 如果传的date_type='week', 将date_info转换成月的值
def fetch_year_month_by_week(self):
if self.date_type == 'week':
......@@ -45,6 +46,8 @@ class ImportStToPg14(object):
df = pd.read_sql(sql, con=self.engine_mysql)
self.date_info = list(df.year_month)[0]
def delete_dirty_data(self):
print(f"删除脏数据, 防止失败执行时报错")
from datetime import datetime
today_str = datetime.now().strftime("%Y-%m-%d")
......@@ -58,7 +61,7 @@ class ImportStToPg14(object):
conn.execute(sql_delete)
def read_data(self):
self.fetch_year_month_by_week() # 如果传的date_type='week', 将date_info转换成月的值
self.delete_dirty_data()
# 1. 读取date_20_to_30表获取月份对应的周
sql_get_week = f"select year_week, year, week from selection.date_20_to_30 WHERE `year_month`='{self.date_info}' and week_day=1"
df_week = pd.read_sql(sql_get_week, con=self.engine_mysql)
......@@ -166,8 +169,9 @@ class ImportStToPg14(object):
def update_workflow_manager(self):
with self.engine_mysql.begin() as conn:
priority = self.site_name_pri_dict[self.site_name]
spider_script = f'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_search_term_month.py {self.site_name} 2026-04 >/dev/null 2>&1 &";'
update_sql_workflow = f"""
INSERT INTO workflow_manager
INSERT INTO selection.workflow_manager
(
workflow_name,
site_name,
......@@ -175,6 +179,7 @@ class ImportStToPg14(object):
date_info,
priority,
spider_name,
spider_script,
spider_is_ready,
spider_state,
bg_name,
......@@ -187,14 +192,16 @@ class ImportStToPg14(object):
'month',
'{self.date_info}',
{priority},
'us_spider_st',
'{self.site_name}_spider_st',
'{spider_script}',
'yes',
1,
'us_asin_export',
'{self.site_name}_asin_export',
1
)
ON DUPLICATE KEY UPDATE
spider_is_ready = VALUES(spider_is_ready),
spider_script = VALUES(spider_script),
bg_dol_state = VALUES(bg_dol_state),
spider_state = VALUES(spider_state);
"""
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment