Commit 16c20612 by fangxingjun

no message

parent b5fad217
......@@ -45,6 +45,18 @@ class ImportStToPg14(object):
df = pd.read_sql(sql, con=self.engine_mysql)
self.date_info = list(df.year_month)[0]
from datetime import datetime
today_str = datetime.now().strftime("%Y-%m-%d")
with self.engine_pg14.begin() as conn:
sql_delete = f"delete from {self.site_name}_search_term_month where (date_info='{self.date_info}' and updated_time>='{today_str} 00:00:00') or (date_info<'{self.date_info}');"
print(f"sql_delete--{self.site_name}_search_term_month:", sql_delete)
conn.execute(sql_delete)
sql_delete = f"delete from {self.site_name}_search_term_month_syn where (date_info='{self.date_info}' and state=1 and updated_time>='{today_str} 00:00:00') or (date_info<'{self.date_info}');"
print(f"sql_delete--{self.site_name}_search_term_month_syn:", sql_delete)
conn.execute(sql_delete)
def read_data(self):
self.fetch_year_month_by_week() # 如果传的date_type='week', 将date_info转换成月的值
# 1. 读取date_20_to_30表获取月份对应的周
......@@ -102,18 +114,7 @@ class ImportStToPg14(object):
return df_search_term
def save_data(self):
from datetime import datetime
today_str = datetime.now().strftime("%Y-%m-%d")
with self.engine_pg14.begin() as conn:
sql_delete = f"delete from {self.site_name}_search_term_month where (date_info='{self.date_info}' and state=1 and updated_time>='{today_str} 00:00:00') or (date_info<'{self.date_info}');"
print(f"sql_delete--{self.site_name}_search_term_month:", sql_delete)
conn.execute(sql_delete)
sql_delete = f"delete from {self.site_name}_search_term_month_syn where (date_info='{self.date_info}' and state=1 and updated_time>='{today_str} 00:00:00') or (date_info<'{self.date_info}');"
print(f"sql_delete--{self.site_name}_search_term_month_syn:", sql_delete)
conn.execute(sql_delete)
print(f"存储{self.site_name}_search_term_month: {self.df_save.shape}")
self.df_save.to_sql(f"{self.site_name}_search_term_month", con=self.engine_pg14, index=False,
......@@ -193,7 +194,8 @@ class ImportStToPg14(object):
1
)
ON DUPLICATE KEY UPDATE
spider_is_ready = VALUES(spider_is_ready),
spider_is_ready = VALUES(spider_is_ready),
bg_dol_state = VALUES(bg_dol_state),
spider_state = VALUES(spider_state);
"""
print(f"workflow_manager进度表---重置爬虫的搜索词抓取进度: {update_sql_workflow}")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment