Commit 73017639 by fangxingjun

no message

parent f43bbe07
import sys
import os
import time
import traceback
import pandas as pd
......@@ -24,7 +25,6 @@ class ImportStToPg14(object):
self.df_st_month_state = pd.DataFrame()
self.df_save = pd.DataFrame()
# self.fetch_year_month_by_week() # 如果传的date_type='week', 将date_info转换成月的值
self.year, self.month = self.date_info.split("-")[0], int(self.date_info.split("-")[1])
self.site_name_url_dict = {
"us": 'https://www.amazon.com/',
"uk": 'https://www.amazon.co.uk/',
......@@ -39,6 +39,7 @@ class ImportStToPg14(object):
"de": 5,
}
self.fetch_year_month_by_week() # 如果传的date_type='week', 将date_info转换成月的值
self.year, self.month = self.date_info.split("-")[0], int(self.date_info.split("-")[1])
def fetch_year_month_by_week(self):
if self.date_type == 'week':
......@@ -52,11 +53,11 @@ class ImportStToPg14(object):
today_str = datetime.now().strftime("%Y-%m-%d")
with self.engine_pg14.begin() as conn:
sql_delete = f"delete from {self.site_name}_search_term_month where (date_info='{self.date_info}' and updated_time>='{today_str} 00:00:00') or (date_info<'{self.date_info}');"
sql_delete = f"delete from {self.site_name}_search_term_month where (date_info='{self.date_info}' and created_time>='{today_str} 00:00:00') or (date_info<'{self.date_info}');"
print(f"sql_delete--{self.site_name}_search_term_month:", sql_delete)
conn.execute(sql_delete)
sql_delete = f"delete from {self.site_name}_search_term_month_syn where (date_info='{self.date_info}' and state=1 and updated_time>='{today_str} 00:00:00') or (date_info<'{self.date_info}');"
sql_delete = f"delete from {self.site_name}_search_term_month_syn where (date_info='{self.date_info}' and state=1 and created_time>='{today_str} 00:00:00') or (date_info<'{self.date_info}');"
print(f"sql_delete--{self.site_name}_search_term_month_syn:", sql_delete)
conn.execute(sql_delete)
......@@ -106,7 +107,7 @@ class ImportStToPg14(object):
before_count = len(df_search_term)
df_search_term = df_search_term.loc[
df_search_term["search_term"].fillna("").astype(str).str.len() <= 450
df_search_term["url"].fillna("").astype(str).str.len() <= 450
].copy()
after_count = len(df_search_term)
......@@ -216,7 +217,7 @@ class ImportStToPg14(object):
self.save_data()
break
except Exception as e:
print(f"搜索词导入到pg14失败:{self.site_name}-{self.date_type}--{self.date_info}")
print(f"搜索词导入到pg14失败:{self.site_name}-{self.date_type}--{self.date_info}, 报错信息: {traceback.format_exc()}")
self.engine_mysql = DBUtil.get_db_engine(db_type=DbTypes.mysql.name, site_name=self.site_name)
self.engine_pg14 = DBUtil.get_db_engine(db_type=DbTypes.postgresql_14.name, site_name=self.site_name)
time.sleep(60)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment