Commit 27e0f356 by fangxingjun

no message

parent 9044a5f3
...@@ -66,6 +66,10 @@ class ImgHdfsIndex(Templates): ...@@ -66,6 +66,10 @@ class ImgHdfsIndex(Templates):
pass pass
def save_data(self):
    """Empty the target table, then append ``self.df_save`` to it.

    Reads ``self.engine_doris`` (connection source), ``self.db_save``
    (target table name) and ``self.df_save`` (object exposing ``to_sql``).
    """
    # presumably an SQLAlchemy engine: begin() is used as a context manager
    # that yields a connection -- TODO confirm commit-on-exit semantics.
    with self.engine_doris.begin() as conn:
        sql_truncate = f"truncate {self.db_save}"
        print(f"sql_truncate: {sql_truncate}")
        # Fixed: was `conn.execut(...)`, which raised AttributeError before
        # anything was truncated, so the append below never ran.
        # NOTE(review): SQLAlchemy 2.x rejects plain-string statements; wrap
        # in sqlalchemy.text() if the project runs on 2.x -- confirm version.
        conn.execute(sql_truncate)
    # Append only after the truncate block has exited, since to_sql is handed
    # the engine rather than the connection used for the truncate.
    self.df_save.to_sql(self.db_save, con=self.engine_doris, if_exists="append", index=False)
def run(self): def run(self):
......
import os
import sys
import time
import traceback
sys.path.append(os.path.dirname(sys.path[0]))
from utils.common_util import CommonUtil
from utils.secure_db_client import get_remote_engine
def check_syn(site_name, date_type, date_info, db_type='postgresql_14',
              poll_seconds=300, engine_factory=None, sleep=time.sleep):
    """Block until the search-term sync table has no rows in state 1 or 2.

    Args:
        site_name: Site prefix used in the table name and passed to the
            engine factory.
        date_type: Date granularity; a ``_week`` substring is stripped
            before it is embedded in the table name.
        date_info: Value matched against the ``date_info`` column.
        db_type: Database type handed to the engine factory.
        poll_seconds: Seconds to wait between polls and after a failure.
        engine_factory: Callable taking ``site_name`` and ``db_type``
            keywords and returning an object with ``read_sql``; defaults
            to ``get_remote_engine``.
        sleep: Callable used to wait; injectable so callers can avoid
            real delays.
    """
    if engine_factory is None:
        engine_factory = get_remote_engine
    check_table = f"{site_name}_search_term_{date_type.replace('_week', '')}_syn"
    sql_check_syn = f"select * from {check_table} where date_info='{date_info}' and state in (1, 2) limit 100"
    while True:
        try:
            # A fresh engine is requested on every poll, as the original
            # loop did, so a failed attempt does not reuse the same object.
            engine = engine_factory(site_name=site_name, db_type=db_type)
            pending_rows = engine.read_sql(sql_check_syn).shape[0]
        except Exception as e:
            # Deliberate best-effort: report the failure and retry later.
            print(f"搜索词-检查asin是否全部抓取完成报错, 报错信息: {e}, {traceback.format_exc()}")
            sleep(poll_seconds)
            continue
        if pending_rows == 0:
            print("搜索词-爬虫已经全部抓取完成, 可以同步数据")
            return
        print("搜索词-爬虫还未抓完, 等待5分钟继续")
        sleep(poll_seconds)


if __name__ == '__main__':
    site_name = CommonUtil.get_sys_arg(1, None)
    date_type = CommonUtil.get_sys_arg(2, None)
    date_info = CommonUtil.get_sys_arg(3, None)
    # presumably get_sys_arg returns the given default (None) when the
    # argument is absent -- fail with a clear message instead of an
    # AttributeError from date_type.replace().
    if site_name is None or date_type is None or date_info is None:
        sys.exit("usage: <script> <site_name> <date_type> <date_info>")
    check_syn(site_name, date_type, date_info)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment