Commit 41c62ac6 by fangxingjun

no message

parent 4c3ab8b0
...@@ -132,7 +132,9 @@ class DwtAsinSync(Templates): ...@@ -132,7 +132,9 @@ class DwtAsinSync(Templates):
sql_asin_stable = f"""select asin, asin_volume as volume, asin_weight_str as weight_str from dim_asin_stable_info where site_name="{self.site_name}";""" sql_asin_stable = f"""select asin, asin_volume as volume, asin_weight_str as weight_str from dim_asin_stable_info where site_name="{self.site_name}";"""
self.df_asin_stable = self.read_data_common(sql=sql_asin_stable, content="2.2 读取dim_asin_variation_info表的asin重量体积属性") self.df_asin_stable = self.read_data_common(sql=sql_asin_stable, content="2.2 读取dim_asin_variation_info表的asin重量体积属性")
# 读取syn爬虫表 # 读取syn爬虫表
table_syn = f"us_all_syn_st_day_{self.date_info.replace('-', '_')}" if self.date_type == 'day' else f"us_all_syn_st_month_{self.date_info.replace('-', '_')}" while True:
try:
table_syn = f"{self.site_name}_all_syn_st_day_{self.date_info.replace('-', '_')}" if self.date_type == 'day' else f"{self.site_name}_all_syn_st_month_{self.date_info.replace('-', '_')}"
sql_asin_syn = f"select asin from {table_syn};" sql_asin_syn = f"select asin from {table_syn};"
pdf_asin = self.engine_pg14.read_sql(sql_asin_syn) pdf_asin = self.engine_pg14.read_sql(sql_asin_syn)
print((f"pdf_asin: {pdf_asin.shape}, sql_asin_syn: {sql_asin_syn}")) print((f"pdf_asin: {pdf_asin.shape}, sql_asin_syn: {sql_asin_syn}"))
...@@ -144,6 +146,18 @@ class DwtAsinSync(Templates): ...@@ -144,6 +146,18 @@ class DwtAsinSync(Templates):
self.df_asin_syn = self.df_asin_syn.drop_duplicates(["asin"]).cache() self.df_asin_syn = self.df_asin_syn.drop_duplicates(["asin"]).cache()
print(f"self.df_asin_syn: {self.df_asin_syn.count()}") print(f"self.df_asin_syn: {self.df_asin_syn.count()}")
self.df_asin_syn.show(10, truncate=False) self.df_asin_syn.show(10, truncate=False)
break
except Exception as e:
time.sleep(100)
self.engine_pg14 = get_remote_engine(
site_name=self.site_name,
db_type='postgresql_14'
)
self.engine_mysql = get_remote_engine(
site_name=self.site_name,
db_type='mysql'
)
continue
def handle_data(self): def handle_data(self):
if self.date_type in ['month']: if self.date_type in ['month']:
...@@ -182,7 +196,7 @@ class DwtAsinSync(Templates): ...@@ -182,7 +196,7 @@ class DwtAsinSync(Templates):
# 处理同步逻辑 # 处理同步逻辑
print("==="*20) print("==="*20)
print(f"{type(self.df_asin_syn)}: {self.df_asin_syn.count()}") print(f"{type(self.df_asin_syn)}: {self.df_asin_syn.count()}")
if self.date_type != 'day' and self.df_asin_syn.count() > 0: if self.date_type != 'day' and self.df_asin_syn.count() > 1:
self.df_save = self.df_save.join(self.df_asin_syn, on=['asin'], how="left_anti") self.df_save = self.df_save.join(self.df_asin_syn, on=['asin'], how="left_anti")
self.df_save = self.df_save.withColumn('site_name', F.lit(self.site_name)) self.df_save = self.df_save.withColumn('site_name', F.lit(self.site_name))
self.df_save = self.df_save.withColumn('date_type', F.lit(self.date_type)) self.df_save = self.df_save.withColumn('date_type', F.lit(self.date_type))
......
...@@ -6,7 +6,7 @@ from utils.secure_db_client import get_remote_engine ...@@ -6,7 +6,7 @@ from utils.secure_db_client import get_remote_engine
def export_data(site_name, date_type, date_info): def export_data(site_name, date_type, date_info):
engine = get_remote_engine( engine = get_remote_engine(
site_name="us", # -> database "selection" site_name=site_name, # -> database "selection"
db_type="postgresql_14", # -> 服务端 alias "mysql" db_type="postgresql_14", # -> 服务端 alias "mysql"
# user="fangxingjun", # -> 服务端 alias "mysql" # user="fangxingjun", # -> 服务端 alias "mysql"
# user_token="5f1b2e9c3a4d7f60" # 可不传,走默认 # user_token="5f1b2e9c3a4d7f60" # 可不传,走默认
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment