Commit a3a44cc8 by chenyuanjie

导出pg集群表待调用keepa数据

parent 0a04ad9f
...@@ -200,10 +200,10 @@ class DwtAsinSync(Templates): ...@@ -200,10 +200,10 @@ class DwtAsinSync(Templates):
df_keepa = self.spark.sql(f""" df_keepa = self.spark.sql(f"""
select asin from dim_keepa_asin_info select asin from dim_keepa_asin_info
where site_name = '{self.site_name}' where site_name = '{self.site_name}'
and package_length >= 0 and package_length > 0
and package_width >= 0 and package_width > 0
and package_height >= 0 and package_height > 0
and weight >= 0 and (package_weight > 0 or item_weight > 0)
""").repartition(40, 'asin') """).repartition(40, 'asin')
df = df.join(df_keepa, on='asin', how='left_anti').cache() df = df.join(df_keepa, on='asin', how='left_anti').cache()
print(f"排除keepa后数据量: {df.count()}") print(f"排除keepa后数据量: {df.count()}")
......
...@@ -244,15 +244,15 @@ class ExportAsinWithoutKeepa(object): ...@@ -244,15 +244,15 @@ class ExportAsinWithoutKeepa(object):
print(f"筛选后数据量: {df.count()}") print(f"筛选后数据量: {df.count()}")
# 排除 dim_keepa_asin_info 中已有有效keepa数据的ASIN # 排除 dim_keepa_asin_info 中已有有效keepa数据的ASIN
# 若 package_length/width/height/weight 任意一个 < 0,视为数据异常,不排除(需重新抓取) # 有效定义:长/宽/高 > 0,且 package_weight 或 item_weight 任意一个 > 0(与 export_need_profit_rate 取数规则一致)
print("8. 排除已有keepa数据的ASIN (dim_keepa_asin_info)") print("8. 排除已有keepa数据的ASIN (dim_keepa_asin_info)")
df_keepa = self.spark.sql(f""" df_keepa = self.spark.sql(f"""
select asin from dim_keepa_asin_info select asin from dim_keepa_asin_info
where site_name = '{self.site_name}' where site_name = '{self.site_name}'
and package_length >= 0 and package_length > 0
and package_width >= 0 and package_width > 0
and package_height >= 0 and package_height > 0
and weight >= 0 and (package_weight > 0 or item_weight > 0)
""").repartition(40, 'asin') """).repartition(40, 'asin')
df = df.join(df_keepa, on='asin', how='left_anti').cache() df = df.join(df_keepa, on='asin', how='left_anti').cache()
print(f"排除keepa后数据量: {df.count()}") print(f"排除keepa后数据量: {df.count()}")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment