Commit fec40875 by fangxingjun

no message

parent c023bddc
...@@ -104,6 +104,21 @@ class DwdNsrBsrKeepaAsin(Templates): ...@@ -104,6 +104,21 @@ class DwdNsrBsrKeepaAsin(Templates):
df_result.show(10, truncate=False) df_result.show(10, truncate=False)
df_result.groupBy("asin_cate_flag").count().orderBy("count", ascending=False).show(truncate=False) df_result.groupBy("asin_cate_flag").count().orderBy("count", ascending=False).show(truncate=False)
self.df_save_asin_cate = df_result self.df_save_asin_cate = df_result
df_bsr_agg = self.df_asin_bsr.groupBy(['asin']).agg(
F.max("date_info").alias("bsr_latest_date"),
# F.count("date_info").alias("bsr_30day_count"),
F.countDistinct("date_info").alias("bsr_30day_count"),
)
df_nsr_agg = self.df_asin_nsr.groupBy(['asin']).agg(
F.max("date_info").alias("nsr_latest_date"),
# F.count("date_info").alias("nsr_30day_count"),
F.countDistinct("date_info").alias("nsr_30day_count"),
)
self.df_save_asin_cate = self.df_save_asin_cate.join(
df_bsr_agg, on=['asin'], how='left'
).join(
df_nsr_agg, on=['asin'], how='left'
)
self.df_save_asin_cate = self.df_save_asin_cate.withColumn("site_name", F.lit(self.site_name)) self.df_save_asin_cate = self.df_save_asin_cate.withColumn("site_name", F.lit(self.site_name))
self.df_save_asin_cate.show(10, truncate=False) self.df_save_asin_cate.show(10, truncate=False)
self.save_data_common( self.save_data_common(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment