Commit 6e15b3dc by chenyuanjie

ABA搜索词-新增字段DD50、DD100、DD200

parent 62e30b59
...@@ -175,7 +175,7 @@ class DwdStMeasure(Templates): ...@@ -175,7 +175,7 @@ class DwdStMeasure(Templates):
self.df_asin_bs = self.spark.sql(sql).cache() self.df_asin_bs = self.spark.sql(sql).cache()
self.df_asin_bs.show(10) self.df_asin_bs.show(10)
sql = f"select asin, asin_title, asin_price, parent_asin " \ sql = f"select asin, asin_title, asin_price, parent_asin, asin_bought_month " \
f"from dim_asin_detail where site_name='{self.site_name}' and date_type='{self.date_type.replace('_old', '')}' and date_info='{self.date_info}';" f"from dim_asin_detail where site_name='{self.site_name}' and date_type='{self.date_type.replace('_old', '')}' and date_info='{self.date_info}';"
print("sql:", sql) print("sql:", sql)
self.df_asin_detail = self.spark.sql(sql).cache() self.df_asin_detail = self.spark.sql(sql).cache()
...@@ -413,17 +413,34 @@ class DwdStMeasure(Templates): ...@@ -413,17 +413,34 @@ class DwdStMeasure(Templates):
df_st_asin_agg, on=['asin'], how='left' df_st_asin_agg, on=['asin'], how='left'
) )
elif cal_type == "st": elif cal_type == "st":
# 计算搜索词DD占比
df_asin_bought_month = self.df_asin_detail.select('asin', 'asin_bought_month').join(
self.df_asin_amazon_orders, on=['asin'], how='left'
).withColumn(
"asin_bought_month", F.coalesce(F.col("asin_bought_month"), F.col("asin_amazon_orders"))
).drop("asin_amazon_orders")
df_st_asin_agg = self.df_st_asin_duplicated.select("search_term", "asin").join( df_st_asin_agg = self.df_st_asin_duplicated.select("search_term", "asin").join(
self.df_asin_self, on='asin', how='left' self.df_asin_self, on='asin', how='left'
).join(
df_asin_bought_month, on='asin', how='left'
).withColumn( ).withColumn(
"is_self_asin", "is_self_asin",
F.when(F.col("is_self_asin").isNotNull(), F.col("is_self_asin")).otherwise(F.lit(0)) F.when(F.col("is_self_asin").isNotNull(), F.col("is_self_asin")).otherwise(F.lit(0))
).groupby(['search_term']).agg( ).groupby(['search_term']).agg(
F.sum('is_self_asin').alias("st_self_asin_counts"), F.sum('is_self_asin').alias("st_self_asin_counts"),
F.count('asin').alias("st_total_asin_counts") F.count('asin').alias("st_total_asin_counts"),
F.sum(F.when(F.col("asin_bought_month") >= 50, 1).otherwise(0)).alias("st_dd50_counts"),
F.sum(F.when(F.col("asin_bought_month") >= 100, 1).otherwise(0)).alias("st_dd100_counts"),
F.sum(F.when(F.col("asin_bought_month") >= 200, 1).otherwise(0)).alias("st_dd200_counts")
).withColumn(
'st_self_asin_proportion', F.round(F.col('st_self_asin_counts') / F.col('st_total_asin_counts'), 4)
).withColumn(
'st_dd50_proportion', F.round(F.col('st_dd50_counts') / F.col('st_total_asin_counts'), 4)
).withColumn(
'st_dd100_proportion', F.round(F.col('st_dd100_counts') / F.col('st_total_asin_counts'), 4)
).withColumn( ).withColumn(
'st_self_asin_proportion', 'st_dd200_proportion', F.round(F.col('st_dd200_counts') / F.col('st_total_asin_counts'), 4)
F.round(F.col('st_self_asin_counts') / F.col('st_total_asin_counts'), 4)
) )
df = df.join( df = df.join(
df_st_asin_agg, on=['search_term'], how='left' df_st_asin_agg, on=['search_term'], how='left'
...@@ -725,4 +742,4 @@ if __name__ == '__main__': ...@@ -725,4 +742,4 @@ if __name__ == '__main__':
date_type = sys.argv[2] # 参数2:类型:day/week/4_week/month/quarter date_type = sys.argv[2] # 参数2:类型:day/week/4_week/month/quarter
date_info = sys.argv[3] # 参数3:年-月-日/年-周/年-月/年-季, 比如: 2022-1 date_info = sys.argv[3] # 参数3:年-月-日/年-周/年-月/年-季, 比如: 2022-1
handle_obj = DwdStMeasure(site_name=site_name, date_type=date_type, date_info=date_info) handle_obj = DwdStMeasure(site_name=site_name, date_type=date_type, date_info=date_info)
handle_obj.run() handle_obj.run()
\ No newline at end of file
...@@ -181,7 +181,10 @@ class DwtAbaStAnalytics(Templates): ...@@ -181,7 +181,10 @@ class DwtAbaStAnalytics(Templates):
st_zr_counts, st_zr_counts,
st_sp_counts, st_sp_counts,
st_self_asin_counts, st_self_asin_counts,
st_self_asin_proportion st_self_asin_proportion,
st_dd50_proportion,
st_dd100_proportion,
st_dd200_proportion
from dwd_st_measure from dwd_st_measure
where site_name = '{self.site_name}' where site_name = '{self.site_name}'
and date_type = '{self.date_type}' and date_type = '{self.date_type}'
...@@ -903,7 +906,10 @@ class DwtAbaStAnalytics(Templates): ...@@ -903,7 +906,10 @@ class DwtAbaStAnalytics(Templates):
"st_self_asin_proportion", "st_self_asin_proportion",
"lang", "lang",
"asin_movie_type_count", "asin_movie_type_count",
"is_hidden_cate" "is_hidden_cate",
"st_dd50_proportion",
"st_dd100_proportion",
"st_dd200_proportion"
) )
# 空值处理 # 空值处理
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment