Commit 6e15b3dc by chenyuanjie

ABA搜索词-新增字段DD50、DD100、DD200

parent 62e30b59
......@@ -175,7 +175,7 @@ class DwdStMeasure(Templates):
self.df_asin_bs = self.spark.sql(sql).cache()
self.df_asin_bs.show(10)
sql = f"select asin, asin_title, asin_price, parent_asin " \
sql = f"select asin, asin_title, asin_price, parent_asin, asin_bought_month " \
f"from dim_asin_detail where site_name='{self.site_name}' and date_type='{self.date_type.replace('_old', '')}' and date_info='{self.date_info}';"
print("sql:", sql)
self.df_asin_detail = self.spark.sql(sql).cache()
......@@ -413,17 +413,34 @@ class DwdStMeasure(Templates):
df_st_asin_agg, on=['asin'], how='left'
)
elif cal_type == "st":
# 计算搜索词DD占比
df_asin_bought_month = self.df_asin_detail.select('asin', 'asin_bought_month').join(
self.df_asin_amazon_orders, on=['asin'], how='left'
).withColumn(
"asin_bought_month", F.coalesce(F.col("asin_bought_month"), F.col("asin_amazon_orders"))
).drop("asin_amazon_orders")
df_st_asin_agg = self.df_st_asin_duplicated.select("search_term", "asin").join(
self.df_asin_self, on='asin', how='left'
).join(
df_asin_bought_month, on='asin', how='left'
).withColumn(
"is_self_asin",
F.when(F.col("is_self_asin").isNotNull(), F.col("is_self_asin")).otherwise(F.lit(0))
).groupby(['search_term']).agg(
F.sum('is_self_asin').alias("st_self_asin_counts"),
F.count('asin').alias("st_total_asin_counts")
F.count('asin').alias("st_total_asin_counts"),
F.sum(F.when(F.col("asin_bought_month") >= 50, 1).otherwise(0)).alias("st_dd50_counts"),
F.sum(F.when(F.col("asin_bought_month") >= 100, 1).otherwise(0)).alias("st_dd100_counts"),
F.sum(F.when(F.col("asin_bought_month") >= 200, 1).otherwise(0)).alias("st_dd200_counts")
).withColumn(
'st_self_asin_proportion', F.round(F.col('st_self_asin_counts') / F.col('st_total_asin_counts'), 4)
).withColumn(
'st_dd50_proportion', F.round(F.col('st_dd50_counts') / F.col('st_total_asin_counts'), 4)
).withColumn(
'st_dd100_proportion', F.round(F.col('st_dd100_counts') / F.col('st_total_asin_counts'), 4)
).withColumn(
'st_self_asin_proportion',
F.round(F.col('st_self_asin_counts') / F.col('st_total_asin_counts'), 4)
'st_dd200_proportion', F.round(F.col('st_dd200_counts') / F.col('st_total_asin_counts'), 4)
)
df = df.join(
df_st_asin_agg, on=['search_term'], how='left'
......
......@@ -181,7 +181,10 @@ class DwtAbaStAnalytics(Templates):
st_zr_counts,
st_sp_counts,
st_self_asin_counts,
st_self_asin_proportion
st_self_asin_proportion,
st_dd50_proportion,
st_dd100_proportion,
st_dd200_proportion
from dwd_st_measure
where site_name = '{self.site_name}'
and date_type = '{self.date_type}'
......@@ -903,7 +906,10 @@ class DwtAbaStAnalytics(Templates):
"st_self_asin_proportion",
"lang",
"asin_movie_type_count",
"is_hidden_cate"
"is_hidden_cate",
"st_dd50_proportion",
"st_dd100_proportion",
"st_dd200_proportion"
)
# 空值处理
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment