Commit bb777f59 by chenyuanjie

流量选品月流程,增加top品牌badge

parent bc0deed5
......@@ -196,6 +196,7 @@ CREATE TABLE IF NOT EXISTS `selection`.`{table_name}`
`skus_num_creat` INT NULL,
`skus_num_creat_all` INT NULL,
`title_matching_degree` DECIMAL(20,4) NULL,
`brand_badge_reason` STRING NULL,
INDEX idx_title (`title`) USING INVERTED PROPERTIES("parser" = "english") COMMENT '标题倒排索引',
INDEX idx_title_stem (`title_stem`) USING INVERTED PROPERTIES("parser" = "english") COMMENT '标题词干倒排索引'
) ENGINE=OLAP
......@@ -337,7 +338,8 @@ SELECT
COALESCE(aa.auctions_num_all, 0) AS auctions_num_all,
COALESCE(aa.skus_num_creat, 0) AS skus_num_creat,
COALESCE(aa.skus_num_creat_all, 0) AS skus_num_creat_all,
f.title_matching_degree
f.title_matching_degree,
f.brand_badge_reason
FROM `dwt`.`{site_name}_flow_asin_month` f
LEFT JOIN `dwd`.`dwd_asin_profit_rate_latest` pr
ON f.asin = pr.asin AND f.price = pr.price AND pr.site_name = '{site_name}'
......@@ -495,7 +497,8 @@ def main(site_name, date_info, result_type='formal'):
asin_describe, asin_fbm_price, describe_len,
title_matching_degree,
multi_color_flag, multi_color_str,
amazon_label
amazon_label,
brand_badge_reason
FROM dwt_flow_asin
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
"""
......@@ -633,6 +636,8 @@ def main(site_name, date_info, result_type='formal'):
F.col('multi_color_str'),
# ===== Amazon标签 =====
F.col('amazon_label'),
# ===== 品牌徽章 =====
F.col('brand_badge_reason'),
).cache()
count = df_save.count()
......@@ -657,7 +662,7 @@ def main(site_name, date_info, result_type='formal'):
"asin_bought_month, asin_lqs_rating, asin_lqs_rating_detail, asin_lob_info, is_contains_lob_info, "
"is_package_quantity_abnormal, zr_flow_proportion, matrix_flow_proportion, matrix_ao_val, "
"product_features, img_info, collapse_asin, follow_sellers_count, asin_describe, fbm_price, describe_len, "
"title_matching_degree, multi_color_flag, multi_color_str, amazon_label"
"title_matching_degree, multi_color_flag, multi_color_str, amazon_label, brand_badge_reason"
)
print(f"[Step 3] 写入 Doris {DORIS_DB}.{doris_table}")
DorisHelper.spark_export_with_columns(
......
......@@ -80,6 +80,7 @@ class DwtFlowAsin(Templates):
self.df_keepa_asin = self.spark.sql(f"select 1+1;")
self.df_asin_source_flag = self.spark.sql(f"select 1+1;")
self.df_parent_asin_variat_agg = self.spark.sql(f"select 1+1;")
self.df_brand_badge = self.spark.sql(f"select 1+1;")
self.color_set = set() # 颜色词表,read_data 阶段填充
@staticmethod
......@@ -302,7 +303,19 @@ class DwtFlowAsin(Templates):
self.df_asin_source_flag = self.df_asin_source_flag.repartition(60).persist(StorageLevel.DISK_ONLY)
self.df_asin_source_flag.show(10, truncate=False)
print("11.读取颜色词表 dim_asin_color_info")
print("11.读取当月品牌徽章数据 dwd_st_brand_badge")
sql = f"""
select brand, brand_badge_reason
from dwd_st_brand_badge
where site_name = '{self.site_name}'
and date_type = '{self.date_type}'
and date_info = '{self.date_info}'
"""
print("sql:" + sql)
self.df_brand_badge = F.broadcast(self.spark.sql(sqlQuery=sql))
self.df_brand_badge.show(10, truncate=False)
print("12.读取颜色词表 dim_asin_color_info")
color_rows = self.spark.sql(
f"SELECT lower(en_name) as en_name FROM dim_asin_color_info WHERE site_name='{self.site_name}'"
).collect()
......@@ -643,6 +656,11 @@ class DwtFlowAsin(Templates):
self.df_flow_asin_last.unpersist()
self.df_flow_asin_last_year.unpersist()
def handle_brand_badge(self):
    """Merge the brand badge frame into the ASIN detail frame.

    Performs a left join of ``self.df_brand_badge`` onto
    ``self.df_asin_detail`` where ``asin_brand_name`` equals the badge
    frame's ``brand``, removes the ``brand`` column from the joined result,
    and stores that result back on ``self.df_asin_detail``.
    """
    asin_detail = self.df_asin_detail
    brand_badge = self.df_brand_badge
    # Join condition: ASIN-side brand name against the badge-side brand key.
    brand_matches = asin_detail['asin_brand_name'] == brand_badge['brand']
    # 'left' keeps every ASIN detail row even when no badge row matches.
    joined = asin_detail.join(brand_badge, brand_matches, how='left')
    # The join key from the badge side is no longer needed after the merge.
    self.df_asin_detail = joined.drop('brand')
def handle_asin_different_source(self):
self.df_asin_detail = self.df_asin_detail.join(
self.df_asin_source_flag, on=['asin'], how='left'
......@@ -809,8 +827,8 @@ class DwtFlowAsin(Templates):
"matrix_ao_val", "follow_sellers_count", "seller_json", "asin_describe", "asin_fbm_price",
"asin_bought_mom", "asin_bought_yoy", "describe_len", "tracking_since", "tracking_since_type",
"asin_source_flag", "bsr_last_seen_at", "bsr_seen_count_30d", "nsr_last_seen_at", "nsr_seen_count_30d",
"multi_color_flag", "multi_color_str", "amazon_label",
"asin_weight_str", "best_sellers_herf", "best_sellers_rank",
"multi_color_flag", "multi_color_str", "amazon_label", "asin_weight_str", "best_sellers_herf", "best_sellers_rank",
"brand_badge_reason",
F.lit(self.site_name).alias("site_name"), F.lit(self.date_type).alias("date_type"),
F.lit(self.date_info).alias("date_info"))
self.df_save = self.df_save.na.fill(
......@@ -932,6 +950,7 @@ class DwtFlowAsin(Templates):
self.handle_other_new_col()
self.handle_multi_color_flag()
self.handle_asin_different_source()
self.handle_brand_badge()
self.handle_column()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment