Commit d10e99bf by chenyuanjie

top品牌改为Doris关联查询

parent 84d2c994
......@@ -340,7 +340,7 @@ SELECT
COALESCE(aa.skus_num_creat, 0) AS skus_num_creat,
COALESCE(aa.skus_num_creat_all, 0) AS skus_num_creat_all,
f.title_matching_degree,
f.brand_badge_reason
bb.brand_badge_reason
FROM `dwt`.`{site_name}_flow_asin_month` f
LEFT JOIN `dwd`.`dwd_asin_profit_rate_latest` pr
ON f.asin = pr.asin AND f.price = pr.price AND pr.site_name = '{site_name}'
......@@ -387,6 +387,8 @@ LEFT JOIN (
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
) cf ON f.asin = cf.asin
LEFT JOIN `dwd`.`dwd_asin_auction` aa ON f.asin = aa.asin
LEFT JOIN `dwd`.`dwd_st_brand_badge` bb
ON f.brand = bb.brand AND bb.site_name = '{site_name}' AND bb.date_info = '{date_info}'
WHERE f.date_info = '{date_info}'
"""
......@@ -498,8 +500,7 @@ def main(site_name, date_info, result_type='formal'):
asin_describe, asin_fbm_price, describe_len,
title_matching_degree,
multi_color_flag, multi_color_str,
amazon_label,
brand_badge_reason
amazon_label
FROM dwt_flow_asin
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
"""
......@@ -637,8 +638,6 @@ def main(site_name, date_info, result_type='formal'):
F.col('multi_color_str'),
# ===== Amazon标签 =====
F.col('amazon_label'),
# ===== 品牌徽章 =====
F.col('brand_badge_reason'),
).cache()
count = df_save.count()
......@@ -663,7 +662,7 @@ def main(site_name, date_info, result_type='formal'):
"asin_bought_month, asin_lqs_rating, asin_lqs_rating_detail, asin_lob_info, is_contains_lob_info, "
"is_package_quantity_abnormal, zr_flow_proportion, matrix_flow_proportion, matrix_ao_val, "
"product_features, img_info, collapse_asin, follow_sellers_count, asin_describe, fbm_price, describe_len, "
"title_matching_degree, multi_color_flag, multi_color_str, amazon_label, brand_badge_reason"
"title_matching_degree, multi_color_flag, multi_color_str, amazon_label"
)
print(f"[Step 3] 写入 Doris {DORIS_DB}.{doris_table}")
DorisHelper.spark_export_with_columns(
......
......@@ -80,7 +80,6 @@ class DwtFlowAsin(Templates):
self.df_keepa_asin = self.spark.sql(f"select 1+1;")
self.df_asin_source_flag = self.spark.sql(f"select 1+1;")
self.df_parent_asin_variat_agg = self.spark.sql(f"select 1+1;")
self.df_brand_badge = self.spark.sql(f"select 1+1;")
self.color_set = set() # 颜色词表,read_data 阶段填充
@staticmethod
......@@ -303,19 +302,7 @@ class DwtFlowAsin(Templates):
self.df_asin_source_flag = self.df_asin_source_flag.repartition(60).persist(StorageLevel.DISK_ONLY)
self.df_asin_source_flag.show(10, truncate=False)
print("11.读取当月品牌徽章数据 dwd_st_brand_badge")
sql = f"""
select brand, brand_badge_reason
from dwd_st_brand_badge
where site_name = '{self.site_name}'
and date_type = '{self.date_type}'
and date_info = '{self.date_info}'
"""
print("sql:" + sql)
self.df_brand_badge = F.broadcast(self.spark.sql(sqlQuery=sql))
self.df_brand_badge.show(10, truncate=False)
print("12.读取颜色词表 dim_asin_color_info")
print("11.读取颜色词表 dim_asin_color_info")
color_rows = self.spark.sql(
f"SELECT lower(en_name) as en_name FROM dim_asin_color_info WHERE site_name='{self.site_name}'"
).collect()
......@@ -656,11 +643,6 @@ class DwtFlowAsin(Templates):
self.df_flow_asin_last.unpersist()
self.df_flow_asin_last_year.unpersist()
def handle_brand_badge(self):
self.df_asin_detail = self.df_asin_detail.join(
self.df_brand_badge, self.df_asin_detail['asin_brand_name'] == self.df_brand_badge['brand'], how='left'
).drop('brand')
def handle_asin_different_source(self):
self.df_asin_detail = self.df_asin_detail.join(
self.df_asin_source_flag, on=['asin'], how='left'
......@@ -827,8 +809,7 @@ class DwtFlowAsin(Templates):
"matrix_ao_val", "follow_sellers_count", "seller_json", "asin_describe", "asin_fbm_price",
"asin_bought_mom", "asin_bought_yoy", "describe_len", "tracking_since", "tracking_since_type",
"asin_source_flag", "bsr_last_seen_at", "bsr_seen_count_30d", "nsr_last_seen_at", "nsr_seen_count_30d",
"multi_color_flag", "multi_color_str", "amazon_label", "asin_weight_str", "best_sellers_herf", "best_sellers_rank",
"brand_badge_reason",
"multi_color_flag", "multi_color_str", "amazon_label", "asin_weight_str", "best_sellers_herf", "best_sellers_rank",
F.lit(self.site_name).alias("site_name"), F.lit(self.date_type).alias("date_type"),
F.lit(self.date_info).alias("date_info"))
self.df_save = self.df_save.na.fill(
......@@ -950,7 +931,6 @@ class DwtFlowAsin(Templates):
self.handle_other_new_col()
self.handle_multi_color_flag()
self.handle_asin_different_source()
self.handle_brand_badge()
self.handle_column()
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment