Commit 8b03ea2a by chenyuanjie

隐藏分类补充解析

parent b75a2b29
...@@ -521,10 +521,27 @@ class DwtFlowAsin(Templates): ...@@ -521,10 +521,27 @@ class DwtFlowAsin(Templates):
CASE WHEN hide_flag = 1 THEN 1 WHEN category_first_id = 'grocery' and category_id != '6492272011' THEN 1 CASE WHEN hide_flag = 1 THEN 1 WHEN category_first_id = 'grocery' and category_id != '6492272011' THEN 1
WHEN category_id in ('21393128011', '21377129011', '21377127011', '21377130011', '21388218011', '21377132011') THEN 1 WHEN category_id in ('21393128011', '21377129011', '21377127011', '21377130011', '21388218011', '21377132011') THEN 1
ELSE 0 END""")).drop("hide_flag") ELSE 0 END""")).drop("hide_flag")
self.df_asin_detail = self.df_asin_detail.withColumn("asin_is_need", F.expr(""" # 解析 asin_category_desc 取 › 分隔的第一个元素作为补充分类名称
CASE WHEN category_first_id in ('mobile-apps', 'audible', 'books', 'music', 'dmusic', 'digital-text', 'magazines', 'movies-tv', 'software', 'videogames', 'amazon-devices', 'boost', 'us-live-explorations', 'amazon-renewed') THEN 1 self.df_asin_detail = self.df_asin_detail.withColumn(
WHEN asin NOT LIKE 'B0%' THEN 1 "desc_category_first_name",
F.lower(F.trim(F.split(F.col("asin_category_desc"), "›").getItem(0)))
)
# 读取 Hive 分类维表,获取分类名称与ID的对应关系
sql_dim = f"""
select lower(trim(en_name)) as desc_category_first_name, category_first_id as desc_category_first_id
from dim_bsr_category_tree where site_name = '{self.site_name}' and category_parent_id = 0 and leaf_node = 2
"""
df_bsr_category = F.broadcast(self.spark.sql(sqlQuery=sql_dim))
# join 补充分类ID
self.df_asin_detail = self.df_asin_detail.join(df_bsr_category, on=['desc_category_first_name'], how='left')
# 两个分类ID均在过滤列表中才标记为1
need_categories = "('mobile-apps', 'audible', 'books', 'music', 'dmusic', 'digital-text', 'magazines', 'movies-tv', 'software', 'videogames', 'amazon-devices', 'boost', 'us-live-explorations', 'amazon-renewed')"
self.df_asin_detail = self.df_asin_detail.withColumn("asin_is_need", F.expr(f"""
CASE WHEN category_first_id in {need_categories}
AND desc_category_first_id in {need_categories} THEN 1
WHEN asin NOT LIKE 'B0%' THEN 1
ELSE 0 END""")) ELSE 0 END"""))
self.df_asin_detail = self.df_asin_detail.drop("desc_category_first_name", "desc_category_first_id")
self.df_asin_detail = self.df_asin_detail.withColumn("asin_type", F.expr(""" self.df_asin_detail = self.df_asin_detail.withColumn("asin_type", F.expr("""
CASE WHEN asin_is_self=1 THEN 1 WHEN asin_is_need=1 THEN 2 WHEN asin_is_hide=1 THEN 3 ELSE 0 END""" CASE WHEN asin_is_self=1 THEN 1 WHEN asin_is_need=1 THEN 2 WHEN asin_is_hide=1 THEN 3 ELSE 0 END"""
)).drop("asin_is_self", "asin_is_need", "asin_is_hide") )).drop("asin_is_self", "asin_is_need", "asin_is_hide")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment