Commit e7745fcc by chenyuanjie

amazon标签存储格式调整

parent 9cd4cad0
...@@ -162,7 +162,7 @@ class DimAsinDetail(object): ...@@ -162,7 +162,7 @@ class DimAsinDetail(object):
get_json_object(product_json, '$.Color') as product_json_color, get_json_object(product_json, '$.Color') as product_json_color,
get_json_object(product_json, '$.Number of Items') as product_json_number_of_items, get_json_object(product_json, '$.Number of Items') as product_json_number_of_items,
current_asin, current_asin,
nullif(get_json_object(amazon_label, '$.badge_type'), 'unknown') as amazon_label nullif(coalesce(get_json_object(amazon_label, '$[0].badge_type'), get_json_object(amazon_label, '$.badge_type')), 'unknown') as amazon_label
from ods_asin_detail where site_name='{self.site_name}' {self.date_sql}""" from ods_asin_detail where site_name='{self.site_name}' {self.date_sql}"""
print(sql) print(sql)
self.df_asin_detail = self.spark.sql(sqlQuery=sql) self.df_asin_detail = self.spark.sql(sqlQuery=sql)
......
...@@ -810,8 +810,10 @@ class KafkaFlowAsinDetail(Templates): ...@@ -810,8 +810,10 @@ class KafkaFlowAsinDetail(Templates):
.withColumn("img_type_arr", F.split(F.col("img_type"), ","))\ .withColumn("img_type_arr", F.split(F.col("img_type"), ","))\
.withColumn("img_type_arr", F.expr("transform(img_type_arr, x -> cast(x as int))"))\ .withColumn("img_type_arr", F.expr("transform(img_type_arr, x -> cast(x as int))"))\
.withColumn("amazon_label", F.when( .withColumn("amazon_label", F.when(
F.get_json_object(F.col("amazon_label"), "$.badge_type") != "unknown", F.coalesce(F.get_json_object(F.col("amazon_label"), "$[0].badge_type"),
F.get_json_object(F.col("amazon_label"), "$.badge_type") F.get_json_object(F.col("amazon_label"), "$.badge_type")) != "unknown",
F.coalesce(F.get_json_object(F.col("amazon_label"), "$[0].badge_type"),
F.get_json_object(F.col("amazon_label"), "$.badge_type"))
)) ))
df_save = df.select("asin", "ao_val", "zr_counts", "sp_counts", "sb_counts", "vi_counts", "bs_counts", "ac_counts", df_save = df.select("asin", "ao_val", "zr_counts", "sp_counts", "sb_counts", "vi_counts", "bs_counts", "ac_counts",
"tr_counts", "er_counts", "bsr_orders", "bsr_orders_sale", "title", "title_len", "price", "tr_counts", "er_counts", "bsr_orders", "bsr_orders_sale", "title", "title_len", "price",
......
...@@ -809,8 +809,10 @@ class KafkaRankAsinDetail(Templates): ...@@ -809,8 +809,10 @@ class KafkaRankAsinDetail(Templates):
.withColumn("img_type_arr", F.split(F.col("img_type"), ","))\ .withColumn("img_type_arr", F.split(F.col("img_type"), ","))\
.withColumn("img_type_arr", F.expr("transform(img_type_arr, x -> cast(x as int))"))\ .withColumn("img_type_arr", F.expr("transform(img_type_arr, x -> cast(x as int))"))\
.withColumn("amazon_label", F.when( .withColumn("amazon_label", F.when(
F.get_json_object(F.col("amazon_label"), "$.badge_type") != "unknown", F.coalesce(F.get_json_object(F.col("amazon_label"), "$[0].badge_type"),
F.get_json_object(F.col("amazon_label"), "$.badge_type") F.get_json_object(F.col("amazon_label"), "$.badge_type")) != "unknown",
F.coalesce(F.get_json_object(F.col("amazon_label"), "$[0].badge_type"),
F.get_json_object(F.col("amazon_label"), "$.badge_type"))
)) ))
df_save = df.select("asin", "ao_val", "zr_counts", "sp_counts", "sb_counts", "vi_counts", "bs_counts", "ac_counts", df_save = df.select("asin", "ao_val", "zr_counts", "sp_counts", "sb_counts", "vi_counts", "bs_counts", "ac_counts",
"tr_counts", "er_counts", "bsr_orders", "bsr_orders_sale", "title", "title_len", "price", "tr_counts", "er_counts", "bsr_orders", "bsr_orders_sale", "title", "title_len", "price",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment