Commit 0bcdbf7c by chenyuanjie

流量选品-打包数量解析迭代

parent 5bf88f84
...@@ -159,7 +159,8 @@ class DimAsinDetail(object): ...@@ -159,7 +159,8 @@ class DimAsinDetail(object):
REGEXP_REPLACE(seller_json, chr(10), '') as seller_json, buy_box_seller_type as asin_buy_box_seller_type, REGEXP_REPLACE(seller_json, chr(10), '') as seller_json, buy_box_seller_type as asin_buy_box_seller_type,
customer_reviews_json, parent_asin, img_list, created_at as created_time, updated_at as updated_time, customer_reviews_json, parent_asin, img_list, created_at as created_time, updated_at as updated_time,
updated_at as dt, variat_num as variation_num, fbm_delivery_price as asin_fbm_price, updated_at as dt, variat_num as variation_num, fbm_delivery_price as asin_fbm_price,
get_json_object(product_json, '$.Color') as product_json_color get_json_object(product_json, '$.Color') as product_json_color,
get_json_object(product_json, '$.Number of Items') as product_json_number_of_items
from ods_asin_detail where site_name='{self.site_name}' {self.date_sql}""" from ods_asin_detail where site_name='{self.site_name}' {self.date_sql}"""
print(sql) print(sql)
self.df_asin_detail = self.spark.sql(sqlQuery=sql) self.df_asin_detail = self.spark.sql(sqlQuery=sql)
...@@ -348,15 +349,23 @@ class DimAsinDetail(object): ...@@ -348,15 +349,23 @@ class DimAsinDetail(object):
).withColumn( ).withColumn(
"variat_package_quantity_is_abnormal", self.df_asin_detail.variat_parse.getField("is_package_quantity_abnormal") "variat_package_quantity_is_abnormal", self.df_asin_detail.variat_parse.getField("is_package_quantity_abnormal")
).drop("title_parse", "variat_parse", "variat_attribute") ).drop("title_parse", "variat_parse", "variat_attribute")
# Number of Items:直接从 product_json 提取,cast 失败(脏数据)自动为 null
self.df_asin_detail = self.df_asin_detail.withColumn( self.df_asin_detail = self.df_asin_detail.withColumn(
"package_quantity", F.expr(""" CASE "number_of_items", F.col("product_json_number_of_items").cast("int")
WHEN title_package_quantity is null and variat_package_quantity is not null THEN variat_package_quantity ).drop("product_json_number_of_items")
WHEN title_package_quantity is not null THEN title_package_quantity # 优先级:Number of Items > 属性字段 > 标题解析 > 默认1
self.df_asin_detail = self.df_asin_detail.withColumn(
"package_quantity", F.expr("""CASE
WHEN number_of_items IS NOT NULL AND number_of_items > 0 THEN number_of_items
WHEN variat_package_quantity IS NOT NULL THEN variat_package_quantity
WHEN title_package_quantity IS NOT NULL THEN title_package_quantity
ELSE 1 END""")).withColumn( ELSE 1 END""")).withColumn(
"is_package_quantity_abnormal", F.expr("""CASE "is_package_quantity_abnormal", F.expr("""CASE
WHEN title_package_quantity is null and variat_package_quantity is not null THEN variat_package_quantity_is_abnormal WHEN number_of_items IS NOT NULL AND number_of_items > 0 THEN 0
WHEN title_package_quantity is not null THEN title_package_quantity_is_abnormal WHEN variat_package_quantity IS NOT NULL THEN variat_package_quantity_is_abnormal
ELSE 2 END""")).drop("title_package_quantity", "variat_package_quantity", "title_package_quantity_is_abnormal", "variat_package_quantity_is_abnormal") WHEN title_package_quantity IS NOT NULL THEN title_package_quantity_is_abnormal
ELSE 2 END""")).drop("number_of_items", "title_package_quantity", "variat_package_quantity",
"title_package_quantity_is_abnormal", "variat_package_quantity_is_abnormal")
self.df_asin_detail = self.df_asin_detail.join(self.df_user_package_num, on=['asin', 'asin_title'], how='left') self.df_asin_detail = self.df_asin_detail.join(self.df_user_package_num, on=['asin', 'asin_title'], how='left')
self.df_asin_detail = self.df_asin_detail.withColumn( self.df_asin_detail = self.df_asin_detail.withColumn(
"package_quantity", F.coalesce(F.col("user_package_num"), F.col("package_quantity"))).withColumn( "package_quantity", F.coalesce(F.col("user_package_num"), F.col("package_quantity"))).withColumn(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment