Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
0bcdbf7c
Commit
0bcdbf7c
authored
Mar 20, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
流量选品-打包数量解析迭代
parent
5bf88f84
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
7 deletions
+16
-7
dim_asin_detail.py
Pyspark_job/dim/dim_asin_detail.py
+16
-7
No files found.
Pyspark_job/dim/dim_asin_detail.py
View file @
0bcdbf7c
...
@@ -159,7 +159,8 @@ class DimAsinDetail(object):
...
@@ -159,7 +159,8 @@ class DimAsinDetail(object):
REGEXP_REPLACE(seller_json, chr(10), '') as seller_json, buy_box_seller_type as asin_buy_box_seller_type,
REGEXP_REPLACE(seller_json, chr(10), '') as seller_json, buy_box_seller_type as asin_buy_box_seller_type,
customer_reviews_json, parent_asin, img_list, created_at as created_time, updated_at as updated_time,
customer_reviews_json, parent_asin, img_list, created_at as created_time, updated_at as updated_time,
updated_at as dt, variat_num as variation_num, fbm_delivery_price as asin_fbm_price,
updated_at as dt, variat_num as variation_num, fbm_delivery_price as asin_fbm_price,
get_json_object(product_json, '$.Color') as product_json_color
get_json_object(product_json, '$.Color') as product_json_color,
get_json_object(product_json, '$.Number of Items') as product_json_number_of_items
from ods_asin_detail where site_name='{self.site_name}' {self.date_sql}"""
from ods_asin_detail where site_name='{self.site_name}' {self.date_sql}"""
print
(
sql
)
print
(
sql
)
self
.
df_asin_detail
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
self
.
df_asin_detail
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
...
@@ -348,15 +349,23 @@ class DimAsinDetail(object):
...
@@ -348,15 +349,23 @@ class DimAsinDetail(object):
)
.
withColumn
(
)
.
withColumn
(
"variat_package_quantity_is_abnormal"
,
self
.
df_asin_detail
.
variat_parse
.
getField
(
"is_package_quantity_abnormal"
)
"variat_package_quantity_is_abnormal"
,
self
.
df_asin_detail
.
variat_parse
.
getField
(
"is_package_quantity_abnormal"
)
)
.
drop
(
"title_parse"
,
"variat_parse"
,
"variat_attribute"
)
)
.
drop
(
"title_parse"
,
"variat_parse"
,
"variat_attribute"
)
# Number of Items:直接从 product_json 提取,cast 失败(脏数据)自动为 null
self
.
df_asin_detail
=
self
.
df_asin_detail
.
withColumn
(
self
.
df_asin_detail
=
self
.
df_asin_detail
.
withColumn
(
"package_quantity"
,
F
.
expr
(
""" CASE
"number_of_items"
,
F
.
col
(
"product_json_number_of_items"
)
.
cast
(
"int"
)
WHEN title_package_quantity is null and variat_package_quantity is not null THEN variat_package_quantity
)
.
drop
(
"product_json_number_of_items"
)
WHEN title_package_quantity is not null THEN title_package_quantity
# 优先级:Number of Items > 属性字段 > 标题解析 > 默认1
self
.
df_asin_detail
=
self
.
df_asin_detail
.
withColumn
(
"package_quantity"
,
F
.
expr
(
"""CASE
WHEN number_of_items IS NOT NULL AND number_of_items > 0 THEN number_of_items
WHEN variat_package_quantity IS NOT NULL THEN variat_package_quantity
WHEN title_package_quantity IS NOT NULL THEN title_package_quantity
ELSE 1 END"""
))
.
withColumn
(
ELSE 1 END"""
))
.
withColumn
(
"is_package_quantity_abnormal"
,
F
.
expr
(
"""CASE
"is_package_quantity_abnormal"
,
F
.
expr
(
"""CASE
WHEN title_package_quantity is null and variat_package_quantity is not null THEN variat_package_quantity_is_abnormal
WHEN number_of_items IS NOT NULL AND number_of_items > 0 THEN 0
WHEN title_package_quantity is not null THEN title_package_quantity_is_abnormal
WHEN variat_package_quantity IS NOT NULL THEN variat_package_quantity_is_abnormal
ELSE 2 END"""
))
.
drop
(
"title_package_quantity"
,
"variat_package_quantity"
,
"title_package_quantity_is_abnormal"
,
"variat_package_quantity_is_abnormal"
)
WHEN title_package_quantity IS NOT NULL THEN title_package_quantity_is_abnormal
ELSE 2 END"""
))
.
drop
(
"number_of_items"
,
"title_package_quantity"
,
"variat_package_quantity"
,
"title_package_quantity_is_abnormal"
,
"variat_package_quantity_is_abnormal"
)
self
.
df_asin_detail
=
self
.
df_asin_detail
.
join
(
self
.
df_user_package_num
,
on
=
[
'asin'
,
'asin_title'
],
how
=
'left'
)
self
.
df_asin_detail
=
self
.
df_asin_detail
.
join
(
self
.
df_user_package_num
,
on
=
[
'asin'
,
'asin_title'
],
how
=
'left'
)
self
.
df_asin_detail
=
self
.
df_asin_detail
.
withColumn
(
self
.
df_asin_detail
=
self
.
df_asin_detail
.
withColumn
(
"package_quantity"
,
F
.
coalesce
(
F
.
col
(
"user_package_num"
),
F
.
col
(
"package_quantity"
)))
.
withColumn
(
"package_quantity"
,
F
.
coalesce
(
F
.
col
(
"user_package_num"
),
F
.
col
(
"package_quantity"
)))
.
withColumn
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment