Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
bef91688
Commit
bef91688
authored
Jun 12, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新品asin规则迭代
parent
b0597344
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
2 deletions
+17
-2
dim_asin_detail.py
Pyspark_job/dim/dim_asin_detail.py
+17
-2
No files found.
Pyspark_job/dim/dim_asin_detail.py
View file @
bef91688
...
...
@@ -71,6 +71,7 @@ class DimAsinDetail(object):
self
.
df_self_asin
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_category
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_variat
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_keepa_tracking
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
# 调用公用udf函数
self
.
udf_new_asin_flag
=
F
.
udf
(
udf_new_asin_flag
,
IntegerType
())
self
.
handle_string_num_value
=
F
.
udf
(
myUDF
,
StringType
())
...
...
@@ -215,6 +216,17 @@ class DimAsinDetail(object):
"category_first_name"
,
F
.
lower
(
"category_first_name"
)
)
.
repartition
(
100
)
.
persist
(
StorageLevel
.
DISK_ONLY
)
self
.
df_asin_category
.
show
(
10
,
truncate
=
False
)
print
(
"9. 获取keepa追踪时间"
)
sql
=
f
"""
select asin,
date_format(from_unixtime((cast(tracking_since as bigint) + 21564000) * 60), 'yyyy-MM-dd') as keepa_tracking_since
from dim_keepa_asin_info
where site_name='{self.site_name}'
and tracking_since is not null
"""
print
(
sql
)
self
.
df_keepa_tracking
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
.
repartition
(
100
)
.
persist
(
StorageLevel
.
DISK_ONLY
)
self
.
df_keepa_tracking
.
show
(
10
,
truncate
=
False
)
if
self
.
date_type
in
[
'month'
,
'month_week'
,
'month_aba_me'
]
and
self
.
date_info
<
'2024-06'
:
sql
=
f
"""
SELECT asin, parent_asin, color as asin_color, `size` as asin_size, style as asin_style,
...
...
@@ -414,12 +426,15 @@ class DimAsinDetail(object):
)
.
otherwise
(
F
.
lit
(
None
))
)
self
.
df_asin_keep_date
.
unpersist
()
# 关联 keepa 追踪时间
self
.
df_asin_detail
=
self
.
df_asin_detail
.
join
(
self
.
df_keepa_tracking
,
on
=
'asin'
,
how
=
'left'
)
self
.
df_keepa_tracking
.
unpersist
()
# 处理asin各类型信息
def
handle_asin_flag
(
self
):
# 生成is_asin_new字段(是否asin新品标记)
self
.
df_asin_detail
=
self
.
df_asin_detail
.
withColumn
(
"asin_is_new"
,
self
.
udf_new_asin_flag
(
F
.
col
(
'asin_launch_time'
),
F
.
lit
(
self
.
cal_date
)))
\
"asin_is_new"
,
self
.
udf_new_asin_flag
(
F
.
greatest
(
F
.
col
(
'asin_launch_time'
),
F
.
col
(
'keepa_tracking_since'
)
),
F
.
lit
(
self
.
cal_date
)))
\
.
withColumn
(
"asin_is_aadd"
,
F
.
expr
(
f
"""CASE WHEN INSTR(asin_img_type, '3') > 0 THEN 1 ELSE 0 END"""
))
\
.
withColumn
(
"asin_is_video"
,
F
.
expr
(
f
"""CASE WHEN INSTR(asin_img_type, '2') > 0 THEN 1 ELSE 0 END"""
))
\
.
withColumn
(
"asin_is_picture"
,
F
.
expr
(
f
"""CASE WHEN INSTR(asin_img_type, '1') > 0 THEN 1 ELSE 0 END"""
))
\
...
...
@@ -554,7 +569,7 @@ class DimAsinDetail(object):
"asin_bought_month"
,
"asin_length"
,
"asin_width"
,
"asin_height"
,
"asin_is_self"
,
"customer_reviews_json"
,
"img_list"
,
"variat_list"
,
F
.
round
(
"asin_fbm_price"
,
2
)
.
alias
(
"asin_fbm_price"
),
"current_asin"
,
"amazon_label"
,
"current_asin"
,
"amazon_label"
,
"keepa_tracking_since"
,
F
.
lit
(
self
.
site_name
)
.
alias
(
'site_name'
),
F
.
lit
(
self
.
date_type
)
.
alias
(
'date_type'
),
F
.
lit
(
self
.
date_info
)
.
alias
(
'date_info'
))
.
persist
(
StorageLevel
.
MEMORY_ONLY
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment