Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
6e15b3dc
Commit
6e15b3dc
authored
Dec 22, 2025
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ABA搜索词-新增字段DD50、DD100、DD200
parent
62e30b59
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
6 deletions
+29
-6
dwd_st_asin_measure.py
Pyspark_job/dwd/dwd_st_asin_measure.py
+21
-4
dwt_aba_st_analytics.py
Pyspark_job/dwt/dwt_aba_st_analytics.py
+8
-2
No files found.
Pyspark_job/dwd/dwd_st_asin_measure.py
View file @
6e15b3dc
...
...
@@ -175,7 +175,7 @@ class DwdStMeasure(Templates):
self
.
df_asin_bs
=
self
.
spark
.
sql
(
sql
)
.
cache
()
self
.
df_asin_bs
.
show
(
10
)
sql
=
f
"select asin, asin_title, asin_price, parent_asin "
\
sql
=
f
"select asin, asin_title, asin_price, parent_asin
, asin_bought_month
"
\
f
"from dim_asin_detail where site_name='{self.site_name}' and date_type='{self.date_type.replace('_old', '')}' and date_info='{self.date_info}';"
print
(
"sql:"
,
sql
)
self
.
df_asin_detail
=
self
.
spark
.
sql
(
sql
)
.
cache
()
...
...
@@ -413,17 +413,34 @@ class DwdStMeasure(Templates):
df_st_asin_agg
,
on
=
[
'asin'
],
how
=
'left'
)
elif
cal_type
==
"st"
:
# 计算搜索词DD占比
df_asin_bought_month
=
self
.
df_asin_detail
.
select
(
'asin'
,
'asin_bought_month'
)
.
join
(
self
.
df_asin_amazon_orders
,
on
=
[
'asin'
],
how
=
'left'
)
.
withColumn
(
"asin_bought_month"
,
F
.
coalesce
(
F
.
col
(
"asin_bought_month"
),
F
.
col
(
"asin_amazon_orders"
))
)
.
drop
(
"asin_amazon_orders"
)
df_st_asin_agg
=
self
.
df_st_asin_duplicated
.
select
(
"search_term"
,
"asin"
)
.
join
(
self
.
df_asin_self
,
on
=
'asin'
,
how
=
'left'
)
.
join
(
df_asin_bought_month
,
on
=
'asin'
,
how
=
'left'
)
.
withColumn
(
"is_self_asin"
,
F
.
when
(
F
.
col
(
"is_self_asin"
)
.
isNotNull
(),
F
.
col
(
"is_self_asin"
))
.
otherwise
(
F
.
lit
(
0
))
)
.
groupby
([
'search_term'
])
.
agg
(
F
.
sum
(
'is_self_asin'
)
.
alias
(
"st_self_asin_counts"
),
F
.
count
(
'asin'
)
.
alias
(
"st_total_asin_counts"
)
F
.
count
(
'asin'
)
.
alias
(
"st_total_asin_counts"
),
F
.
sum
(
F
.
when
(
F
.
col
(
"asin_bought_month"
)
>=
50
,
1
)
.
otherwise
(
0
))
.
alias
(
"st_dd50_counts"
),
F
.
sum
(
F
.
when
(
F
.
col
(
"asin_bought_month"
)
>=
100
,
1
)
.
otherwise
(
0
))
.
alias
(
"st_dd100_counts"
),
F
.
sum
(
F
.
when
(
F
.
col
(
"asin_bought_month"
)
>=
200
,
1
)
.
otherwise
(
0
))
.
alias
(
"st_dd200_counts"
)
)
.
withColumn
(
'st_self_asin_proportion'
,
F
.
round
(
F
.
col
(
'st_self_asin_counts'
)
/
F
.
col
(
'st_total_asin_counts'
),
4
)
)
.
withColumn
(
'st_dd50_proportion'
,
F
.
round
(
F
.
col
(
'st_dd50_counts'
)
/
F
.
col
(
'st_total_asin_counts'
),
4
)
)
.
withColumn
(
'st_dd100_proportion'
,
F
.
round
(
F
.
col
(
'st_dd100_counts'
)
/
F
.
col
(
'st_total_asin_counts'
),
4
)
)
.
withColumn
(
'st_self_asin_proportion'
,
F
.
round
(
F
.
col
(
'st_self_asin_counts'
)
/
F
.
col
(
'st_total_asin_counts'
),
4
)
'st_dd200_proportion'
,
F
.
round
(
F
.
col
(
'st_dd200_counts'
)
/
F
.
col
(
'st_total_asin_counts'
),
4
)
)
df
=
df
.
join
(
df_st_asin_agg
,
on
=
[
'search_term'
],
how
=
'left'
...
...
Pyspark_job/dwt/dwt_aba_st_analytics.py
View file @
6e15b3dc
...
...
@@ -181,7 +181,10 @@ class DwtAbaStAnalytics(Templates):
st_zr_counts,
st_sp_counts,
st_self_asin_counts,
st_self_asin_proportion
st_self_asin_proportion,
st_dd50_proportion,
st_dd100_proportion,
st_dd200_proportion
from dwd_st_measure
where site_name = '{self.site_name}'
and date_type = '{self.date_type}'
...
...
@@ -903,7 +906,10 @@ class DwtAbaStAnalytics(Templates):
"st_self_asin_proportion"
,
"lang"
,
"asin_movie_type_count"
,
"is_hidden_cate"
"is_hidden_cate"
,
"st_dd50_proportion"
,
"st_dd100_proportion"
,
"st_dd200_proportion"
)
# 空值处理
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment