Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
3d10c8d2
Commit
3d10c8d2
authored
Jan 26, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ABA搜索词-更新利润率数据
parent
ff1ff0b8
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
22 deletions
+34
-22
dwt_aba_st_analytics.py
Pyspark_job/dwt/dwt_aba_st_analytics.py
+34
-22
No files found.
Pyspark_job/dwt/dwt_aba_st_analytics.py
View file @
3d10c8d2
...
...
@@ -48,10 +48,11 @@ class DwtAbaStAnalytics(Templates):
self
.
df_st_detail
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_st_key
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_st_market
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_st_volume_fba
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
#
self.df_st_volume_fba = self.spark.sql(f"select 1+1;")
self
.
df_st_brand
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_label
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_is_hidden_cate
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_profit_rate
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
# 自定义udf函数注册
self
.
u_contains
=
self
.
spark
.
udf
.
register
(
'u_contains'
,
self
.
udf_contains
,
IntegerType
())
...
...
@@ -377,20 +378,20 @@ class DwtAbaStAnalytics(Templates):
self
.
df_st_market
.
show
(
10
,
truncate
=
True
)
# 获取dwd_st_volume_fba 取gross_profit_fee_air 和 gross_profit_fee_sea
sql
=
f
"""
select
search_term,
gross_profit_fee_air,
gross_profit_fee_sea
from dwd_st_volume_fba
where site_name = '{self.site_name}'
and date_type = '{self.date_type}'
and date_info = '{self.date_info}'
"""
self
.
df_st_volume_fba
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
self
.
df_st_volume_fba
=
self
.
df_st_volume_fba
.
repartition
(
80
,
'search_term'
)
.
cache
()
print
(
"self.df_st_volume_fba:"
)
self
.
df_st_volume_fba
.
show
(
10
,
truncate
=
True
)
#
sql = f"""
# select
#
search_term,
#
gross_profit_fee_air,
# gross_profit_fee_sea
# from dwd_st_volume_fba
# where site_name = '{self.site_name}'
# and date_type = '{self.date_type}'
# and date_info = '{self.date_info}'
#
"""
#
self.df_st_volume_fba = self.spark.sql(sqlQuery=sql)
#
self.df_st_volume_fba = self.df_st_volume_fba.repartition(80, 'search_term').cache()
#
print("self.df_st_volume_fba:")
#
self.df_st_volume_fba.show(10, truncate=True)
# 获取影视标签dim_asin_label 取 asin_label_type
sql
=
f
"""
...
...
@@ -459,6 +460,15 @@ class DwtAbaStAnalytics(Templates):
print
(
"self.df_is_hidden_cate:"
)
self
.
df_is_hidden_cate
.
show
(
10
,
truncate
=
True
)
# asin利润率
sql
=
f
"""
select asin, price as asin_price, ocean_profit, air_profit
from dim_asin_profit_rate_info where site_name = '{self.site_name}'
"""
self
.
df_asin_profit_rate
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
.
repartition
(
80
,
'asin'
)
.
dropDuplicates
([
'asin'
,
'asin_price'
])
.
cache
()
print
(
"self.df_asin_profit_rate:"
)
self
.
df_asin_profit_rate
.
show
(
10
,
truncate
=
True
)
def
handle_data
(
self
):
# 对基础计算表进行关联
self
.
handle_base_join
()
...
...
@@ -485,11 +495,14 @@ class DwtAbaStAnalytics(Templates):
self
.
df_asin_detail
,
on
=
[
'asin'
],
how
=
'left'
)
.
join
(
self
.
df_asin_label
,
on
=
[
'asin'
],
how
=
'left'
)
.
join
(
self
.
df_asin_profit_rate
,
on
=
[
'asin'
,
'asin_price'
],
how
=
'left'
)
.
cache
()
self
.
df_st_asin_measure
.
unpersist
()
self
.
df_asin_measure
.
unpersist
()
self
.
df_asin_detail
.
unpersist
()
self
.
df_asin_label
.
unpersist
()
self
.
df_asin_profit_rate
.
unpersist
()
self
.
df_st_asin_cal
=
self
.
df_st_asin_join
.
join
(
self
.
df_seller_asin_country
,
on
=
[
'asin'
],
how
=
'left'
...
...
@@ -556,7 +569,9 @@ class DwtAbaStAnalytics(Templates):
F
.
sum
(
"asin_amazon_orders"
)
.
alias
(
"amazon_monthly_sales"
),
F
.
avg
(
"asin_title_len"
)
.
alias
(
"title_length_avg"
),
F
.
avg
(
"asin_rating"
)
.
alias
(
"rating_avg"
),
F
.
avg
(
"asin_total_comments"
)
.
alias
(
"total_comments_avg"
)
F
.
avg
(
"asin_total_comments"
)
.
alias
(
"total_comments_avg"
),
F
.
avg
(
"ocean_profit"
)
.
alias
(
"ocean_profit_avg"
),
F
.
avg
(
"air_profit"
)
.
alias
(
"air_profit_avg"
)
)
.
repartition
(
80
,
'search_term'
)
.
cache
()
def
handle_brand_seller_agg
(
self
):
...
...
@@ -767,8 +782,6 @@ class DwtAbaStAnalytics(Templates):
)
.
join
(
self
.
df_st_market
,
on
=
[
'search_term'
],
how
=
'left'
)
.
join
(
self
.
df_st_volume_fba
,
on
=
[
'search_term'
],
how
=
'left'
)
.
join
(
self
.
df_st_brand
,
on
=
[
'search_term'
],
how
=
'left'
)
.
join
(
self
.
df_is_hidden_cate
,
on
=
[
'st_bsr_cate_1_id_new'
],
how
=
'left'
...
...
@@ -777,7 +790,6 @@ class DwtAbaStAnalytics(Templates):
self
.
df_st_key
.
unpersist
()
self
.
df_st_num_stats
.
unpersist
()
self
.
df_st_market
.
unpersist
()
self
.
df_st_volume_fba
.
unpersist
()
self
.
df_st_brand
.
unpersist
()
self
.
df_is_hidden_cate
.
unpersist
()
...
...
@@ -862,8 +874,6 @@ class DwtAbaStAnalytics(Templates):
"supply_demand"
,
"market_cycle_type"
,
"color_proportion"
,
"gross_profit_fee_air"
,
"gross_profit_fee_sea"
,
"multi_color_proportion"
,
"multi_size_proportion"
,
"st_4_20_ao_avg"
,
...
...
@@ -905,7 +915,9 @@ class DwtAbaStAnalytics(Templates):
"is_hidden_cate"
,
"st_dd50_proportion"
,
"st_dd100_proportion"
,
"st_dd200_proportion"
"st_dd200_proportion"
,
F
.
round
(
"ocean_profit_avg"
,
4
)
.
alias
(
"gross_profit_fee_sea"
),
F
.
round
(
"air_profit_avg"
,
4
)
.
alias
(
"gross_profit_fee_air"
)
)
# 空值处理
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment