Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
e8a5c1e8
Commit
e8a5c1e8
authored
Feb 27, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
每日利润率清洗
parent
40fcc4ea
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
1 deletions
+13
-1
dim_asin_profit_rate_info.py
Pyspark_job/dim/dim_asin_profit_rate_info.py
+13
-1
No files found.
Pyspark_job/dim/dim_asin_profit_rate_info.py
View file @
e8a5c1e8
...
...
@@ -20,6 +20,7 @@ class DimAsinProfitRateInfo(object):
self
.
df_asin_profit
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_profit_history
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_keepa_asin
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_save
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
def
run
(
self
):
...
...
@@ -36,9 +37,20 @@ class DimAsinProfitRateInfo(object):
self
.
df_asin_profit
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
.
repartition
(
40
,
'asin'
)
.
cache
()
self
.
df_asin_profit_history
=
self
.
df_asin_profit
.
filter
(
f
"date_info < '{self.date_info}'"
)
.
cache
()
# 读取keepa数据
sql
=
f
"""
select asin, package_length, package_width, package_height, weight
from dim_keepa_asin_info where site_name = '{self.site_name}';
"""
self
.
df_keepa_asin
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
.
repartition
(
40
,
'asin'
)
def
handle_data
(
self
):
# 因为keepa数据存在更新的情况,保留与keepa最新数据所对应的数据行
self
.
df_asin_profit
=
self
.
df_asin_profit
.
join
(
self
.
df_keepa_asin
,
on
=
[
'asin'
,
'package_length'
,
'package_width'
,
'package_height'
,
'weight'
],
how
=
'inner'
)
# 去重
window
=
Window
.
partitionBy
([
'asin'
,
'price'
,
'package_length'
,
'package_width'
,
'package_height'
,
'weight'
])
.
orderBy
(
window
=
Window
.
partitionBy
([
'asin'
,
'price'
])
.
orderBy
(
self
.
df_asin_profit
.
updated_time
.
desc_nulls_last
()
)
self
.
df_asin_profit
=
self
.
df_asin_profit
.
withColumn
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment