Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
2fc06234
Commit
2fc06234
authored
Feb 04, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix
parent
f4da7619
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
5 deletions
+5
-5
dim_asin_profit_rate_info.py
Pyspark_job/dim/dim_asin_profit_rate_info.py
+1
-1
dwt_flow_keepa_asin.py
Pyspark_job/dwt/dwt_flow_keepa_asin.py
+4
-4
No files found.
Pyspark_job/dim/dim_asin_profit_rate_info.py
View file @
2fc06234
...
...
@@ -38,7 +38,7 @@ class DimAsinProfitRateInfo(object):
def
handle_data
(
self
):
# 去重
window
=
Window
.
partitionBy
([
'asin'
,
'price'
])
.
orderBy
(
window
=
Window
.
partitionBy
([
'asin'
,
'price'
,
'package_length'
,
'package_width'
,
'package_height'
,
'weight'
])
.
orderBy
(
self
.
df_asin_profit
.
updated_time
.
desc_nulls_last
()
)
self
.
df_asin_profit
=
self
.
df_asin_profit
.
withColumn
(
...
...
Pyspark_job/dwt/dwt_flow_keepa_asin.py
View file @
2fc06234
...
...
@@ -63,13 +63,13 @@ class DwtFlowKeepaAsin(object):
# 读取已经计算过利润率的asin
sql
=
f
"""
select asin, price from dim_asin_profit_rate_info where site_name = '{self.site_name}' and date_info = '{self.date_info}'
select asin, price
, package_length, package_width, package_height, weight
from dim_asin_profit_rate_info where site_name = '{self.site_name}' and date_info = '{self.date_info}'
"""
self
.
df_calc_asin
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
.
repartition
(
40
,
'asin'
)
.
cache
()
# 读取已经导出过asin+price,避免重复计算
sql
=
f
"""
select asin, price from dwt_flow_keepa_asin where site_name = '{self.site_name}'
select asin, price
, package_length, package_width, package_height, weight
from dwt_flow_keepa_asin where site_name = '{self.site_name}'
"""
self
.
df_export_asin
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
.
repartition
(
40
,
'asin'
)
.
cache
()
...
...
@@ -79,9 +79,9 @@ class DwtFlowKeepaAsin(object):
)
.
join
(
self
.
df_keepa_asin
,
on
=
'asin'
,
how
=
'inner'
)
.
join
(
self
.
df_calc_asin
,
on
=
[
'asin'
,
'price'
],
how
=
'left_anti'
self
.
df_calc_asin
,
on
=
[
'asin'
,
'price'
,
'package_length'
,
'package_width'
,
'package_height'
,
'weight'
],
how
=
'left_anti'
)
.
join
(
self
.
df_export_asin
,
on
=
[
'asin'
,
'price'
],
how
=
'left_anti'
self
.
df_export_asin
,
on
=
[
'asin'
,
'price'
,
'package_length'
,
'package_width'
,
'package_height'
,
'weight'
],
how
=
'left_anti'
)
.
cache
()
self
.
df_flow_asin
.
unpersist
()
self
.
df_category_id
.
unpersist
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment