Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
616c9bd6
Commit
616c9bd6
authored
Apr 21, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix
parent
66c45f86
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
4 deletions
+7
-4
es_asin_profit_rate.py
Pyspark_job/export_es/es_asin_profit_rate.py
+7
-4
No files found.
Pyspark_job/export_es/es_asin_profit_rate.py
View file @
616c9bd6
...
@@ -163,17 +163,20 @@ class EsAsinProfitRate(object):
...
@@ -163,17 +163,20 @@ class EsAsinProfitRate(object):
'profit_key'
,
'asin'
,
'price'
,
'ocean_profit'
,
'air_profit'
,
'update_time'
'profit_key'
,
'asin'
,
'price'
,
'ocean_profit'
,
'air_profit'
,
'update_time'
)
)
# 从Doris获取asin_crawl_date
(用于利润率主索引写入)
# 从Doris获取asin_crawl_date
和asin_price,用profit_key关联
df_crawl_date
=
DorisHelper
.
spark_import_with_flight
(
df_crawl_date
=
DorisHelper
.
spark_import_with_flight
(
session
=
self
.
spark
,
session
=
self
.
spark
,
table_identifier
=
f
"selection.{self.site_name}_asin_latest_detail"
,
table_identifier
=
f
"selection.{self.site_name}_asin_latest_detail"
,
read_fields
=
"asin,asin_crawl_date"
read_fields
=
"asin,asin_crawl_date
,asin_price
"
)
.
withColumn
(
)
.
withColumn
(
"asin_crawl_date"
,
F
.
substring
(
F
.
col
(
"asin_crawl_date"
),
1
,
10
)
"asin_crawl_date"
,
F
.
substring
(
F
.
col
(
"asin_crawl_date"
),
1
,
10
)
)
.
repartition
(
40
,
'asin'
)
)
.
withColumn
(
"profit_key"
,
F
.
concat_ws
(
"_"
,
F
.
col
(
"asin"
),
F
.
round
(
F
.
col
(
"asin_price"
),
2
))
)
.
select
(
'profit_key'
,
'asin_crawl_date'
)
\
.
repartition
(
40
,
'profit_key'
)
self
.
df_asin_profit_rate
=
self
.
df_asin_profit_rate
.
join
(
self
.
df_asin_profit_rate
=
self
.
df_asin_profit_rate
.
join
(
df_crawl_date
,
on
=
'
asin
'
,
how
=
'left'
df_crawl_date
,
on
=
'
profit_key
'
,
how
=
'left'
)
.
select
(
)
.
select
(
'profit_key'
,
'asin'
,
'price'
,
'ocean_profit'
,
'air_profit'
,
'update_time'
,
'asin_crawl_date'
'profit_key'
,
'asin'
,
'price'
,
'ocean_profit'
,
'air_profit'
,
'update_time'
,
'asin_crawl_date'
)
.
cache
()
)
.
cache
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment