Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
9a02c65a
Commit
9a02c65a
authored
Mar 26, 2026
by
wangjing
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
68c22cae
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
1 deletions
+5
-1
dim_st_detail_week.py
Pyspark_job/dim/dim_st_detail_week.py
+4
-0
dim_st_detail_week.py
Pyspark_job/sqoop_export/dim_st_detail_week.py
+1
-1
No files found.
Pyspark_job/dim/dim_st_detail_week.py
View file @
9a02c65a
...
...
@@ -98,6 +98,10 @@ class DwtStDetailWeek(object):
'dt_rank'
,
F
.
row_number
()
.
over
(
window
=
window
)
)
.
filter
(
'dt_rank=1'
)
.
drop
(
'dt_rank'
,
'updated_time'
)
.
cache
()
# 对数据列清洗 有些是\0的数据
for
col
in
[
'search_term'
,
'asin1'
,
'asin2'
,
'asin3'
,
'product_title1'
,
'product_title2'
,
'product_title3'
,
'brand1'
,
'brand2'
,
'brand3'
,
'category1'
,
'category2'
,
'category3'
]:
self
.
df_st_detail
=
self
.
df_st_detail
.
withColumn
(
col
,
F
.
regexp_replace
(
F
.
col
(
col
),
'
\x00
'
,
''
))
self
.
df_st_detail_last_week
=
self
.
df_st_detail
.
filter
(
f
"date_info = '{self.date_info_last_week}'"
)
for
col
in
self
.
cols
:
self
.
df_st_detail_last_week
=
self
.
df_st_detail_last_week
.
withColumnRenamed
(
...
...
Pyspark_job/sqoop_export/dim_st_detail_week.py
View file @
9a02c65a
...
...
@@ -103,7 +103,7 @@ if __name__ == '__main__':
"site_name"
:
site_name
,
"date_type"
:
date_type
,
"date_info"
:
date_info
}
}
,
num_mappers
=
2
)
client
=
SSHUtil
.
get_ssh_client
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment