Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
5c7a1380
Commit 5c7a1380 authored Jul 02, 2025 by chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
关联流量-新增字段 关联数
parent
a0670622
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 22 additions and 2 deletions
+22
-2
dwt_asin_related_traffic.py
Pyspark_job/dwt/dwt_asin_related_traffic.py
+18
-0
export_dwt_asin_related_traffic.py
Pyspark_job/sqoop_export/export_dwt_asin_related_traffic.py
+4
-2
No files found.
Pyspark_job/dwt/dwt_asin_related_traffic.py
View file @
5c7a1380
...
@@ -99,8 +99,25 @@ class DwtAsinRelatedTraffic(object):
...
@@ -99,8 +99,25 @@ class DwtAsinRelatedTraffic(object):
# 将所有编号列进行拼接
# 将所有编号列进行拼接
self
.
df_dim_asin_related_traffic
=
self
.
df_dim_asin_related_traffic
.
withColumn
(
self
.
df_dim_asin_related_traffic
=
self
.
df_dim_asin_related_traffic
.
withColumn
(
"related_type"
,
F
.
concat_ws
(
","
,
*
[
F
.
col
(
f
"{col}_num"
)
for
col
in
cols
])
"related_type"
,
F
.
concat_ws
(
","
,
*
[
F
.
col
(
f
"{col}_num"
)
for
col
in
cols
])
)
.
cache
()
# 统计关联数
df_related = self.df_dim_asin_related_traffic.select(
    'asin', F.explode(F.split(F.col('related_asin'), ',')).alias('related_asin')
).drop_duplicates(['asin', 'related_asin']).groupBy('related_asin').agg(
    F.count('asin').alias('related_count')
).withColumnRenamed('related_asin', 'asin')
)
self.df_dim_asin_related_traffic = self.df_dim_asin_related_traffic.join(
    df_related, on='asin', how='left'
).fillna({'related_count': 0})
# 数据落盘
# 数据落盘
def
save_data
(
self
):
def
save_data
(
self
):
self
.
df_save
=
self
.
df_dim_asin_related_traffic
.
select
(
self
.
df_save
=
self
.
df_dim_asin_related_traffic
.
select
(
...
@@ -108,6 +125,7 @@ class DwtAsinRelatedTraffic(object):
...
@@ -108,6 +125,7 @@ class DwtAsinRelatedTraffic(object):
'related_asin'
,
'related_asin'
,
'related_type'
,
'related_type'
,
'related_time'
,
'related_time'
,
'related_count'
,
F
.
lit
(
self
.
site_name
)
.
alias
(
'site_name'
),
F
.
lit
(
self
.
site_name
)
.
alias
(
'site_name'
),
F
.
lit
(
self
.
date_type
)
.
alias
(
'date_type'
),
F
.
lit
(
self
.
date_type
)
.
alias
(
'date_type'
),
F
.
lit
(
self
.
date_info
)
.
alias
(
'date_info'
)
F
.
lit
(
self
.
date_info
)
.
alias
(
'date_info'
)
...
...
Pyspark_job/sqoop_export/export_dwt_asin_related_traffic.py
View file @
5c7a1380
...
@@ -25,7 +25,8 @@ if __name__ == '__main__':
...
@@ -25,7 +25,8 @@ if __name__ == '__main__':
'asin'
,
'asin'
,
'related_asin'
,
'related_asin'
,
'related_type'
,
'related_type'
,
'related_time'
'related_time'
,
'related_count'
]
]
if
date_type
==
'month'
:
if
date_type
==
'month'
:
...
@@ -48,7 +49,8 @@ if __name__ == '__main__':
...
@@ -48,7 +49,8 @@ if __name__ == '__main__':
asin varchar(10) NOT NULL,
asin varchar(10) NOT NULL,
related_asin varchar(10)[] NOT NULL,
related_asin varchar(10)[] NOT NULL,
related_type int2[] NOT NULL,
related_type int2[] NOT NULL,
related_time varchar(10) NOT NULL
related_time varchar(10) NOT NULL,
related_count int4 NOT NULL
);
);
ALTER TABLE {export_tb} ALTER COLUMN related_asin TYPE text;
ALTER TABLE {export_tb} ALTER COLUMN related_asin TYPE text;
ALTER TABLE {export_tb} ALTER COLUMN related_type TYPE text;
ALTER TABLE {export_tb} ALTER COLUMN related_type TYPE text;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment