Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
2691d9d0
Commit
2691d9d0
authored
Mar 16, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
店铺数据解析店铺评分
parent
47359d84
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
1 deletions
+27
-1
dwt_fb_base_report.py
Pyspark_job/dwt/dwt_fb_base_report.py
+22
-0
dwt_fb_base_report.py
Pyspark_job/sqoop_export/dwt_fb_base_report.py
+5
-1
No files found.
Pyspark_job/dwt/dwt_fb_base_report.py
View file @
2691d9d0
...
...
@@ -110,6 +110,7 @@ class DwtFbBaseReport(object):
count_lifetime as count_lifetime_num,
country_name as fb_country_name,
seller_address,
seller_rating,
date_format(updated_at, 'yyyy-MM-dd HH:mm:ss') as fb_crawl_date
from ods_seller_account_feedback
where site_name = '{self.site_name}'
...
...
@@ -134,6 +135,23 @@ class DwtFbBaseReport(object):
"""
self
.
df_fb_feedback
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
self
.
df_fb_feedback
=
self
.
df_fb_feedback
.
drop_duplicates
([
'seller_id'
])
.
cache
()
# 解析seller_rating: 格式为"30天|-|90天|-|1年|-|历史",切分后转double,-1改为0
_rating_split
=
F
.
split
(
F
.
col
(
"seller_rating"
),
r"\|-\|"
)
self
.
df_fb_feedback
=
self
.
df_fb_feedback
\
.
withColumn
(
"rating_30_day_num"
,
F
.
when
(
_rating_split
.
getItem
(
0
)
.
cast
(
DoubleType
())
==
-
1.0
,
F
.
lit
(
0.0
))
.
otherwise
(
_rating_split
.
getItem
(
0
)
.
cast
(
DoubleType
())))
\
.
withColumn
(
"rating_90_day_num"
,
F
.
when
(
_rating_split
.
getItem
(
1
)
.
cast
(
DoubleType
())
==
-
1.0
,
F
.
lit
(
0.0
))
.
otherwise
(
_rating_split
.
getItem
(
1
)
.
cast
(
DoubleType
())))
\
.
withColumn
(
"rating_1_year_num"
,
F
.
when
(
_rating_split
.
getItem
(
2
)
.
cast
(
DoubleType
())
==
-
1.0
,
F
.
lit
(
0.0
))
.
otherwise
(
_rating_split
.
getItem
(
2
)
.
cast
(
DoubleType
())))
\
.
withColumn
(
"rating_lifetime_num"
,
F
.
when
(
_rating_split
.
getItem
(
3
)
.
cast
(
DoubleType
())
==
-
1.0
,
F
.
lit
(
0.0
))
.
otherwise
(
_rating_split
.
getItem
(
3
)
.
cast
(
DoubleType
())))
print
(
sql
)
# 获取我们内部的店铺与asin的数据库(从搜索词抓下来,店铺与asin的关系表)
...
...
@@ -406,6 +424,10 @@ class DwtFbBaseReport(object):
F
.
lit
(
None
)
.
alias
(
'usr_mask_progress'
),
F
.
col
(
'business_name'
),
F
.
col
(
'business_addr'
),
F
.
col
(
'rating_30_day_num'
),
F
.
col
(
'rating_90_day_num'
),
F
.
col
(
'rating_1_year_num'
),
F
.
col
(
'rating_lifetime_num'
),
F
.
lit
(
self
.
site_name
)
.
alias
(
'site_name'
),
F
.
lit
(
self
.
date_type
)
.
alias
(
'date_type'
),
F
.
lit
(
self
.
date_info
)
.
alias
(
'date_info'
)
...
...
Pyspark_job/sqoop_export/dwt_fb_base_report.py
View file @
2691d9d0
...
...
@@ -88,7 +88,11 @@ if __name__ == '__main__':
"usr_mask_type"
,
"usr_mask_progress"
,
"business_name"
,
"business_addr"
"business_addr"
,
"rating_30_day_num"
,
"rating_90_day_num"
,
"rating_1_year_num"
,
"rating_lifetime_num"
],
partition_dict
=
{
"site_name"
:
site_name
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment