Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
957e48f8
Commit
957e48f8
authored
May 12, 2026
by
hejiangming
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
店铺新增字段计算 导出增加判断 空分区不执行
parent
dac2671d
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
46 additions
and
7 deletions
+46
-7
dwt_fb_base_report.py
Pyspark_job/dwt/dwt_fb_base_report.py
+0
-0
dwt_fb_base_report.py
Pyspark_job/sqoop_export/dwt_fb_base_report.py
+42
-4
ods_seller_account_feedback.py
Pyspark_job/sqoop_import/ods_seller_account_feedback.py
+4
-3
No files found.
Pyspark_job/dwt/dwt_fb_base_report.py
View file @
957e48f8
This diff is collapsed.
Click to expand it.
Pyspark_job/sqoop_export/dwt_fb_base_report.py
View file @
957e48f8
...
@@ -6,7 +6,7 @@ sys.path.append(os.path.dirname(sys.path[0]))
...
@@ -6,7 +6,7 @@ sys.path.append(os.path.dirname(sys.path[0]))
from
utils.db_util
import
DBUtil
from
utils.db_util
import
DBUtil
from
utils.ssh_util
import
SSHUtil
from
utils.ssh_util
import
SSHUtil
from
utils.common_util
import
CommonUtil
from
utils.common_util
import
CommonUtil
from
utils.hdfs_utils
import
HdfsUtils
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
site_name
=
CommonUtil
.
get_sys_arg
(
1
,
None
)
site_name
=
CommonUtil
.
get_sys_arg
(
1
,
None
)
date_type
=
CommonUtil
.
get_sys_arg
(
2
,
None
)
date_type
=
CommonUtil
.
get_sys_arg
(
2
,
None
)
...
@@ -36,6 +36,15 @@ if __name__ == '__main__':
...
@@ -36,6 +36,15 @@ if __name__ == '__main__':
# 获取数据库连接
# 获取数据库连接
engine
=
DBUtil
.
get_db_engine
(
db_type
,
site_name
)
engine
=
DBUtil
.
get_db_engine
(
db_type
,
site_name
)
# 导出前校验 Hive 分区是否有数据,避免空分区触发交换导致 PG 数据被清空
hive_partition_path
=
f
"/home/big_data_selection/dwt/dwt_aba_st_analytics/site_name={site_name}/date_type={date_type}/date_info={date_info}"
hive_files
=
HdfsUtils
.
read_list
(
hive_partition_path
)
if
not
hive_files
:
print
(
f
"[ERROR] Hive 分区无数据文件,路径:{hive_partition_path},跳过导出,请先检查 DWT 计算任务是否正常写入!"
)
engine
.
dispose
()
sys
.
exit
(
1
)
print
(
f
"Hive 分区文件数:{len(hive_files)},路径:{hive_partition_path},继续导出"
)
# 保证幂等性,先删除原始表同周期的数据
# 保证幂等性,先删除原始表同周期的数据
sql
=
f
"""
sql
=
f
"""
drop table if exists {export_tb};
drop table if exists {export_tb};
...
@@ -92,7 +101,36 @@ if __name__ == '__main__':
...
@@ -92,7 +101,36 @@ if __name__ == '__main__':
"rating_30_day_num"
,
"rating_30_day_num"
,
"rating_90_day_num"
,
"rating_90_day_num"
,
"rating_1_year_num"
,
"rating_1_year_num"
,
"rating_lifetime_num"
"rating_lifetime_num"
,
# 功能1:销量展示
"fb_shop_total_sales"
,
"fb_shop_home_sales"
,
"fb_shop_home_null_flag"
,
# 功能2:FBM 占比
"fb_fbm_asin_num"
,
"fb_fbm_valid_asin_num"
,
"fb_fbm_rate"
,
"fb_fbm_valid_rate"
,
# 功能3:Hot New 榜单
"fb_nsr_asin_num"
,
"fb_nsr_rate"
,
# 功能4:Best Seller 榜单
"fb_bs_asin_num"
,
"fb_bs_rate"
,
# 功能5:新品销量占比 / 新品数量占比(流量选品口径)
"fb_new_asin_sales_rate"
,
# 功能6:新品/老品平均利润率
"fb_new_ocean_profit_rate"
,
"fb_new_air_profit_rate"
,
"fb_old_ocean_profit_rate"
,
"fb_old_air_profit_rate"
,
"fb_flow_new_asin_rate"
,
# 功能7:店铺综合评分(各星级占比)
"fb_star_5_pct"
,
"fb_star_4_pct"
,
"fb_star_3_pct"
,
"fb_star_2_pct"
,
"fb_star_1_pct"
],
],
partition_dict
=
{
partition_dict
=
{
"site_name"
:
site_name
,
"site_name"
:
site_name
,
...
@@ -117,8 +155,8 @@ if __name__ == '__main__':
...
@@ -117,8 +155,8 @@ if __name__ == '__main__':
cp_index_flag
=
False
,
cp_index_flag
=
False
,
)
)
update_workflow_sql
=
f
"""
update_workflow_sql
=
f
"""
update selection.workflow_progress set `status`='导出pg集群完成', status_val=6, over_date=CURRENT_TIME, is_end='是' where page='店铺Feedback'
update selection.workflow_progress set `status`='导出pg集群完成', status_val=6, over_date=CURRENT_TIME, is_end='是' where page='店铺Feedback'
and `date_info`='{date_info}' and date_type='{date_type}' and site_name='{site_name}'
and `date_info`='{date_info}' and date_type='{date_type}' and site_name='{site_name}'
"""
"""
CommonUtil
.
modify_export_workflow_status
(
update_workflow_sql
,
site_name
,
date_type
,
date_info
)
CommonUtil
.
modify_export_workflow_status
(
update_workflow_sql
,
site_name
,
date_type
,
date_info
)
...
...
Pyspark_job/sqoop_import/ods_seller_account_feedback.py
View file @
957e48f8
...
@@ -33,9 +33,10 @@ if __name__ == '__main__':
...
@@ -33,9 +33,10 @@ if __name__ == '__main__':
num,
num,
created_at,
created_at,
updated_at,
updated_at,
regexp_replace(seller_address, E'[
\\
r
\\
n
\\
t]+', ' ', 'g') as seller_address,
regexp_replace(seller_address, E'[
\\
r
\\
n
\\
t]+', ' ', 'g') as seller_address,
regexp_replace(seller_rating, E'[
\\
r
\\
n
\\
t]+', ' ', 'g') as seller_rating,
regexp_replace(seller_rating, E'[
\\
r
\\
n
\\
t]+', ' ', 'g') as seller_rating,
regexp_replace(feedback_histogram, E'[
\\
r
\\
n
\\
t]+', ' ', 'g') as feedback_histogram
regexp_replace(feedback_histogram, E'[
\\
r
\\
n
\\
t]+', ' ', 'g') as feedback_histogram,
regexp_replace(metadata_json, E'[
\\
r
\\
n
\\
t]+', ' ', 'g') as metadata_json
from {import_table}
from {import_table}
where 1=1
where 1=1
and
\
$CONDITIONS
and
\
$CONDITIONS
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment