Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
4c3ab8b0
Commit
4c3ab8b0
authored
Apr 15, 2026
by
hejiangming
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
月搜索词 导出 使用交换copy表方式导出 其他不变
parent
007e6b5a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
3 deletions
+34
-3
dwt_aba_st_analytics.py
Pyspark_job/sqoop_export/dwt_aba_st_analytics.py
+34
-3
No files found.
Pyspark_job/sqoop_export/dwt_aba_st_analytics.py
View file @
4c3ab8b0
...
...
@@ -187,17 +187,34 @@ if __name__ == '__main__':
# sql建表和创建分区
sql
=
f
"""
create table if not exists {export_master_tb}
create table if not exists {export_master_tb}
(
like {export_tb_before} including indexes including comments
)
partition by range (date_info);
create table if not exists {export_table} partition of {export_master_tb} for values from ('{date_info}') to ('{next_val}');
truncate table {export_table};
"""
DBUtil
.
engine_exec_sql
(
engine
,
sql
)
if
date_type
==
DateTypes
.
month
.
name
:
# month类型:Sqoop先导入到copy表,再通过分区交换替换正式分区,避免truncate造成的空窗期
export_tb_copy
=
f
"{export_table}_copy"
sql_copy
=
f
"""
create table if not exists {export_tb_copy}
(
like {export_table} including indexes including comments
);
truncate table {export_tb_copy};
"""
DBUtil
.
engine_exec_sql
(
engine
,
sql_copy
)
# Sqoop导入的目标表改为copy表
export_table_original
=
export_table
export_table
=
export_tb_copy
else
:
# day/week类型:直接truncate正式分区后导入(保持原有逻辑)
DBUtil
.
engine_exec_sql
(
engine
,
f
"truncate table {export_table};"
)
else
:
print
(
"输入的date_type有误,请检查!!"
)
quit
()
...
...
@@ -240,9 +257,23 @@ if __name__ == '__main__':
(site_name, report_date, status, status_val, table_name, date_type, page, is_end, remark,export_db_type)
VALUES('{site_name}', '{datetime.now().date()}', '导出PG数据库完成', 14, 'us_aba_last_30_day', '30_day', 'ABA搜索词', '是', 'ABA搜索词最近30天表','{db_type}');
"""
elif
date_type
==
DateTypes
.
month
.
name
:
# month类型:Sqoop导入到copy表完成后,通过分区交换替换正式分区,避免空窗期
DBUtil
.
exchange_pg_part_tb
(
engine
,
source_tb_name
=
export_tb_copy
,
part_master_tb
=
export_master_tb
,
part_target_tb
=
export_table_original
,
cp_index_flag
=
False
,
part_val
=
{
"from"
:
[
date_info
],
"to"
:
[
next_val
]})
# 交换完成后删除copy表,节省空间(交换后copy表里存的是旧数据)
DBUtil
.
engine_exec_sql
(
engine
,
f
"drop table if exists {export_tb_copy};"
)
update_workflow_sql
=
f
"""
UPDATE selection.workflow_everyday SET status='导出PG数据库完成', status_val=14,is_end ='是',export_db_type = '{db_type}'
WHERE site_name= '{site_name}' and date_type='{date_type}' and report_date= '{date_info}' and page ='ABA搜索词'
"""
else
:
update_workflow_sql
=
f
"""
UPDATE selection.workflow_everyday SET status='导出PG数据库完成', status_val=14,is_end ='是',export_db_type = '{db_type}'
UPDATE selection.workflow_everyday SET status='导出PG数据库完成', status_val=14,is_end ='是',export_db_type = '{db_type}'
WHERE site_name= '{site_name}' and date_type='{date_type}' and report_date= '{date_info}' and page ='ABA搜索词'
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment