Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
61cd5e9d
Commit
61cd5e9d
authored
May 14, 2026
by
fangxingjun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
6b7f0ae7
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
48 additions
and
0 deletions
+48
-0
export_dwt_asin_sync.py
Pyspark_job/sqoop_export/export_dwt_asin_sync.py
+48
-0
No files found.
Pyspark_job/sqoop_export/export_dwt_asin_sync.py
View file @
61cd5e9d
import
math
import
os
import
os
import
sys
import
sys
import
time
import
time
import
traceback
import
traceback
import
pandas
as
pd
sys
.
path
.
append
(
os
.
path
.
dirname
(
sys
.
path
[
0
]))
sys
.
path
.
append
(
os
.
path
.
dirname
(
sys
.
path
[
0
]))
from
utils.secure_db_client
import
get_remote_engine
from
utils.secure_db_client
import
get_remote_engine
...
@@ -106,6 +109,50 @@ def export_data(site_name, date_type, date_info):
...
@@ -106,6 +109,50 @@ def export_data(site_name, date_type, date_info):
)
)
def get_minid_maxid(site_name, date_type, date_info):
    """Split the monthly sync table's id span into ranges and store them.

    Reads ``MIN(id)`` / ``MAX(id)`` over rows with ``state in (1,2)`` from
    ``{site_name}_all_syn_st_month_<date_info with '-' replaced by '_'>``
    through the ``postgresql_14`` engine, cuts that span into at most
    ``num_groups`` contiguous ranges, deletes the ``state <4`` rows of
    ``{site_name}_syn_asin_all_minid_maxid`` through the ``mysql`` engine and
    appends one ``"start-end"`` row per range.

    Args:
        site_name: Site code; used as the table-name prefix and passed to
            ``get_remote_engine``.
        date_type: Not used by this function.
        date_info: Period string (e.g. ``2022-1``); selects the source table
            and is stored alongside every range.

    Returns:
        None. When the query yields no usable min/max id, nothing is deleted
        or written.
    """
    engine = get_remote_engine(site_name=site_name, db_type="postgresql_14")
    engine_mysql = get_remote_engine(site_name=site_name, db_type="mysql")

    # Look up the smallest and largest id among the rows to be synced.
    print(site_name, ' 查询最小和最大 id')
    query = f"SELECT MIN(id) AS min_id, MAX(id) AS max_id FROM {site_name}_all_syn_st_month_{date_info.replace('-', '_')} where state in (1,2)"
    print(query)
    result = engine.read_sql(query)
    if result.shape[0] == 0:
        return

    min_id = result['min_id'].values[0]
    max_id = result['max_id'].values[0]
    # An aggregate without GROUP BY returns one row even when no row matches,
    # with NULL in both columns; the row-count check above cannot catch that,
    # and math.ceil()/range() below would raise on None/NaN.
    if pd.isna(min_id) or pd.isna(max_id):
        print(site_name, 'no rows with state in (1,2); skip min/max id grouping')
        return
    # Normalise driver/NumPy scalar types to plain ints for range().
    min_id, max_id = int(min_id), int(max_id)

    # Number of groups to aim for.
    num_groups = 150
    group_size = math.ceil((max_id - min_id + 1) / num_groups)

    # Build the ranges. The end of the last range is not clamped, so it may
    # exceed max_id; this matches the original behaviour.
    id_list = []
    for group_start in range(min_id, max_id + 1, group_size):
        group_end = group_start + group_size - 1
        print(f"Group: {group_start} - {group_end}")
        id_list.append([f'{group_start}-{group_end}', date_info])
    print(id_list)

    # NOTE(review): the 'yaer_month' spelling is kept as-is; presumably it is
    # the real column name in the destination table -- confirm before renaming.
    df_asin_img_video = pd.DataFrame(data=id_list, columns=['minid_maxid', 'yaer_month'])
    delete_sql = f'DELETE from {site_name}_syn_asin_all_minid_maxid where state <4'
    print('delete_sql::', delete_sql)
    engine_mysql.execute(delete_sql)
    engine_mysql.to_sql(df_asin_img_video, f'{site_name}_syn_asin_all_minid_maxid', if_exists='append')
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
# site_name = 'us'
# site_name = 'us'
# date_type = 'month'
# date_type = 'month'
...
@@ -115,4 +162,5 @@ if __name__ == '__main__':
...
@@ -115,4 +162,5 @@ if __name__ == '__main__':
date_info
=
sys
.
argv
[
3
]
# 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
date_info
=
sys
.
argv
[
3
]
# 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
export_data
(
site_name
,
date_type
,
date_info
)
export_data
(
site_name
,
date_type
,
date_info
)
if
site_name
in
[
"us"
,
"uk"
,
"de"
]
and
date_type
==
"month"
:
if
site_name
in
[
"us"
,
"uk"
,
"de"
]
and
date_type
==
"month"
:
get_minid_maxid
(
site_name
,
date_type
,
date_info
)
update_workflow_manager
(
site_name
,
date_type
,
date_info
)
update_workflow_manager
(
site_name
,
date_type
,
date_info
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment