Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
b1660e04
Commit
b1660e04
authored
May 06, 2026
by
fangxingjun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
e7745fcc
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
42 additions
and
33 deletions
+42
-33
import_st_to_pg14.py
Pyspark_job/listen_program/import_st_to_pg14.py
+1
-1
export_dwt_asin_sync.py
Pyspark_job/sqoop_export/export_dwt_asin_sync.py
+41
-32
No files found.
Pyspark_job/listen_program/import_st_to_pg14.py
View file @
b1660e04
...
@@ -173,7 +173,7 @@ class ImportStToPg14(object):
...
@@ -173,7 +173,7 @@ class ImportStToPg14(object):
def
update_workflow_manager
(
self
):
def
update_workflow_manager
(
self
):
with
self
.
engine_mysql
.
begin
()
as
conn
:
with
self
.
engine_mysql
.
begin
()
as
conn
:
priority
=
self
.
site_name_pri_dict
[
self
.
site_name
]
priority
=
self
.
site_name_pri_dict
[
self
.
site_name
]
spider_script
=
f
'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_search_term_month.py {self.site_name}
2026-04
>/dev/null 2>&1 &";'
spider_script
=
f
'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_search_term_month.py {self.site_name}
{self.date_info}
>/dev/null 2>&1 &";'
update_sql_workflow
=
f
"""
update_sql_workflow
=
f
"""
INSERT INTO selection.workflow_manager
INSERT INTO selection.workflow_manager
(
(
...
...
Pyspark_job/sqoop_export/export_dwt_asin_sync.py
View file @
b1660e04
...
@@ -8,6 +8,7 @@ from utils.secure_db_client import get_remote_engine
...
@@ -8,6 +8,7 @@ from utils.secure_db_client import get_remote_engine
def
update_workflow_manager
(
site_name
,
date_type
,
date_info
):
def
update_workflow_manager
(
site_name
,
date_type
,
date_info
):
print
(
f
"当前执行的参数: {site_name, date_type, date_info}"
)
if
date_type
==
"month"
:
if
date_type
==
"month"
:
while
True
:
while
True
:
try
:
try
:
...
@@ -21,38 +22,46 @@ def update_workflow_manager(site_name, date_type, date_info):
...
@@ -21,38 +22,46 @@ def update_workflow_manager(site_name, date_type, date_info):
db_type
=
'mysql'
db_type
=
'mysql'
)
)
with
engine_mysql
.
begin
()
as
conn
:
with
engine_mysql
.
begin
()
as
conn
:
spider_script
=
f
'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_spider_month.py {site_name} {date_type} {date_info} >/dev/null 2>&1 &";'
priority
=
site_name_pri_dict
[
site_name
]
priority
=
site_name_pri_dict
[
site_name
]
update_sql_workflow
=
f
"""
update_sql_workflow
=
f
"""
INSERT INTO workflow_manager
INSERT INTO selection.workflow_manager
(
(
workflow_name,
workflow_name,
site_name,
site_name,
date_type,
date_type,
date_info,
date_info,
priority,
priority,
spider_name,
spider_name,
spider_is_ready,
spider_script,
spider_state,
spider_is_ready,
bg_name,
spider_state,
bg_dol_state
bg_name,
)
bg_dol_state,
VALUES
finished_count
(
)
'月全流程',
VALUES
'{site_name}',
(
'month',
'月全流程',
'{date_info}',
'{site_name}',
{priority},
'month',
'{site_name}_spider_asin',
'{date_info}',
'yes',
{priority},
1,
'{site_name}_spider_asin',
'{site_name}_all_cal',
'{spider_script}',
1
'yes',
)
1,
ON DUPLICATE KEY UPDATE
'{site_name}_all_cal',
spider_is_ready = VALUES(spider_is_ready),
3,
spider_state = VALUES(spider_state);
0
"""
)
ON DUPLICATE KEY UPDATE
spider_is_ready = VALUES(spider_is_ready),
spider_script = VALUES(spider_script),
spider_state = VALUES(spider_state),
bg_dol_state = VALUES(bg_dol_state),
finished_count = COALESCE(finished_count, 0) + 1;
"""
print
(
f
"workflow_manager进度表---重置爬虫的asin抓取进度: {update_sql_workflow}"
)
print
(
f
"workflow_manager进度表---重置爬虫的asin抓取进度: {update_sql_workflow}"
)
conn
.
execute
(
update_sql_workflow
)
conn
.
execute
(
update_sql_workflow
)
break
break
...
@@ -99,5 +108,5 @@ if __name__ == '__main__':
...
@@ -99,5 +108,5 @@ if __name__ == '__main__':
date_type
=
sys
.
argv
[
2
]
# 参数2:类型:week/4_week/month/quarter/day
date_type
=
sys
.
argv
[
2
]
# 参数2:类型:week/4_week/month/quarter/day
date_info
=
sys
.
argv
[
3
]
# 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
date_info
=
sys
.
argv
[
3
]
# 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
export_data
(
site_name
,
date_type
,
date_info
)
export_data
(
site_name
,
date_type
,
date_info
)
#
if site_name in ["us", "uk", "de"] and date_type == "month":
if
site_name
in
[
"us"
,
"uk"
,
"de"
]
and
date_type
==
"month"
:
#
update_workflow_manager(site_name, date_type, date_info)
update_workflow_manager
(
site_name
,
date_type
,
date_info
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment