Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
1d3a545e
Commit
1d3a545e
authored
May 06, 2026
by
fangxingjun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
b1660e04
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
14 deletions
+19
-14
export_dwt_asin_sync.py
Pyspark_job/sqoop_export/export_dwt_asin_sync.py
+19
-14
No files found.
Pyspark_job/sqoop_export/export_dwt_asin_sync.py
View file @
1d3a545e
...
@@ -13,9 +13,9 @@ def update_workflow_manager(site_name, date_type, date_info):
...
@@ -13,9 +13,9 @@ def update_workflow_manager(site_name, date_type, date_info):
while
True
:
while
True
:
try
:
try
:
site_name_pri_dict
=
{
site_name_pri_dict
=
{
"us"
:
2
,
"us"
:
1
,
"uk"
:
4
,
"uk"
:
3
,
"de"
:
6
,
"de"
:
5
,
}
}
engine_mysql
=
get_remote_engine
(
engine_mysql
=
get_remote_engine
(
site_name
=
'us'
,
site_name
=
'us'
,
...
@@ -24,7 +24,7 @@ def update_workflow_manager(site_name, date_type, date_info):
...
@@ -24,7 +24,7 @@ def update_workflow_manager(site_name, date_type, date_info):
with
engine_mysql
.
begin
()
as
conn
:
with
engine_mysql
.
begin
()
as
conn
:
spider_script
=
f
'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_spider_month.py {site_name} {date_type} {date_info} >/dev/null 2>&1 &";'
spider_script
=
f
'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_spider_month.py {site_name} {date_type} {date_info} >/dev/null 2>&1 &";'
priority
=
site_name_pri_dict
[
site_name
]
priority
=
site_name_pri_dict
[
site_name
]
update_sql_workflow
=
f
"""
update_sql_workflow
_spider
=
f
"""
INSERT INTO selection.workflow_manager
INSERT INTO selection.workflow_manager
(
(
workflow_name,
workflow_name,
...
@@ -37,7 +37,7 @@ def update_workflow_manager(site_name, date_type, date_info):
...
@@ -37,7 +37,7 @@ def update_workflow_manager(site_name, date_type, date_info):
spider_is_ready,
spider_is_ready,
spider_state,
spider_state,
bg_name,
bg_name,
bg_dol_state,
bg_dol_state,
finished_count
finished_count
)
)
VALUES
VALUES
...
@@ -51,22 +51,27 @@ def update_workflow_manager(site_name, date_type, date_info):
...
@@ -51,22 +51,27 @@ def update_workflow_manager(site_name, date_type, date_info):
'{spider_script}',
'{spider_script}',
'yes',
'yes',
1,
1,
'{site_name}_all_cal',
'{site_name}_all_cal',
3,
3,
0
0
)
)
ON DUPLICATE KEY UPDATE
ON DUPLICATE KEY UPDATE
spider_is_ready = VALUES(spider_is_ready),
spider_is_ready = VALUES(spider_is_ready),
spider_script = VALUES(spider_script),
spider_script = VALUES(spider_script),
spider_state = VALUES(spider_state),
spider_state = VALUES(spider_state),
bg_dol_state = VALUES(bg_dol_state),
bg_dol_state = VALUES(bg_dol_state);
finished_count = COALESCE(finished_count, 0) + 1;
"""
"""
print
(
f
"workflow_manager进度表---重置爬虫的asin抓取进度: {update_sql_workflow}"
)
print
(
f
"workflow_manager进度表---重置爬虫的asin抓取进度: {update_sql_workflow_spider}"
)
conn
.
execute
(
update_sql_workflow
)
conn
.
execute
(
update_sql_workflow_spider
)
update_sql_workflow_bg
=
f
"""update selection.workflow_manager
set bg_dol_state=3, finished_count=COALESCE(finished_count, 0) + 1
WHERE workflow_name='月全流程' and site_name='{site_name}' and date_type='{date_type}' and date_info='{date_info}' and priority={priority}"""
print
(
f
"workflow_manager进度表---更新asin导出进度和完成次数: {update_sql_workflow_bg}"
)
conn
.
execute
(
update_sql_workflow_bg
)
break
break
except
Exception
as
e
:
except
Exception
as
e
:
print
(
f
"失败:workflow_manager进度表---重置爬虫的asin抓取进度: {update_sql_workflow}, {e}, 报错信息: {traceback.format_exc()}"
)
print
(
f
"失败:workflow_manager进度表---重置爬虫的asin抓取进度: {update_sql_workflow
_spider
}, {e}, 报错信息: {traceback.format_exc()}"
)
time
.
sleep
(
300
)
time
.
sleep
(
300
)
continue
continue
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment