Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
afffe7d3
Commit
afffe7d3
authored
Apr 28, 2026
by
fangxingjun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
cdd9fab4
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
5 deletions
+12
-5
img_hdfs_index.py
Pyspark_job/img_search/img_hdfs_index.py
+1
-1
import_st_to_pg14.py
Pyspark_job/listen_program/import_st_to_pg14.py
+11
-4
No files found.
Pyspark_job/img_search/img_hdfs_index.py
View file @
afffe7d3
...
...
@@ -67,7 +67,7 @@ class ImgHdfsIndex(Templates):
def
save_data
(
self
):
with
self
.
engine_doris
.
begin
()
as
conn
:
sql_truncate
=
f
"truncate
{self.db_save}
"
sql_truncate
=
f
"truncate
TABLE {self.db_save};
"
print
(
f
"sql_truncate: {sql_truncate}"
)
conn
.
execute
(
sql_truncate
)
self
.
df_save
.
to_sql
(
self
.
db_save
,
con
=
self
.
engine_doris
,
if_exists
=
"append"
,
index
=
False
)
...
...
Pyspark_job/listen_program/import_st_to_pg14.py
View file @
afffe7d3
...
...
@@ -38,6 +38,7 @@ class ImportStToPg14(object):
"uk"
:
3
,
"de"
:
5
,
}
self
.
fetch_year_month_by_week
()
# 如果传的date_type='week', 将date_info转换成月的值
def
fetch_year_month_by_week
(
self
):
if
self
.
date_type
==
'week'
:
...
...
@@ -45,6 +46,8 @@ class ImportStToPg14(object):
df
=
pd
.
read_sql
(
sql
,
con
=
self
.
engine_mysql
)
self
.
date_info
=
list
(
df
.
year_month
)[
0
]
def
delete_dirty_data
(
self
):
print
(
f
"删除脏数据, 防止失败执行时报错"
)
from
datetime
import
datetime
today_str
=
datetime
.
now
()
.
strftime
(
"
%
Y-
%
m-
%
d"
)
...
...
@@ -58,7 +61,7 @@ class ImportStToPg14(object):
conn
.
execute
(
sql_delete
)
def
read_data
(
self
):
self
.
fetch_year_month_by_week
()
# 如果传的date_type='week', 将date_info转换成月的值
self
.
delete_dirty_data
()
# 1. 读取date_20_to_30表获取月份对应的周
sql_get_week
=
f
"select year_week, year, week from selection.date_20_to_30 WHERE `year_month`='{self.date_info}' and week_day=1"
df_week
=
pd
.
read_sql
(
sql_get_week
,
con
=
self
.
engine_mysql
)
...
...
@@ -166,8 +169,9 @@ class ImportStToPg14(object):
def
update_workflow_manager
(
self
):
with
self
.
engine_mysql
.
begin
()
as
conn
:
priority
=
self
.
site_name_pri_dict
[
self
.
site_name
]
spider_script
=
f
'ansible dabing_all -f 10 -m shell -a "nohup /usr/local/bin/python3 /mnt/py_spider/threading_spider/Poll_site_search_term_month.py {self.site_name} 2026-04 >/dev/null 2>&1 &";'
update_sql_workflow
=
f
"""
INSERT INTO workflow_manager
INSERT INTO
selection.
workflow_manager
(
workflow_name,
site_name,
...
...
@@ -175,6 +179,7 @@ class ImportStToPg14(object):
date_info,
priority,
spider_name,
spider_script,
spider_is_ready,
spider_state,
bg_name,
...
...
@@ -187,14 +192,16 @@ class ImportStToPg14(object):
'month',
'{self.date_info}',
{priority},
'us_spider_st',
'{self.site_name}_spider_st',
'{spider_script}',
'yes',
1,
'
us
_asin_export',
'
{self.site_name}
_asin_export',
1
)
ON DUPLICATE KEY UPDATE
spider_is_ready = VALUES(spider_is_ready),
spider_script = VALUES(spider_script),
bg_dol_state = VALUES(bg_dol_state),
spider_state = VALUES(spider_state);
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment