Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
df8b11be
Commit
df8b11be
authored
Apr 29, 2026
by
fangxingjun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
ae9f5874
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
42 additions
and
20 deletions
+42
-20
search_test.py
Pyspark_job/img_search/search_test.py
+38
-17
export_dwt_asin_sync.py
Pyspark_job/sqoop_export/export_dwt_asin_sync.py
+4
-3
No files found.
Pyspark_job/img_search/search_test.py
View file @
df8b11be
import
os
import
requests
import
requests
from
contextlib
import
ExitStack
# 服务器地址
url
=
'http://192.168.10.217:10001/img_search'
url
=
'http://192.168.10.217:10001/img_search'
# 文件路径列表,上传多个文件
file_paths
=
[
file_paths
=
[
'D:
\
Amazon-Selection
\
pyspark_job
\
image_search
\
img/1.jpg'
,
# r'D:\Amazon-Selection\pyspark_job\image_search\img\t1_1.png',
# 'D:\Amazon-Selection\pyspark_job\image_search\img/1.png',
r'D:\Amazon-Selection\pyspark_job\image_search\img\t1_2.png'
,
'D:
\
Amazon-Selection
\
pyspark_job
\
image_search
\
img/2.png'
,
]
]
# 将多个文件添加到请求中
files
=
[(
'file'
,
(
open
(
file_path
,
'rb'
)))
for
file_path
in
file_paths
]
# 其他表单数据
data
=
{
data
=
{
'site_name'
:
'us'
,
'site_name'
:
'us'
,
'img_type'
:
'amazon_inv'
,
'img_type'
:
'amazon_inv'
,
'search_key'
:
'file'
,
# 使用文件方式进行查询
'search_key'
:
'file'
,
'search_value'
:
''
,
# 在图片查询中可忽略
'search_value'
:
''
,
'top_k'
:
5
# 设置查询结果的返回数量
'top_k'
:
'5'
}
}
# 发送请求
with
ExitStack
()
as
stack
:
response
=
requests
.
post
(
url
,
files
=
files
,
data
=
data
)
files
=
[]
for
idx
,
file_path
in
enumerate
(
file_paths
,
start
=
1
):
files
.
append
(
(
f
'file{idx}'
,
(
os
.
path
.
basename
(
file_path
),
stack
.
enter_context
(
open
(
file_path
,
'rb'
)),
'image/png'
)
)
)
response
=
requests
.
post
(
url
,
files
=
files
,
data
=
data
,
timeout
=
120
)
# 打印响应
# print(response.status_code)
# print(response.text)
# if response.status_code == 200:
# print(response.json())
if
response
.
status_code
==
200
:
if
response
.
status_code
==
200
:
print
(
response
.
json
())
result_list
=
response
.
json
()
for
item
in
result_list
:
print
(
"id:"
,
item
.
get
(
"id"
))
print
(
"img_id:"
,
item
.
get
(
"img_id"
))
print
(
"img_type:"
,
item
.
get
(
"img_type"
))
print
(
"similarity:"
,
item
.
get
(
"similarity"
))
print
(
"img_url:"
,
item
.
get
(
"img_url"
))
print
(
"-"
*
80
)
else
:
else
:
print
(
f
"Error: {response.status_code}, {response.text}"
)
print
(
f
"Error: {response.status_code}, {response.text}"
)
\ No newline at end of file
Pyspark_job/sqoop_export/export_dwt_asin_sync.py
View file @
df8b11be
...
@@ -42,10 +42,10 @@ def update_workflow_manager(site_name, date_type, date_info):
...
@@ -42,10 +42,10 @@ def update_workflow_manager(site_name, date_type, date_info):
'month',
'month',
'{date_info}',
'{date_info}',
{priority},
{priority},
'
us
_spider_asin',
'
{site_name}
_spider_asin',
'yes',
'yes',
1,
1,
'
us
_all_cal',
'
{site_name}
_all_cal',
1
1
)
)
ON DUPLICATE KEY UPDATE
ON DUPLICATE KEY UPDATE
...
@@ -97,4 +97,5 @@ if __name__ == '__main__':
...
@@ -97,4 +97,5 @@ if __name__ == '__main__':
date_type
=
sys
.
argv
[
2
]
# 参数2:类型:week/4_week/month/quarter/day
date_type
=
sys
.
argv
[
2
]
# 参数2:类型:week/4_week/month/quarter/day
date_info
=
sys
.
argv
[
3
]
# 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
date_info
=
sys
.
argv
[
3
]
# 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
export_data
(
site_name
,
date_type
,
date_info
)
export_data
(
site_name
,
date_type
,
date_info
)
update_workflow_manager
(
site_name
,
date_type
,
date_info
)
if
site_name
in
[
"us"
,
"uk"
,
"de"
]
and
date_type
==
"month"
:
update_workflow_manager
(
site_name
,
date_type
,
date_info
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment