Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
spider
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
selection-new
spider
Commits
a5677f43
Commit
a5677f43
authored
Aug 01, 2025
by
Peng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
000d315d
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
171 additions
and
42 deletions
+171
-42
ocr.py
py_spider/Servers/ocr.py
+8
-1
get_junglescout_rank.py
py_spider/amazon_every_day_spider/get_junglescout_rank.py
+7
-3
params.py
py_spider/amazon_params/params.py
+34
-34
asin_detail_pg.py
py_spider/amazon_spider/asin_detail_pg.py
+1
-1
search_term_test.py
py_spider/amazon_spider/search_term_test.py
+3
-3
secure_db_client.py
py_spider/utils/secure_db_client.py
+118
-0
No files found.
py_spider/Servers/ocr.py
View file @
a5677f43
...
@@ -7,6 +7,12 @@ from flask import Flask, request, jsonify
...
@@ -7,6 +7,12 @@ from flask import Flask, request, jsonify
app
=
Flask
(
__name__
)
app
=
Flask
(
__name__
)
logging
.
getLogger
(
'ppocr'
)
.
setLevel
(
logging
.
ERROR
)
logging
.
getLogger
(
'ppocr'
)
.
setLevel
(
logging
.
ERROR
)
# 全局只实例化一次
ocr_instances
=
{}
def
get_ocr
(
language
):
if
language
not
in
ocr_instances
:
ocr_instances
[
language
]
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
language
)
return
ocr_instances
[
language
]
def
OCR
(
language
,
url
):
def
OCR
(
language
,
url
):
...
@@ -14,7 +20,8 @@ def OCR(language, url):
...
@@ -14,7 +20,8 @@ def OCR(language, url):
# language = 'en'
# language = 'en'
# url = 'http://soundasia.oss-cn-shenzhen.aliyuncs.com/product_img/2024/12/05/EQ6368_1733363300_%E7%94%BB%E6%9D%BF%203.jpg'
# url = 'http://soundasia.oss-cn-shenzhen.aliyuncs.com/product_img/2024/12/05/EQ6368_1733363300_%E7%94%BB%E6%9D%BF%203.jpg'
# path = r'D:\新建文件夹\requests_files'
# path = r'D:\新建文件夹\requests_files'
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
language
)
# ocr = PaddleOCR(use_angle_cls=True, lang=language)
ocr
=
get_ocr
(
language
)
result
=
ocr
.
ocr
(
url
,
cls
=
True
)
result
=
ocr
.
ocr
(
url
,
cls
=
True
)
data_list
=
[]
data_list
=
[]
for
idx
in
range
(
len
(
result
)):
for
idx
in
range
(
len
(
result
)):
...
...
py_spider/amazon_every_day_spider/get_junglescout_rank.py
View file @
a5677f43
...
@@ -316,6 +316,10 @@ def junglescout_spider(db_base):
...
@@ -316,6 +316,10 @@ def junglescout_spider(db_base):
# cursor = db.cursor(cursor=pymysql.cursors.DictCursor) # 以字典的格式输出
# cursor = db.cursor(cursor=pymysql.cursors.DictCursor) # 以字典的格式输出
print
(
category_name
[
'name'
],
category_name
[
'c_id'
])
print
(
category_name
[
'name'
],
category_name
[
'c_id'
])
name_rnak_list
=
[]
name_rnak_list
=
[]
up_sql
=
f
"UPDATE all_site_category set state=2 WHERE site='{db_base}' and state=1 and c_id='{category_name['c_id']}'"
print
(
'更新状态:'
,
up_sql
)
cursor_us_mysql_db
.
execute
(
up_sql
)
db_us
.
commit
()
rank_list
=
[
1
,
10
,
30
,
50
,
100
,
200
,
300
,
400
,
500
,
600
,
700
,
800
,
900
,
1000
,
1100
,
1200
,
1300
,
1400
,
1500
,
rank_list
=
[
1
,
10
,
30
,
50
,
100
,
200
,
300
,
400
,
500
,
600
,
700
,
800
,
900
,
1000
,
1100
,
1200
,
1300
,
1400
,
1500
,
1600
,
1600
,
1700
,
1700
,
...
@@ -350,12 +354,12 @@ def junglescout_spider(db_base):
...
@@ -350,12 +354,12 @@ def junglescout_spider(db_base):
"Accept-Encoding"
:
"gzip, deflate, br, zstd"
,
"Accept-Encoding"
:
"gzip, deflate, br, zstd"
,
"Accept-Language"
:
"zh-CN,zh-TW;q=0.9,zh;q=0.8"
,
"Accept-Language"
:
"zh-CN,zh-TW;q=0.9,zh;q=0.8"
,
"Cache-Control"
:
"no-cache"
,
"Cache-Control"
:
"no-cache"
,
'Cookie'
:
'
_ga=GA1.1.522737765.1749119222; _fp=65dbbe41a37f8f9fbe702eba96328267; MEIQIA_TRACK_ID=2y5KvHOzkFTlJAhOLENKAKWsOeb; MEIQIA_VISIT_ID=2y5KvGrMsL4O61rUcCdsLjChlRa; current_guest=r0hgXGqjbSw0_250605-186810; ecookie=xOHgcnYmcZIZKG0z_CN; Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1752031904,1752460043,1752653436,1753353401; HMACCOUNT=800EBCCFB4C6BBFB; 894cdd1d9741ce0c9757=827b7d3d13ed7bd6b4b1b24d0246b3dc; 3d854e1bcd61963fdf05=38fcb3b742a48aa345ddfd7136bc60ee; _gaf_fp=f297033bfe53aa9891ffe2842271566b; _gcl_au=1.1.420472597.1749119222.1054917286.1753685435.1753685437; rank-guest-user=6303473571KK6FnhfedvWg9tSSyk3xj0WOO7cLm/YtvwwmR8H9lihUCQIaVmrHXjbpSRP/Ca0F; rank-login-user=6303473571KK6FnhfedvWg9tSSyk3xj2GRIc/8HSm4vuPYVHI5vKLXnssgei5ccK1dG8fkQSFI; rank-login-user-info=eyJuaWNrbmFtZSI6IuW4heWTpSIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTgzKioqKjczNDciLCJ0b2tlbiI6IjYzMDM0NzM1NzFLSzZGbmhmZWR2V2c5dFNTeWszeGoyR1JJYy84SFNtNHZ1UFlWSEk1dktMWG5zc2dlaTVjY0sxZEc4ZmtRU0ZJIn0=; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJKc2pZSlZWeFZzTVptVWFvMzgtZ3RRIiwiaWF0IjoxNzUzNjg1NDM2LCJleHAiOjE3NTM3NzE4MzYsIm5iZiI6MTc1MzY4NTM3Niwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTQ2NjIxNSwicGkiOm51bGwsIm5uIjoi5biF5ZOlIiwic3lzIjoiU1NfQ04iLCJlZCI6Ik4iLCJwaG4iOiIxODMwNzk2NzM0NyIsImVtIjoiMzE1OTk4MDg5MkBxcS5jb20iLCJtbCI6IkcifQ.EaQ7Md7iVOpjZDogkiS2DlndhFPt3GzL2t33LXnh9Z5Itr3A8scFM_tzrYuzXqF6a-BDIMFe90SdDtU18zs9WTTl6_Phv3AEqcDe6WDfPAhB_KMa15VYAE5-b9d3lgIukKR8ZZyAMpiJzcmIWShmqxrhCNQD0ER3b7idaJpSrJiKnwV-tj6La52WJ6BmVRAk8gst0p5h-SYVnNz9iNaSXLc2Dx-hHZvMVNU27yfbJgKPpzRxgh7TOD7O-cT0WrEoKvTSw9e81gG9bgvKuA_bD-z3ePhgM6prUfceWszD88KH8PcXua9s_8ZM4bgrMyKMHswLtwyLhWePcvtHUp6yyQ; ao_lo_to_n=6303473571KK6FnhfedvWg9tSSyk3xj0WOO7cLm/YtvwwmR8H9liibP9br/hwQ1Dlb4xDZyVPrTQIst5JCVz4PpnUIlDMGE07YVPYBWOm3Hrx4PaVkgaQ=; _ga_38NCVF2XST=GS2.1.s1753685428$o61$g1$t1753685444$j44$l0$h984121357; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1753685445; _ga_CN0F80S6GL=GS2.1.s1753685429$o59$g1$t1753685445$j44$l0$h0; JSESSIONID=F09543D3A3D6F890BAD0F422FCA49942
'
,
'Cookie'
:
'
Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1754014538; HMACCOUNT=9F9252C9CBCC28DF; _gcl_au=1.1.1089500616.1754014538; _ga=GA1.1.420464702.1754014538; MEIQIA_TRACK_ID=30fNCDIKt41VFprESpAsdxA93ss; MEIQIA_VISIT_ID=30fNCEDPMElClEdkflJq4o3vq1u; ecookie=vR8uQMtRUHf2GPuw_CN; e6d47b7933e377ecd062=54dab66cb6737167d2467f75c45d482f; _fp=65dbbe41a37f8f9fbe702eba96328267; _gaf_fp=909522d879232499acd9b4ddab672d19; current_guest=hsba8eOK1Dg5_250801-107899; rank-login-user=4412704571jC8vqVc/Rw3YJBDFuUDJtYCCpovzEIJtbd/qlmC8t917Mll118BEKfWZetMkVyfW; rank-login-user-info="eyJuaWNrbmFtZSI6IuWViuWTiOWTiOWTiCIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTUzKioqKjEyNzAiLCJ0b2tlbiI6IjQ0MTI3MDQ1NzFqQzh2cVZjL1J3M1lKQkRGdVVESnRZQ0Nwb3Z6RUlKdGJkL3FsbUM4dDkxN01sbDExOEJFS2ZXWmV0TWtWeWZXIn0="; Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJHOWQwOEtTcnVjdEgwcXZWZm1XNnlBIiwiaWF0IjoxNzU0MDE0NTQ0LCJleHAiOjE3NTQxMDA5NDQsIm5iZiI6MTc1NDAxNDQ4NCwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTQ2NjIwMSwicGkiOm51bGwsIm5uIjoi5ZWK5ZOI5ZOI5ZOIIiwic3lzIjoiU1NfQ04iLCJlZCI6Ik4iLCJwaG4iOiIxNTM2ODA1MTI3MCIsImVtIjoibWVpeW91bGFAbWVpeW91bGEuY29tIiwibWwiOiJHIn0.Cwkc0tf7KniQbUgRyiZw8UED5dm3y8dOrK04ejg4a45H-W3FEBpQ6ERU8V7TTy2qKOJf8j1swyVxRIqJDrGRSwe4FBr8EKLsoZtxRe6DR0LYGx8xMmWmfUmVmwcBHR2M62RZlDO-fjvVPBuZwcLyUuslq2PZen2ugOUzdfQDHQJV8UMmWUvt1zHjjQZrRlda1tK0_TuHt8dBCZ-sC_CIooCAvXYYfMUSMeT2w_QmgFPc_EIozNKvv7EDzqisT4pR5AWKDdfoVUSWFBIVNwoulIMdtKLsVrlL8Xiq_2l3mG9NCfE0recVIGCRhV52lwWD3vT1O3bpCT-usWv0hXVgZA; ao_lo_to_n="4412704571jC8vqVc/Rw3YJBDFuUDJtakIKSNY+NxiJnARSLNieFEjr7klDXJb6hxls+GbtooUDUaltNvx27xhoy1Atnktnv2cJc/ZHsk63L1rTYoE0Cs="; rank-guest-user=5412704571Cjn566YjkeJT1oMWIEZzHnz3datXkhqNVFcNKbPORkfvRQdV46mvORTmyl8ul2JV; _ga_38NCVF2XST=GS2.1.s1754014538$o1$g1$t1754014551$j47$l0$h1907321182; Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1754014552; _ga_CN0F80S6GL=GS2.1.s1754014539$o1$g1$t1754014552$j47$l0$h0; JSESSIONID=A5AEB0B286BCE5A27600AD3BD1DD6445
'
,
"User-agent"
:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
,
"User-agent"
:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
,
}
}
url
=
"https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
url
=
"https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
data
=
{
data
=
{
"station"
:
"
DE
"
,
"station"
:
"
UK
"
,
"cid"
:
category_name
[
'c_id'
],
# 分类id
"cid"
:
category_name
[
'c_id'
],
# 分类id
"bsr"
:
f
"{i}"
# 排名
"bsr"
:
f
"{i}"
# 排名
}
}
...
@@ -431,7 +435,7 @@ def save_site_category(site_bsr_dict=None):
...
@@ -431,7 +435,7 @@ def save_site_category(site_bsr_dict=None):
def
run
():
def
run
():
# get_cid()
# get_cid()
junglescout_spider
(
'
de
'
)
junglescout_spider
(
'
uk
'
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
py_spider/amazon_params/params.py
View file @
a5677f43
...
@@ -296,40 +296,40 @@ else:
...
@@ -296,40 +296,40 @@ else:
"pg_host"
:
"61.145.136.61"
,
"pg_host"
:
"61.145.136.61"
,
}
}
# 连接pg6数据库参数 113.100.143.162:5432
# 连接pg6数据库参数 113.100.143.162:5432
弃用
if
platform
.
system
()
.
lower
()
==
'windows'
:
#
if platform.system().lower() == 'windows':
PG_CONN_DICT_6
=
{
#
PG_CONN_DICT_6 = {
"pg_port"
:
5432
,
#
"pg_port": 5432,
"pg_db"
:
"selection"
,
#
"pg_db": "selection",
"pg_user"
:
"postgres"
,
#
"pg_user": "postgres",
"pg_pwd"
:
"F9kL2sXe81rZq"
,
#
"pg_pwd": "F9kL2sXe81rZq",
"pg_host"
:
"113.100.143.162"
,
#
"pg_host": "113.100.143.162",
}
#
}
else
:
#
else:
PG_CONN_DICT_6
=
{
#
PG_CONN_DICT_6 = {
"pg_port"
:
5432
,
#
"pg_port": 5432,
"pg_db"
:
"selection"
,
#
"pg_db": "selection",
"pg_user"
:
"postgres"
,
#
"pg_user": "postgres",
"pg_pwd"
:
"F9kL2sXe81rZq"
,
#
"pg_pwd": "F9kL2sXe81rZq",
"pg_host"
:
"113.100.143.162"
,
#
"pg_host": "113.100.143.162",
}
#
}
# 连接pg12数据库参数 113.100.143.162:5443
# 连接pg12数据库参数 113.100.143.162:5443
弃用
if
platform
.
system
()
.
lower
()
==
'windows'
:
#
if platform.system().lower() == 'windows':
PG_CONN_DICT_21
=
{
#
PG_CONN_DICT_21 = {
"pg_port"
:
5443
,
#
"pg_port": 5443,
"pg_db"
:
"selection"
,
#
"pg_db": "selection",
"pg_user"
:
"postgres"
,
#
"pg_user": "postgres",
"pg_pwd"
:
"F9kL2sXe81rZq"
,
#
"pg_pwd": "F9kL2sXe81rZq",
"pg_host"
:
"113.100.143.162"
,
#
"pg_host": "113.100.143.162",
}
#
}
else
:
#
else:
PG_CONN_DICT_21
=
{
#
PG_CONN_DICT_21 = {
"pg_port"
:
5443
,
#
"pg_port": 5443,
"pg_db"
:
"selection"
,
#
"pg_db": "selection",
"pg_user"
:
"postgres"
,
#
"pg_user": "postgres",
"pg_pwd"
:
"F9kL2sXe81rZq"
,
#
"pg_pwd": "F9kL2sXe81rZq",
"pg_host"
:
"113.100.143.162"
,
#
"pg_host": "113.100.143.162",
}
#
}
# doris
# doris
if
platform
.
system
()
.
lower
()
==
'windows'
:
if
platform
.
system
()
.
lower
()
==
'windows'
:
...
...
py_spider/amazon_spider/asin_detail_pg.py
View file @
a5677f43
...
@@ -477,7 +477,7 @@ class async_asin_pg():
...
@@ -477,7 +477,7 @@ class async_asin_pg():
def
run
(
self
):
def
run
(
self
):
asin_list
=
self
.
save_asin_detail
.
read_db_data
()
asin_list
=
self
.
save_asin_detail
.
read_db_data
()
# asin_list = ['B0
BPKK2BMN
|2025-01|1|1|null|null']
# asin_list = ['B0
DSBTYG6W
|2025-01|1|1|null|null']
if
asin_list
:
if
asin_list
:
for
asin
in
asin_list
:
for
asin
in
asin_list
:
self
.
queries_asin_queue
.
put
(
asin
)
self
.
queries_asin_queue
.
put
(
asin
)
...
...
py_spider/amazon_spider/search_term_test.py
View file @
a5677f43
...
@@ -30,7 +30,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
...
@@ -30,7 +30,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class
search_temp_pg
(
BaseUtils
):
class
search_temp_pg
(
BaseUtils
):
def
__init__
(
self
,
site_name
=
'u
s
'
,
read_size
=
300
,
proxy_name
=
None
,
week
=
None
,
month
=
None
):
def
__init__
(
self
,
site_name
=
'u
k
'
,
read_size
=
300
,
proxy_name
=
None
,
week
=
None
,
month
=
None
):
super
()
.
__init__
()
super
()
.
__init__
()
self
.
site_name
=
site_name
# 站点
self
.
site_name
=
site_name
# 站点
self
.
month
=
month
self
.
month
=
month
...
@@ -237,7 +237,7 @@ class search_temp_pg(BaseUtils):
...
@@ -237,7 +237,7 @@ class search_temp_pg(BaseUtils):
self
.
kafuka_producer
.
close
()
self
.
kafuka_producer
.
close
()
def
run_pol
(
self
):
def
run_pol
(
self
):
search_term_list
=
[
"1|-|srs|-|https://www.amazon.co
m/s?me=AI6OQ4YYFMZUE&marketplaceID=ATVPDKIKX0DER
"
]
search_term_list
=
[
"1|-|srs|-|https://www.amazon.co
.uk/s?k=power
%20
shower&page=1
"
]
if
search_term_list
:
if
search_term_list
:
if
self
.
cookies_queue
.
empty
():
if
self
.
cookies_queue
.
empty
():
cookies_dict
=
self
.
reuests_para_val
.
get_cookie
()
cookies_dict
=
self
.
reuests_para_val
.
get_cookie
()
...
@@ -247,7 +247,7 @@ class search_temp_pg(BaseUtils):
...
@@ -247,7 +247,7 @@ class search_temp_pg(BaseUtils):
for
search_url
in
search_term_list
:
for
search_url
in
search_term_list
:
self
.
search_term_queue
.
put
(
search_url
)
self
.
search_term_queue
.
put
(
search_url
)
html_thread
=
[]
html_thread
=
[]
for
i
in
range
(
1
5
):
for
i
in
range
(
1
):
thread2
=
threading
.
Thread
(
target
=
self
.
get_search_kw
,
args
=
(
i
,))
thread2
=
threading
.
Thread
(
target
=
self
.
get_search_kw
,
args
=
(
i
,))
html_thread
.
append
(
thread2
)
html_thread
.
append
(
thread2
)
for
ti
in
html_thread
:
for
ti
in
html_thread
:
...
...
py_spider/utils/secure_db_client.py
0 → 100644
View file @
a5677f43
import
pandas
as
pd
import
requests
,
time
from
typing
import
List
# -------- 映射字典 --------
site_name_db_dict
=
{
"us"
:
"selection"
,
"uk"
:
"selection_uk"
,
"de"
:
"selection_de"
,
"es"
:
"selection_es"
,
"fr"
:
"selection_fr"
,
"it"
:
"selection_it"
,
}
db_type_alias_map
=
{
"mysql"
:
"mysql"
,
# 阿里云mysql
"postgresql_14"
:
"postgresql_14"
,
# pg14爬虫库-内网
"postgresql_14_outer"
:
"postgresql_14_outer"
,
# pg14爬虫库-外网
"postgresql_15"
:
"postgresql_15"
,
# pg15正式库-内网
"postgresql_15_outer"
:
"postgresql_15_outer"
,
# pg15正式库-外网
"postgresql_cluster"
:
"postgresql_cluster"
,
# pg集群-内网
"postgresql_cluster_outer"
:
"postgresql_cluster_outer"
,
# pg集群-外网
"doris"
:
"doris"
,
# pg集群-内网
}
DEFAULT_SERVERS
=
[
"http://192.168.200.210:7777"
,
"http://192.168.1.102:7777"
,
]
# ---------------------------
class
RemoteTransaction
:
def
__init__
(
self
,
db
:
str
,
database
:
str
,
session
:
requests
.
Session
,
urls
:
List
[
str
]):
self
.
db
=
db
self
.
database
=
database
self
.
session
=
session
self
.
urls
=
urls
self
.
sql_queue
=
[]
def
execute
(
self
,
sql
:
str
):
self
.
sql_queue
.
append
(
sql
)
def
__enter__
(
self
):
return
self
def
__exit__
(
self
,
exc_type
,
exc
,
tb
):
for
url
in
self
.
urls
:
try
:
self
.
session
.
post
(
url
+
"/transaction"
,
json
=
{
"db"
:
self
.
db
,
"sql_list"
:
self
.
sql_queue
,
"site_name"
:
self
.
database
},
# site_name not needed on server, kept for clarity
timeout
=
15
,
)
.
raise_for_status
()
return
except
Exception
as
e
:
print
(
f
"[WARN] 事务失败 {url}: {e}"
)
raise
RuntimeError
(
"All servers failed for transaction"
)
class
RemoteEngine
:
def
__init__
(
self
,
db
:
str
,
database
:
str
,
server_urls
:
List
[
str
],
retries
:
int
=
2
):
self
.
db
=
db
self
.
database
=
database
self
.
urls
=
[
u
.
rstrip
(
"/"
)
for
u
in
server_urls
]
self
.
session
=
requests
.
Session
()
self
.
retries
=
retries
def
_request
(
self
,
endpoint
:
str
,
payload
):
for
url
in
self
.
urls
:
for
_
in
range
(
self
.
retries
):
try
:
r
=
self
.
session
.
post
(
f
"{url}/{endpoint}"
,
json
=
payload
,
timeout
=
10
)
r
.
raise_for_status
()
return
r
.
json
()
except
Exception
as
e
:
print
(
f
"[WARN] {endpoint} fail @ {url}: {e}"
)
time
.
sleep
(
1
)
raise
RuntimeError
(
f
"All servers failed for {endpoint}"
)
# ---------- 公共 API ----------
def
read_sql
(
self
,
sql
:
str
)
->
pd
.
DataFrame
:
data
=
self
.
_request
(
"query"
,
{
"db"
:
self
.
db
,
"sql"
:
sql
,
"site_name"
:
self
.
database
})
return
pd
.
DataFrame
(
data
[
"result"
])
def
to_sql
(
self
,
df
:
pd
.
DataFrame
,
table
:
str
,
if_exists
=
"append"
):
return
self
.
_request
(
"insert"
,
{
"db"
:
self
.
db
,
"table"
:
table
,
"if_exists"
:
if_exists
,
"data"
:
df
.
to_dict
(
"records"
),
"site_name"
:
self
.
database
})
def
begin
(
self
):
return
RemoteTransaction
(
self
.
db
,
self
.
database
,
self
.
session
,
self
.
urls
)
# ---------------------------------
def
get_remote_engine
(
site_name
:
str
,
db_type
:
str
,
servers
:
List
[
str
]
=
None
)
->
RemoteEngine
:
if
site_name
not
in
site_name_db_dict
:
raise
ValueError
(
f
"Unknown site_name: {site_name}"
)
if
db_type
not
in
db_type_alias_map
:
raise
ValueError
(
f
"Unknown db_type: {db_type}"
)
return
RemoteEngine
(
db
=
db_type_alias_map
[
db_type
],
database
=
site_name
,
server_urls
=
servers
or
DEFAULT_SERVERS
,
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment