asin_image_local_path.py
2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import sys
import time
import traceback
import pandas as pd
os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
sys.path.append(os.path.dirname(sys.path[0])) # 上级目录
from utils.templates_mysql import TemplatesMysql
from utils.db_util import DbTypes, DBUtil
class AsinImageLocalPath(object):
    """Derive local image file paths for ASINs and store them in the database.

    The workflow (see ``run``) reads ASINs from the ``selection`` schema,
    computes a nested ``.jpg`` path for every ASIN under a root directory
    chosen by ``asin_type``, and appends the rows to the
    ``asin_image_local_path`` table.
    """

    # Root image directory per supported asin_type; the site name is appended.
    _LOCAL_DIR_ROOTS = {
        1: "/mnt/data/img_data/amazon_self",
        2: "/mnt/data/img_data/amazon",
    }
    # Number of nested prefix directories (asin[:1] ... asin[:6]) in a path.
    _PREFIX_DEPTH = 6

    def __init__(self, site_name='us', asin_type=1):
        """Store the settings, resolve the root directory and open the engine.

        Args:
            site_name: Site code used in directory and table names.
            asin_type: ASIN source type; only 1 and 2 are supported.

        Raises:
            ValueError: If ``asin_type`` is not a supported value.
        """
        self.site_name = site_name
        self.asin_type = asin_type
        self.get_first_local_dir()
        self.engine_srs = DBUtil.get_db_engine(db_type=DbTypes.srs.name, site_name=self.site_name)

    def _check_asin_type(self):
        """Raise ``ValueError`` when ``self.asin_type`` is not supported."""
        if self.asin_type not in self._LOCAL_DIR_ROOTS:
            supported = sorted(self._LOCAL_DIR_ROOTS)
            raise ValueError(
                f"unsupported asin_type: {self.asin_type!r} (expected one of {supported})"
            )

    def get_first_local_dir(self):
        """Set ``self.first_local_dir`` to the image root for this site/type.

        Raises:
            ValueError: If ``asin_type`` is not a supported value. The
                previous implementation called ``quit()`` here, which ended
                the process silently with exit status 0.
        """
        self._check_asin_type()
        self.first_local_dir = f"{self._LOCAL_DIR_ROOTS[self.asin_type]}/{self.site_name}"

    def read_data(self):
        """Read the ``asin`` column of the source table into a DataFrame.

        Returns:
            The DataFrame produced by ``pd.read_sql`` for the ASIN query.

        Raises:
            ValueError: If ``asin_type`` is not a supported value (instead
                of sending an empty SQL string to the database).
        """
        self._check_asin_type()
        # NOTE(review): asin_type 1 and 2 both query `{site}_self_asin_image`,
        # exactly as the original code did. That looks like a copy-paste slip
        # for type 2 -- confirm the intended source table before changing it.
        sql = f"select asin from selection.{self.site_name}_self_asin_image;"
        df = pd.read_sql(sql, con=self.engine_srs)
        print(f"sql: {sql}", df.shape)
        return df

    def _build_local_path(self, asin):
        """Return the nested ``.jpg`` path for one ASIN string.

        Each of the first ``_PREFIX_DEPTH`` prefixes of the ASIN becomes one
        directory level, e.g. ``<root>/B/B0/B00/B00A/B00AB/B00ABC/<asin>.jpg``.
        """
        prefixes = "/".join(asin[:depth] for depth in range(1, self._PREFIX_DEPTH + 1))
        return f"{self.first_local_dir}/{prefixes}/{asin}.jpg"

    def handle_data(self, df):
        """Add ``local_path``, ``asin_type`` and ``site_name`` columns to ``df``.

        The DataFrame is modified in place and also returned.

        Args:
            df: DataFrame with an ``asin`` column of strings.

        Returns:
            The same DataFrame with the three columns added.
        """
        df["local_path"] = df["asin"].apply(self._build_local_path)
        df["asin_type"] = self.asin_type
        df["site_name"] = self.site_name
        return df

    def save_data(self, df):
        """Append ``df`` to the ``asin_image_local_path`` table."""
        df.to_sql("asin_image_local_path", con=self.engine_srs, if_exists="append", index=False)

    def run(self):
        """Run the full read -> transform -> save workflow."""
        df = self.read_data()
        df = self.handle_data(df)
        self.save_data(df)
if __name__ == '__main__':
    # Optional command-line arguments:
    #   argv[1] -- site name (defaults to 'us')
    #   argv[2] -- ASIN source type as an integer (defaults to 1)
    # Running the script without arguments keeps the previous behaviour.
    site_name = sys.argv[1] if len(sys.argv) > 1 else 'us'
    asin_type = int(sys.argv[2]) if len(sys.argv) > 2 else 1
    handle_obj = AsinImageLocalPath(site_name=site_name, asin_type=asin_type)
    handle_obj.run()