import os
import sys
import time
import traceback
import pandas as pd

os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
sys.path.append(os.path.dirname(sys.path[0]))  # parent directory
from utils.templates_mysql import TemplatesMysql
from utils.db_util import DbTypes, DBUtil


class AsinImageLocalPath(object):
    """Build the local image file path for every ASIN of a site and store it.

    The job reads ASINs from the ``selection`` schema, derives a nested
    on-disk ``.jpg`` path for each one, and appends the result to the
    ``asin_image_local_path`` table through the same database engine.

    Args:
        site_name: Site code used in table names and directory paths.
        asin_type: Source of the ASINs. ``1`` maps to the ``amazon_self``
            image directory, ``2`` to the ``amazon`` image directory.

    Raises:
        ValueError: If ``asin_type`` is neither ``1`` nor ``2``.
    """

    def __init__(self, site_name='us', asin_type=1):
        self.site_name = site_name
        self.asin_type = asin_type
        # Resolve (and validate) the image root before opening a DB engine.
        self.get_first_local_dir()
        self.engine_srs = DBUtil.get_db_engine(db_type=DbTypes.srs.name, site_name=self.site_name)

    def get_first_local_dir(self) -> None:
        """Set ``self.first_local_dir`` to the image root for this ASIN type.

        Raises:
            ValueError: If ``self.asin_type`` is not a supported value.
        """
        if self.asin_type == 1:
            self.first_local_dir = f"/mnt/data/img_data/amazon_self/{self.site_name}"
        elif self.asin_type == 2:
            self.first_local_dir = f"/mnt/data/img_data/amazon/{self.site_name}"
        else:
            # ``quit()`` is an interactive-shell helper: it is not guaranteed
            # to exist and exits silently with status 0. Fail loudly instead.
            raise ValueError(
                f"unsupported asin_type: {self.asin_type!r} (expected 1 or 2)"
            )

    def read_data(self) -> pd.DataFrame:
        """Read the ``asin`` column of the source table into a DataFrame.

        Returns:
            The DataFrame returned by ``pd.read_sql`` for the ASIN query.

        Raises:
            ValueError: If ``self.asin_type`` is not a supported value.
        """
        if self.asin_type == 1:
            sql = f"select asin from selection.{self.site_name}_self_asin_image;"
        elif self.asin_type == 2:
            # NOTE(review): this is the same table as asin_type 1, while the
            # image directory differs ("amazon" vs "amazon_self"). Possibly a
            # copy-paste slip -- confirm the intended source table.
            sql = f"select asin from selection.{self.site_name}_self_asin_image;"
        else:
            # Do not hand an empty statement to the database driver.
            raise ValueError(
                f"unsupported asin_type: {self.asin_type!r} (expected 1 or 2)"
            )
        df = pd.read_sql(sql, con=self.engine_srs)
        print(f"sql: {sql}", df.shape)
        return df

    def _build_local_path(self, asin: str) -> str:
        """Return the nested ``.jpg`` path for one ASIN.

        The file is placed under six directory levels named after the first
        1..6 characters of the ASIN, e.g. ``B0ABCDEFGH`` becomes
        ``<root>/B/B0/B0A/B0AB/B0ABC/B0ABCD/B0ABCDEFGH.jpg``.
        """
        prefix_dirs = "/".join(asin[:depth] for depth in range(1, 7))
        return f"{self.first_local_dir}/{prefix_dirs}/{asin}.jpg"

    def handle_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add ``local_path``, ``asin_type`` and ``site_name`` columns.

        The DataFrame is modified in place and also returned.

        Args:
            df: DataFrame with an ``asin`` column of strings.

        Returns:
            The same DataFrame object with the three columns added.
        """
        df["local_path"] = df.asin.apply(self._build_local_path)
        df["asin_type"] = self.asin_type
        df["site_name"] = self.site_name
        return df

    def save_data(self, df: pd.DataFrame) -> None:
        """Append ``df`` to the ``asin_image_local_path`` table."""
        df.to_sql("asin_image_local_path", con=self.engine_srs, if_exists="append", index=False)

    def run(self) -> None:
        """Run the whole job: read ASINs, derive paths, write the result."""
        df = self.read_data()
        df = self.handle_data(df)
        self.save_data(df)


if __name__ == '__main__':
    # site_name = sys.argv[1]  # argument 1: site
    # asin_type = int(sys.argv[2])  # argument 2: ASIN source type
    site_name = 'us'
    asin_type = 1
    handle_obj = AsinImageLocalPath(site_name=site_name, asin_type=asin_type)
    handle_obj.run()