# asin_image_local_path.py
# Last committed by chenyuanjie.
import os
import sys
import time
import traceback

import pandas as pd
os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from utils.templates_mysql import TemplatesMysql
from utils.db_util import DbTypes, DBUtil


class AsinImageLocalPath(object):
    """Compute the local image path of every ASIN and append it to ``asin_image_local_path``.

    The job reads ASINs with ``pd.read_sql``, derives a nested directory path
    for each one under a per-type image root, and appends the result to the
    ``asin_image_local_path`` table through the same engine.

    Args:
        site_name: Site code used both in the source table name and as the
            sub-directory under the image root (default ``'us'``).
        asin_type: ASIN source type. ``1`` maps to the ``amazon_self`` image
            root, ``2`` to the ``amazon`` image root.

    Raises:
        ValueError: If ``asin_type`` is not one of the supported values.
    """

    # Image root directory for each supported ``asin_type``.
    _IMAGE_ROOT_BY_TYPE = {
        1: "/mnt/data/img_data/amazon_self",
        2: "/mnt/data/img_data/amazon",
    }

    # Source table suffix for each supported ``asin_type``.
    # NOTE(review): both types read ``selection.<site>_self_asin_image``, exactly
    # as the previous if/elif did. That looks like a copy-paste slip for type 2 --
    # confirm the intended table before changing it.
    _SOURCE_TABLE_SUFFIX_BY_TYPE = {
        1: "self_asin_image",
        2: "self_asin_image",
    }

    def __init__(self, site_name='us', asin_type=1):
        self.site_name = site_name
        self.asin_type = asin_type
        # Validate asin_type (and set first_local_dir) before opening a DB engine.
        self.get_first_local_dir()
        self.engine_srs = DBUtil.get_db_engine(db_type=DbTypes.srs.name, site_name=self.site_name)

    def get_first_local_dir(self):
        """Set ``self.first_local_dir`` to ``<image root>/<site_name>``.

        Raises:
            ValueError: If ``self.asin_type`` has no configured image root.
                (Previously this called ``quit()``, which terminated the
                interpreter silently with a success status.)
        """
        image_root = self._IMAGE_ROOT_BY_TYPE.get(self.asin_type)
        if image_root is None:
            raise ValueError(f"unsupported asin_type: {self.asin_type!r} (expected 1 or 2)")
        self.first_local_dir = f"{image_root}/{self.site_name}"

    def read_data(self):
        """Read the ``asin`` column of the source table into a DataFrame.

        Returns:
            The DataFrame produced by ``pd.read_sql`` for the source query.

        Raises:
            ValueError: If ``self.asin_type`` has no configured source table
                (previously an empty SQL string was sent to the database).
        """
        table_suffix = self._SOURCE_TABLE_SUFFIX_BY_TYPE.get(self.asin_type)
        if table_suffix is None:
            raise ValueError(f"unsupported asin_type: {self.asin_type!r} (expected 1 or 2)")
        sql = f"select asin from selection.{self.site_name}_{table_suffix};"
        df = pd.read_sql(sql, con=self.engine_srs)
        print(f"sql: {sql}", df.shape)
        return df

    def _build_local_path(self, asin):
        """Return the ``.jpg`` path of ``asin`` under ``self.first_local_dir``.

        The file sits below six nested directories named after the first
        1, 2, ... 6 characters of the ASIN, e.g. ``B/B0/B00/B00A/B00AB/B00ABC``.
        """
        prefix_dirs = "/".join(asin[:length] for length in range(1, 7))
        return f"{self.first_local_dir}/{prefix_dirs}/{asin}.jpg"

    def handle_data(self, df):
        """Add ``local_path``, ``asin_type`` and ``site_name`` columns to ``df``.

        The frame is modified in place and also returned.

        Args:
            df: DataFrame with an ``asin`` column of strings.

        Returns:
            The same DataFrame object with the three columns added.
        """
        df["local_path"] = df["asin"].apply(self._build_local_path)
        df["asin_type"] = self.asin_type
        df["site_name"] = self.site_name
        return df

    def save_data(self, df):
        """Append ``df`` to the ``asin_image_local_path`` table (index not written)."""
        # NOTE(review): ``if_exists="append"`` means a re-run inserts the same rows
        # again unless the table itself rejects duplicates -- confirm.
        df.to_sql("asin_image_local_path", con=self.engine_srs, if_exists="append", index=False)

    def run(self):
        """Run the job end to end: read ASINs, build paths, store the result."""
        df = self.read_data()
        df = self.handle_data(df)
        self.save_data(df)


if __name__ == '__main__':
    # Optional command-line arguments:
    #   argument 1: site (defaults to 'us')
    #   argument 2: ASIN source type as an integer (defaults to 1)
    # With no arguments the script performs the same fixed run as before.
    site_name = sys.argv[1] if len(sys.argv) > 1 else 'us'
    asin_type = int(sys.argv[2]) if len(sys.argv) > 2 else 1
    handle_obj = AsinImageLocalPath(site_name=site_name, asin_type=asin_type)
    handle_obj.run()