import os
import sys
import time
import traceback

import pandas as pd
# Set in the process environment before the project import below runs.
# NOTE(review): presumably read by a PyArrow-based dependency — confirm it is still needed.
os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
# Append the parent directory of sys.path[0] to the import search path;
# presumably this is what lets the `utils` package imported below resolve.
sys.path.append(os.path.dirname(sys.path[0]))  # parent directory
from utils.templates_mysql import TemplatesMysql


class PicturesLocal(object):
    """Index picture files stored on local disk and record their paths in a database table.

    Rows are appended to the table ``{site_name}_pictures_local_path{self_flag}``
    through ``self.engine`` (obtained from ``TemplatesMysql().engine``).

    The table name is interpolated into SQL text from constructor arguments,
    so ``site_name`` and ``self_flag`` must come from trusted code, never
    from untrusted input.
    """

    # File-name suffix removed to turn an image file name into its ASIN.
    _IMAGE_SUFFIX = ".jpg"

    def __init__(self, site_name='us', dir_path='/mnt/data/img_data/us/', self_flag='', platform='temu'):
        """Store the configuration, list ``dir_path`` and open the database engine.

        Args:
            site_name: Prefix of the target table name.
            dir_path: Root directory that holds the pictures. It is listed
                immediately, so ``os.listdir`` raises if it does not exist.
            self_flag: Suffix of the target table name. A non-empty value also
                makes :meth:`run` use the two-level ``<dir_path>/<site>/<file>``
                layout and skip ASINs that are already stored.
            platform: Value written to the ``platform`` column when
                ``self_flag`` is set.
        """
        self.site_name = site_name
        self.dir_path = dir_path
        self.self_flag = self_flag
        self.platform = platform
        # Top-level entries of dir_path; iterated by run() when self_flag is empty.
        self.dir_path_list = os.listdir(dir_path)
        # NOTE(review): the next two attributes are initialised to the directory
        # string itself; asin_list is replaced by a real list in run(), and
        # file_path_list is not read anywhere in this class.
        self.file_path_list = dir_path
        self.asin_list = dir_path
        self.engine = TemplatesMysql().engine

    @classmethod
    def _asin_from_filename(cls, filename):
        """Return ``filename`` without a trailing ``.jpg``; other names are returned unchanged."""
        suffix = cls._IMAGE_SUFFIX
        if filename.endswith(suffix):
            return filename[:-len(suffix)]
        return filename

    def list_files(self, dir_path):
        """Recursively collect every file under ``dir_path``.

        Args:
            dir_path: Directory to walk with ``os.walk``.

        Returns:
            A ``pandas.DataFrame`` with columns ``asin`` (file name without the
            trailing ``.jpg``) and ``local_path`` (path joined from the walked
            directory and the file name). Its shape is printed as a side effect.
        """
        rows = []
        for dirpath, _dirnames, filenames in os.walk(dir_path):
            for filename in filenames:
                rows.append([self._asin_from_filename(filename), os.path.join(dirpath, filename)])
        df = pd.DataFrame(rows, columns=['asin', 'local_path'])
        print(df.shape)
        return df

    def save_data(self, df):
        """Append ``df`` to ``{site_name}_pictures_local_path{self_flag}`` via ``self.engine``."""
        df.to_sql(f"{self.site_name}_pictures_local_path{self.self_flag}", con=self.engine, if_exists="append", index=False)

    def _run_self_mode(self):
        """Index ``<dir_path>/<site>/<file>`` and append only ASINs missing from the table."""
        sql = f"select * from {self.site_name}_pictures_local_path{self.self_flag};"
        df_read = pd.read_sql(sql, con=self.engine)
        print("df_read.shape:", df_read.shape)

        # Keep only sub-directories. The ".jpg" name filter is retained from the
        # original behaviour; the isdir check additionally skips stray files
        # that would otherwise make os.listdir() raise NotADirectoryError.
        site_list = [
            site for site in os.listdir(self.dir_path)
            if ".jpg" not in site and os.path.isdir(os.path.join(self.dir_path, site))
        ]
        print(site_list)

        self.asin_list = []
        self.dir_path_list = []
        for site in site_list:
            site_dir = os.path.join(self.dir_path, site)
            print(site_dir)
            dir_list = os.listdir(site_dir)
            asin_list = [self._asin_from_filename(name) for name in dir_list]
            print("dir_list:", dir_list[:10])
            print("asin_list:", asin_list[:10])
            self.asin_list.extend(asin_list)
            self.dir_path_list.extend(os.path.join(site_dir, name) for name in dir_list)

        df = pd.DataFrame({'asin': self.asin_list, 'local_path': self.dir_path_list})
        print(df.shape)
        print(df.head())
        df['platform'] = self.platform
        print("df.shape:", df.shape)
        # Drop rows whose ASIN is already present in the table.
        df = df.loc[~df.asin.isin(df_read.asin)]
        print("df.shape:", df.shape)

        self.save_data(df=df)

    def run(self):
        """Scan the picture directory and persist the discovered paths.

        With a non-empty ``self_flag`` the two-level layout is indexed once and
        the method returns. Otherwise every top-level entry of ``dir_path``
        (as listed at construction time) is walked recursively and saved; a
        failing entry is retried indefinitely, with a fresh engine and a
        10 second pause between attempts.
        """
        if self.self_flag:
            self._run_self_mode()
            # Return instead of calling quit(): quit() is injected by the
            # `site` module for interactive use and would terminate the caller.
            return

        for dir_name in self.dir_path_list:
            # os.path.join works whether or not dir_path ends with a separator.
            dir_path = os.path.join(self.dir_path, dir_name)
            while True:
                try:
                    print("当前存储的图片路径:", dir_path)
                    df = self.list_files(dir_path=dir_path)
                    self.save_data(df=df)
                    break
                except Exception as e:
                    # Best-effort retry: report, rebuild the engine, wait, try again.
                    print(e, traceback.format_exc())
                    self.engine = TemplatesMysql().engine
                    time.sleep(10)


if __name__ == '__main__':
    # Script entry point. Usage: python <script> <platform>
    # The platform name selects the picture directory /mnt/data/img_data/<platform>
    # and is passed through to PicturesLocal as its `platform` argument.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of a bare IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <platform>")
    platform = sys.argv[1]  # argument 1: platform
    handle_obj = PicturesLocal(dir_path=f'/mnt/data/img_data/{platform}', self_flag="_self", platform=platform)
    handle_obj.run()