import os
import sys
import time
import traceback

import pandas as pd

os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
# Make the parent of the script's directory importable so that `utils` resolves.
sys.path.append(os.path.dirname(sys.path[0]))
from utils.templates_mysql import TemplatesMysql


class PicturesLocalPath(object):
    """Index ``.jpg`` files found under a directory tree into a database table.

    The tree rooted at ``<dir_path>/<site_name>`` is walked down to its leaf
    directories (directories without sub-directories). For every leaf, each
    ``*.jpg`` file yields one row ``(asin, local_path)`` where ``asin`` is the
    file name without its ``.jpg`` extension and ``local_path`` is the joined
    directory + file name. Rows are appended to the table
    ``<site_name>_pictures_local_path_copy``.
    """

    # Extension (case-sensitive) of the files that get indexed.
    _JPG_SUFFIX = ".jpg"

    def __init__(self, site_name='us', dir_path='/mnt/data/img_data'):
        """Store the scan root and open the database engine.

        Args:
            site_name: Site code; used as the sub-directory name under
                ``dir_path`` and as the prefix of the destination table.
            dir_path: Base directory that contains one folder per site.
        """
        self.site_name = site_name
        self.dir_path = f"{dir_path}/{site_name}"
        self.engine_pg = TemplatesMysql().engine_pg
        self.db_save = f"{self.site_name}_pictures_local_path_copy"

    def save_data(self, df, max_retries=None, retry_delay=10):
        """Append ``df`` to the destination table, retrying on failure.

        On any exception the error and traceback are printed, the method
        sleeps ``retry_delay`` seconds, re-creates the engine and tries again.

        Args:
            df: DataFrame to append to ``self.db_save``.
            max_retries: Maximum number of retries after the first failed
                attempt. ``None`` (the default) retries indefinitely, which
                is the historical behaviour.
            retry_delay: Seconds to wait between attempts.

        Raises:
            Exception: The last error from ``to_sql`` is re-raised once
                ``max_retries`` is exhausted (never when it is ``None``).
        """
        failures = 0
        while True:
            try:
                df.to_sql(self.db_save, con=self.engine_pg, if_exists="append", index=False)
                break
            except Exception as e:
                print(e, traceback.format_exc())
                failures += 1
                if max_retries is not None and failures > max_retries:
                    raise
                time.sleep(retry_delay)
                # Build a fresh engine before the next attempt.
                self.engine_pg = TemplatesMysql().engine_pg

    @staticmethod
    def get_last_level_directories(path):
        """Return every leaf directory at or below ``path``.

        A leaf directory is one that contains no sub-directories. If ``path``
        itself has no sub-directories the result is ``[path]``.

        Args:
            path: Directory to start from.

        Returns:
            List of directory paths, each built by joining ``path`` with the
            chain of sub-directory names leading to the leaf.
        """
        sub_dirs = [
            os.path.join(path, name)
            for name in os.listdir(path)
            if os.path.isdir(os.path.join(path, name))
        ]
        # No sub-directories: the current directory is itself a leaf.
        if not sub_dirs:
            return [path]
        # Otherwise recurse into every sub-directory and collect their leaves.
        last_level_dirs = []
        for sub_dir in sub_dirs:
            last_level_dirs.extend(PicturesLocalPath.get_last_level_directories(sub_dir))
        return last_level_dirs

    def get_pics_abs_path(self, path):
        """Collect ``.jpg`` files from each leaf directory under ``path`` and save them.

        Only leaf directories are listed; ``.jpg`` files sitting next to
        sub-directories in an intermediate directory are not picked up.
        One DataFrame is built and saved per leaf directory; empty ones are
        skipped.

        Args:
            path: Root directory to scan.
        """
        suffix = self._JPG_SUFFIX
        last_level_dirs = self.get_last_level_directories(path=path)
        for abs_dir in last_level_dirs:
            # Strip only the trailing extension. ``str.replace`` would also
            # remove any ".jpg" occurring in the middle of the file name.
            data_jpg = [
                (f[:-len(suffix)], os.path.join(abs_dir, f))
                for f in os.listdir(abs_dir)
                if f.endswith(suffix)
            ]
            df = pd.DataFrame(data_jpg, columns=["asin", "local_path"])
            print(f"abs_dir:{abs_dir}, df.shape:{df.shape}")
            if df.shape[0] > 0:
                self.save_data(df=df)

    def run(self):
        """Scan ``self.dir_path`` and store every picture path found."""
        print(f"self.dir_path:{self.dir_path}")
        self.get_pics_abs_path(path=self.dir_path)


if __name__ == '__main__':
    handle_obj = PicturesLocalPath()
    handle_obj.run()
    # To index a single sub-tree instead of the whole site directory, call
    # e.g. handle_obj.get_pics_abs_path("/mnt/data/img_data/us/4/41/413/4130").