1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import sys
import time
import traceback
import pandas as pd
# Set at import time, before the project import below.
# NOTE(review): nothing in this file reads this variable; presumably a
# PyArrow-related dependency consumes it -- confirm.
os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
# Append the parent of sys.path[0] (the script's directory when run as a
# script) to the import path; presumably so the `utils` import below resolves.
sys.path.append(os.path.dirname(sys.path[0])) # parent directory
from utils.templates_mysql import TemplatesMysql
# Package-relative alternative to the import above (disabled):
# from ..utils.templates_mysql import TemplatesMysql
class PicturesLocalPath(object):
    """Record the location of locally stored ``.jpg`` pictures in a database table.

    The tree under ``{dir_path}/{site_name}`` is searched for leaf directories
    (directories that contain no sub-directories). Every ``*.jpg`` file in a
    leaf becomes one ``(asin, local_path)`` row appended to the table
    ``{site_name}_pictures_local_path_copy``.
    """

    # File-name suffix identifying a picture; the part before it is the asin.
    _JPG_SUFFIX = ".jpg"

    def __init__(self, site_name='us', dir_path='/mnt/data/img_data'):
        """Configure the scan root and the destination table.

        Args:
            site_name: Site code; used as the sub-directory of *dir_path* to
                scan and as the prefix of the destination table name.
            dir_path: Root directory that holds one sub-directory per site.
        """
        self.site_name = site_name
        self.dir_path = f"{dir_path}/{site_name}"
        self.engine_pg = TemplatesMysql().engine_pg
        self.db_save = f"{self.site_name}_pictures_local_path_copy"

    def save_data(self, df):
        """Append *df* to the ``self.db_save`` table, retrying until it succeeds.

        On any exception the error and traceback are printed, the call sleeps
        for 10 seconds, a fresh engine is fetched from ``TemplatesMysql`` and
        the write is attempted again. There is no retry limit, so a permanent
        failure keeps this call looping.

        Args:
            df: DataFrame whose rows are appended (the index is not written).
        """
        while True:
            try:
                df.to_sql(self.db_save, con=self.engine_pg, if_exists="append", index=False)
                return
            except Exception as e:
                print(e, traceback.format_exc())
                time.sleep(10)
                # Fetch a new engine before retrying, presumably to recover
                # from a dropped connection.
                self.engine_pg = TemplatesMysql().engine_pg

    @staticmethod
    def get_last_level_directories(path):
        """Return every leaf directory at or below *path*.

        A leaf is a directory that contains no sub-directories. When *path*
        itself has none, the result is ``[path]``.

        Args:
            path: Directory to search.

        Returns:
            List of leaf directory paths, each built by joining onto *path*.
        """
        # scandir exposes the entry type directly, so the many image files in
        # a leaf directory do not each need a separate os.path.isdir() call.
        with os.scandir(path) as entries:
            sub_dirs = [entry.path for entry in entries if entry.is_dir()]
        # No sub-directories: this directory is itself a leaf.
        if not sub_dirs:
            return [path]
        # Otherwise collect the leaves found below each sub-directory.
        last_level_dirs = []
        for sub_dir in sub_dirs:
            last_level_dirs.extend(PicturesLocalPath.get_last_level_directories(sub_dir))
        return last_level_dirs

    def get_pics_abs_path(self, path):
        """Save one ``(asin, local_path)`` row per ``.jpg`` file below *path*.

        Each leaf directory is handled separately: its ``.jpg`` files are
        gathered into a DataFrame, the directory and frame shape are printed,
        and non-empty frames are passed to ``save_data``.

        Args:
            path: Directory whose leaf directories are scanned.
        """
        suffix = self._JPG_SUFFIX
        for abs_dir in self.get_last_level_directories(path=path):
            # Strip only the trailing suffix: str.replace would also remove
            # ".jpg" occurring in the middle of a file name and corrupt the asin.
            data_jpg = [
                (name[:-len(suffix)], os.path.join(abs_dir, name))
                for name in os.listdir(abs_dir)
                if name.endswith(suffix)
            ]
            df = pd.DataFrame(data_jpg, columns=["asin", "local_path"])
            print(f"abs_dir:{abs_dir}, df.shape:{df.shape}")
            if not df.empty:
                self.save_data(df=df)

    def run(self):
        """Print the configured root directory and index every picture below it."""
        print(f"self.dir_path:{self.dir_path}")
        self.get_pics_abs_path(path=self.dir_path)
if __name__ == '__main__':
    # Script entry point: build the indexer with its default arguments and
    # process the whole configured picture tree.
    PicturesLocalPath().run()
    # To index a single subtree instead, call get_pics_abs_path directly, e.g.
    #   PicturesLocalPath().get_pics_abs_path("/mnt/data/img_data/us/4/41/413/4130")
    #   PicturesLocalPath().get_pics_abs_path("/mnt/data/img_data/us/4/41/413/4130/41306")