import os
import shutil
import sys
import zipfile

# Make the project root importable so the utils package resolves.
sys.path.append(os.path.dirname(sys.path[0]))

from utils.hdfs_utils import HdfsUtils
from utils.common_util import CommonUtil

dev_work_place = "/opt/module/spark/demo/py_demo"


def get_git_work_place(branch_name: str) -> str:
    if branch_name == 'develop':
        return "/root/git_work_place"
    usrName = CommonUtil.safeIndex(branch_name.split("/"), 1, None)
    assert usrName is not None, "Failed to resolve the user name from the branch name!"
    return f"/home/{usrName}/Amazon-Selection"


def crlf_2_lf(full_path):
    """
    Convert a Windows shell script's CRLF line endings to Unix LF separators.
    :param full_path: path of the script to convert
    :return: file content as bytes with LF line endings
    """
    WINDOWS_LINE_ENDING = b'\r\n'
    UNIX_LINE_ENDING = b'\n'
    with open(full_path, 'rb') as open_file:
        content = open_file.read()
    return content.replace(WINDOWS_LINE_ENDING, UNIX_LINE_ENDING)


def zip_yswgutils_to_hdfs():
    git_work_place = get_git_work_place("develop")
    dist_src = f"{git_work_place}/Pyspark_job"
    work_place = "/tmp"
    os.chdir(work_place)
    filename = "yswg_utils.zip"
    result_path = os.path.join(work_place, filename)
    if os.path.exists(result_path):
        os.remove(result_path)

    with zipfile.ZipFile(filename, 'w') as zip_obj:
        for root, dirs, files in os.walk(dist_src):
            for f in files:
                full_path = os.path.join(root, f)
                # Skip files larger than 20 MB.
                if os.path.getsize(full_path) > 20 * 1024 * 1024:
                    continue
                # Store entries relative to dist_src.
                zip_path = full_path[len(dist_src) + 1:]
                zip_obj.write(full_path, zip_path)

    print(f"Archive written to {result_path}")
    print("Uploading the environment package to HDFS.................")
    hdfs_path = f"/lib/{filename}"
    client = HdfsUtils.get_hdfs_cilent()
    client.delete(hdfs_path)
    client.upload(hdfs_path, result_path, cleanup=True)
    print("Removing the local archive.................")
    os.remove(result_path)
    print("success")


def git_update(branch_name):
    assert branch_name is not None, "Branch name must not be empty!"
    work_place = get_git_work_place(branch_name)
    if not os.path.exists(work_place):
        # makedirs: the parent directory (e.g. /home/<user>) may not exist yet.
        os.makedirs(work_place)
    os.chdir(work_place)
    src = "Pyspark_job"
    if len(os.listdir(work_place)) <= 0:
        # First run: sparse-checkout only the Pyspark_job directory.
        cmds = [
            'git init',
            'git remote add origin http://47.106.101.75/selection/Amazon-Selection.git',
            'git config core.sparseCheckout true',
            f'echo "{src}" >> .git/info/sparse-checkout',
            'git pull --depth=1 origin master',
            "git fetch",
            f"git checkout {branch_name}",
            "git config --global credential.helper store",
            "git pull",
        ]
        for cmd in cmds:
            print(cmd)
            output = str(os.popen(cmd).read())
            print(output)

    update_flag = True
    print(str(os.popen(f"git checkout {branch_name}").read()))
    output = str(os.popen("git pull").read())
    print(output)
    if output.startswith("There is no tracking information for the current branch."):
        update_flag = False

    if update_flag and branch_name == 'develop':
        # Back up the original deployment directory; read() blocks until the copy finishes.
        cmd = f"cp -r {dev_work_place} {dev_work_place}_back"
        os.popen(cmd).read()
        src_dir = os.path.join(work_place, src)
        target_dir = dev_work_place
        for root, dirs, files in os.walk(src_dir):
            for file in files:
                src_path = os.path.join(root, file)
                target_path = src_path.replace(src_dir, target_dir)
                # Create the target directory if it does not exist.
                if not os.path.exists(os.path.dirname(target_path)):
                    os.makedirs(os.path.dirname(target_path))
                # Overwrite the online copy with the file from git.
                shutil.copyfile(src_path, target_path)
    print("git update succeeded!!!")
    return update_flag


def zip_yswgutils_to_hdfs_local():
    dist_src = r"E:\Amazon-Selection\Pyspark_job"
    work_place = r"C:\Users\123\Desktop\tmp"
    os.chdir(work_place)
    filename = "yswg_utils.zip"
    result_path = os.path.join(work_place, filename)
    if os.path.exists(result_path):
        os.remove(result_path)

    with zipfile.ZipFile(filename, 'w') as zip_obj:
        for root, dirs, files in os.walk(dist_src):
            for f in files:
                full_path = os.path.join(root, f)
                # Skip files larger than 20 MB.
                if os.path.getsize(full_path) > 20 * 1024 * 1024:
                    continue
                zip_path = full_path[len(dist_src) + 1:]
                zip_obj.write(full_path, zip_path)

    print(f"Archive written to {result_path}")
    print("Uploading the environment package to HDFS.................")
    hdfs_path = f"/lib/{filename}"
    client = HdfsUtils.get_hdfs_cilent()
    client.delete(hdfs_path)
    client.upload(hdfs_path, result_path, cleanup=True)
    print("Removing the local archive.................")
    os.remove(result_path)
    print("success")


if __name__ == '__main__':
    branch_name = CommonUtil.get_sys_arg(1, "develop")
    update_flag = git_update(branch_name)
    # Refresh the shared environment package on HDFS.
    if update_flag and branch_name == 'develop':
        zip_yswgutils_to_hdfs()
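
# Usage sketch. The script file name "deploy.py" and the branch/user names below are
# assumptions for illustration, not taken from the repository:
#
#   python deploy.py                # defaults to develop: updates the online directory
#                                   # and refreshes yswg_utils.zip on HDFS
#   python deploy.py dev/zhangsan   # personal branch: the segment after "/" selects
#                                   # the checkout under /home/zhangsan/Amazon-Selection
#
# Note that crlf_2_lf only returns the converted bytes; a caller that wants to fix a
# script in place must write them back, e.g. (the path here is hypothetical):
#
#   content = crlf_2_lf("/opt/module/spark/demo/py_demo/run.sh")
#   with open("/opt/module/spark/demo/py_demo/run.sh", "wb") as fh:
#       fh.write(content)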