# auto_desploy.py 5.1 KB
import os
import shutil
import sys
import zipfile

sys.path.append(os.path.dirname(sys.path[0]))
from utils.hdfs_utils import HdfsUtils
from utils.common_util import CommonUtil

# Deployment directory: git_update() backs it up to "<dir>_back" and then
# overwrites it with the Pyspark_job files of the develop branch.
dev_work_place = "/opt/module/spark/demo/py_demo"


def get_git_work_place(branch_name: str) -> str:
    """
    Resolve the local git checkout directory that belongs to a branch.

    :param branch_name: ``develop``, or a name whose second ``/``-separated
        segment is used as the user name
    :return: ``/root/git_work_place`` for ``develop``, otherwise
        ``/home/<user>/Amazon-Selection``
    """
    if branch_name != 'develop':
        segments = branch_name.split("/")
        # Segment 1 is taken as the user name; None is the fallback handed to
        # safeIndex (presumably returned when that segment is missing).
        owner = CommonUtil.safeIndex(segments, 1, None)
        assert owner is not None, "根据分支获取用户名失败!"
        return f"/home/{owner}/Amazon-Selection"
    return "/root/git_work_place"


def crlf_2_lf(full_path):
    """
      Read a file as bytes and return its content with Windows line endings
      (CRLF) replaced by Unix line endings (LF). The file itself is only read,
      never rewritten.
    :param full_path: path of the file to read
    :return: the converted content as bytes
    """
    with open(full_path, mode='rb') as source:
        raw = source.read()
    crlf, lf = b'\r\n', b'\n'
    # Splitting on CRLF and re-joining with LF swaps every CRLF pair for LF.
    return lf.join(raw.split(crlf))


def zip_yswgutils_to_hdfs():
    """
    Zip the develop checkout's ``Pyspark_job`` directory and publish it to HDFS.

    Builds ``/tmp/yswg_utils.zip`` from every file under
    ``<develop git work place>/Pyspark_job`` (files larger than 20 MB are
    skipped), replaces ``/lib/yswg_utils.zip`` on HDFS with it, and finally
    removes the local archive.

    Side effect: the process working directory is changed to ``/tmp``.

    :return: None
    """
    max_file_size = 20 * 1024 * 1024
    git_work_place = get_git_work_place("develop")
    dist_src = f"{git_work_place}/Pyspark_job"
    work_place = "/tmp"
    # Kept for backward compatibility: callers may rely on the cwd change.
    os.chdir(work_place)
    filename = "yswg_utils.zip"
    result_path = os.path.join(work_place, filename)
    # Drop any archive left behind by a previous (possibly failed) run.
    if os.path.exists(result_path):
        os.remove(result_path)

    # The context manager closes the archive even if a file cannot be read,
    # and the absolute path makes the write independent of the cwd.
    with zipfile.ZipFile(result_path, 'w') as zip_obj:
        for root, _dirs, files in os.walk(dist_src):
            for f in files:
                full_path = os.path.join(root, f)
                # Skip files larger than 20 MB.
                if os.path.getsize(full_path) > max_file_size:
                    continue
                # Store entries relative to dist_src so the archive has no
                # leading directory components.
                zip_obj.write(full_path, os.path.relpath(full_path, dist_src))

    print(f"文件压缩在{result_path}中")
    print("上传环境到hdfs中.................")
    hdfs_path = f"/lib/{filename}"
    client = HdfsUtils.get_hdfs_cilent()
    # Remove the previous archive on HDFS before uploading the new one.
    client.delete(hdfs_path)
    client.upload(hdfs_path, result_path, cleanup=True)
    print("删除本地包中.................")
    os.remove(result_path)
    print("success")


def _run_git_update_cmd(args):
    """
    Run a command without a shell, wait for it to finish, and return its output.

    :param args: the command and its arguments as a list of strings
    :return: the command's stdout and stderr merged into one string
    """
    # Imported locally because this file's import block does not include subprocess.
    import subprocess

    completed = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        # git reports many conditions (e.g. a missing tracking branch) on
        # stderr, so merge it into the captured text.
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    return completed.stdout


def git_update(branch_name):
    """
    Bring the local checkout of a branch up to date and, for ``develop``,
    copy its ``Pyspark_job`` files over the deployment directory.

    When the work place directory is empty, a sparse checkout limited to
    ``Pyspark_job`` is initialised first. For ``develop`` the deployment
    directory is backed up to ``<dev_work_place>_back`` before being overwritten.

    Side effect: the process working directory is changed to the work place.

    :param branch_name: branch to check out and pull; must not be None
    :return: False when ``git pull`` reports that the current branch has no
        tracking information, True otherwise
    """
    assert branch_name is not None, "分支名不能为空!"

    work_place = get_git_work_place(branch_name)
    # makedirs also creates missing parent directories and tolerates an
    # already existing work place.
    os.makedirs(work_place, exist_ok=True)
    os.chdir(work_place)

    src = "Pyspark_job"
    if not os.listdir(work_place):
        # Empty work place: set up a sparse checkout restricted to `src`.
        # Commands are passed as argument lists (no shell), so the
        # caller-supplied branch name cannot inject shell syntax.
        init_cmds = [
            ['git', 'init'],
            ['git', 'remote', 'add', 'origin', 'http://47.106.101.75/selection/Amazon-Selection.git'],
            ['git', 'config', 'core.sparseCheckout', 'true'],
        ]
        fetch_cmds = [
            ['git', 'pull', '--depth=1', 'origin', 'master'],
            ['git', 'fetch'],
            ['git', 'checkout', branch_name],
            ['git', 'config', '--global', 'credential.helper', 'store'],
            ['git', 'pull'],
        ]
        for cmd in init_cmds:
            print(" ".join(cmd))
            print(_run_git_update_cmd(cmd))

        # Equivalent of `echo "<src>" >> .git/info/sparse-checkout`.
        sparse_file = os.path.join(work_place, ".git", "info", "sparse-checkout")
        print(f'echo "{src}" >> .git/info/sparse-checkout')
        with open(sparse_file, "a") as sparse_fh:
            sparse_fh.write(src + "\n")

        for cmd in fetch_cmds:
            print(" ".join(cmd))
            print(_run_git_update_cmd(cmd))

    print(_run_git_update_cmd(['git', 'checkout', branch_name]))
    output = _run_git_update_cmd(['git', 'pull'])
    print(output)
    # git prints this message on stderr, possibly after other lines, so search
    # the merged output instead of testing only the start of stdout.
    update_flag = "There is no tracking information for the current branch." not in output

    if update_flag and branch_name == 'develop':
        # Back up the deployment directory. The command is awaited so the
        # backup is complete before any file below is overwritten.
        backup_output = _run_git_update_cmd(["cp", "-r", dev_work_place, f"{dev_work_place}_back"])
        if backup_output:
            print(backup_output)

        src_dir = os.path.join(work_place, src)
        target_dir = dev_work_place

        for root, _dirs, files in os.walk(src_dir):
            for file in files:
                src_path = os.path.join(root, file)
                # Map the file to the same relative location under target_dir.
                target_path = os.path.join(target_dir, os.path.relpath(src_path, src_dir))

                # Create the destination directory when it does not exist yet.
                os.makedirs(os.path.dirname(target_path), exist_ok=True)

                # Overwrite the deployed file with the version from git.
                shutil.copyfile(src_path, target_path)

        print("git 更新成功!!!")
    return update_flag


def zip_yswgutils_to_hdfs_local():
    """
    Zip a local Windows checkout of ``Pyspark_job`` and publish it to HDFS.

    Builds ``yswg_utils.zip`` in the hard-coded local work directory from every
    file under the hard-coded source directory (files larger than 20 MB are
    skipped) and replaces ``/lib/yswg_utils.zip`` on HDFS with it.

    Side effect: the process working directory is changed to the work directory.

    :return: None
    """
    max_file_size = 20 * 1024 * 1024
    dist_src = r"E:\Amazon-Selection\Pyspark_job"
    work_place = r"C:\Users\123\Desktop\tmp"
    # Kept for backward compatibility: callers may rely on the cwd change.
    os.chdir(work_place)
    filename = "yswg_utils.zip"
    result_path = os.path.join(work_place, filename)
    # Drop any archive left behind by a previous run.
    if os.path.exists(result_path):
        os.remove(result_path)

    # The context manager closes the archive even if a file cannot be read,
    # and the absolute path makes the write independent of the cwd.
    with zipfile.ZipFile(result_path, 'w') as zip_obj:
        for root, _dirs, files in os.walk(dist_src):
            for f in files:
                full_path = os.path.join(root, f)
                # Skip files larger than 20 MB.
                if os.path.getsize(full_path) > max_file_size:
                    continue
                # Store entries relative to dist_src so the archive has no
                # leading directory components.
                zip_obj.write(full_path, os.path.relpath(full_path, dist_src))

    print(f"文件压缩在{result_path}中")

    print("上传环境到hdfs中.................")
    hdfs_path = f"/lib/{filename}"
    client = HdfsUtils.get_hdfs_cilent()
    # Remove the previous archive on HDFS before uploading the new one.
    client.delete(hdfs_path)
    client.upload(hdfs_path, result_path, cleanup=True)
    # NOTE(review): the message below announces deletion of the local package,
    # but this function never removes result_path — confirm whether the local
    # archive is meant to be kept.
    print("删除本地包中.................")
    print("success")


if __name__ == '__main__':
    # Branch to deploy: first CLI argument, with "develop" handed to
    # get_sys_arg as the fallback value.
    branch_name = CommonUtil.get_sys_arg(1, "develop")
    update_flag = git_update(branch_name)
    # Only a successful update of the develop branch re-publishes the zip to HDFS.
    is_develop = branch_name == 'develop'
    if is_develop and update_flag:
        zip_yswgutils_to_hdfs()