# dwt_st_base_report.py
# chenyuanjie committed
import os
import sys

sys.path.append(os.path.dirname(sys.path[0]))
from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil
from utils.db_util import DBUtil

if __name__ == '__main__':
    # Export job for the Hive table `dwt_st_base_report`:
    #   1. make sure the yearly partition of `{site_name}_aba_last_total_time` exists,
    #   2. delete rows already stored for the requested date_info,
    #   3. build the export shell script and run it over SSH.
    #
    # Positional arguments: <site_name> <date_type> <date_info> [test]

    # Read the input arguments.
    site_name = CommonUtil.get_sys_arg(1, None)
    date_type = CommonUtil.get_sys_arg(2, None)
    date_info = CommonUtil.get_sys_arg(3, None)

    # The last argument decides the export environment.
    test_flag = CommonUtil.get_sys_arg(len(sys.argv) - 1, None)
    print(f"执行参数为{sys.argv}")

    # Fail fast with a clear message: without this check a missing date_info
    # only surfaced later as an AttributeError on `date_info.split`, after the
    # database engine had already been requested.
    if not site_name or not date_info:
        raise ValueError(
            "missing required arguments: <site_name> <date_type> <date_info> [test]"
        )

    if test_flag == 'test':
        db_type = 'postgresql_test'
        print("导出到测试库中")
    else:
        db_type = "postgresql"
        print("导出到PG库中")

    # Obtain the database connection.
    engine = DBUtil.get_db_engine(db_type, site_name)

    export_master_tb = f"{site_name}_aba_last_total_time"
    # Year bounds of the partition that receives the exported data.
    # int() raises ValueError when date_info does not start with a numeric year.
    year_str = CommonUtil.safeIndex(date_info.split("-"), 0, None)
    year_next = str(int(year_str) + 1)

    export_table = f"{export_master_tb}_{year_str}"

    # Given this table's design, only the partition needs to be created.
    # To avoid exporting duplicate data, the rows for this date_info are
    # cleared before the export.
    # NOTE(review): the statement is assembled from command-line values via
    # string interpolation; confirm the arguments only come from trusted callers.
    sql = f"""
        create table if not exists {export_table} partition of {export_master_tb} for values from ('{year_str}') to ('{year_next}');
        delete from {export_table} where date_info = '{date_info}'
    """
    # Execute the SQL through the db engine.
    DBUtil.engine_exec_sql(engine, sql)

    # Build the shell script that performs the sqoop export.
    sh = CommonUtil.build_export_sh(
        site_name=site_name,
        db_type=db_type,
        hive_tb="dwt_st_base_report",
        export_tb=export_table,
        col=[
            "st_key",
            "search_term",
            "st_volume",
            "st_rank",
            "st_orders",
            "years",
            "created_time",
            "updated_time",
            "date_type",
            "date_info"
        ],
        partition_dict={
            "site_name": site_name,
            "date_type": date_type,
            "date_info": date_info
        }
    )

    client = SSHUtil.get_ssh_client()
    try:
        SSHUtil.exec_command_async(client, sh, ignore_err=False)
    finally:
        # Always release the SSH connection, even when the export command fails.
        client.close()