import os
import sys

sys.path.append(os.path.dirname(sys.path[0]))
from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil
from utils.db_util import DBUtil

if __name__ == '__main__':
    # Read positional arguments
    site_name = CommonUtil.get_sys_arg(1, None)
    date_type = CommonUtil.get_sys_arg(2, None)
    date_info = CommonUtil.get_sys_arg(3, None)
    # The last argument decides the export environment (test vs. production)
    test_flag = CommonUtil.get_sys_arg(len(sys.argv) - 1, None)
    print(f"Script arguments: {sys.argv}")
    if test_flag == 'test':
        db_type = 'postgresql_test'
        print("Exporting to the test database")
    else:
        db_type = "postgresql"
        print("Exporting to the production PG database")

    # Get a database engine for the target environment
    engine = DBUtil.get_db_engine(db_type, site_name)
    export_master_tb = f"{site_name}_aba_last_total_time"
    # Year component of date_info selects the partition the data is loaded into
    year_str = CommonUtil.safeIndex(date_info.split("-"), 0, None)
    year_next = str(int(year_str) + 1)
    export_table = f"{export_master_tb}_{year_str}"
    # Given the table design, it is enough to create the yearly partition if missing.
    # To avoid duplicate exports, clear any existing rows for this date_info first.
    sql = f"""
    create table if not exists {export_table} partition of {export_master_tb} for values from ('{year_str}') to ('{year_next}');
    delete from {export_table} where date_info = '{date_info}';
    """
    # Execute the SQL through the DB engine
    DBUtil.engine_exec_sql(engine, sql)

    # Build the shell command that runs the sqoop export
    sh = CommonUtil.build_export_sh(
        site_name=site_name,
        db_type=db_type,
        hive_tb="dwt_st_base_report",
        export_tb=export_table,
        col=[
            "st_key",
            "search_term",
            "st_volume",
            "st_rank",
            "st_orders",
            "years",
            "created_time",
            "updated_time",
            "date_type",
            "date_info"
        ],
        partition_dict={
            "site_name": site_name,
            "date_type": date_type,
            "date_info": date_info
        }
    )
    # Run the export on the remote host over SSH
    client = SSHUtil.get_ssh_client()
    SSHUtil.exec_command_async(client, sh, ignore_err=False)
    client.close()
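
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not executed): the script is assumed to be
# invoked with positional arguments <site_name> <date_type> <date_info> and an
# optional trailing "test" flag, e.g.
#
#     python dwt_st_base_report_export.py us month 2023-01 test
#
# (the file name here is hypothetical). For that invocation, the partition
# DDL and cleanup SQL built above would expand to:
#
#     create table if not exists us_aba_last_total_time_2023
#         partition of us_aba_last_total_time
#         for values from ('2023') to ('2024');
#     delete from us_aba_last_total_time_2023 where date_info = '2023-01';
#
# i.e. the yearly partition is created idempotently, and rows already exported
# for this date_info are removed so the sqoop export cannot produce duplicates.
# ---------------------------------------------------------------------------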