# dws_st_theme.py 4.65 KB
import os
import sys
from datetime import datetime

sys.path.append(os.path.dirname(sys.path[0]))
from utils.db_util import DBUtil
from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil,DateTypes

if __name__ == '__main__':
    # Export the Hive table `dws_st_theme` into a PostgreSQL table.
    #
    # Usage: <script> site_name date_type date_info [test | month_append]
    #
    # Two export modes are handled:
    #   * last30day / month_week: load into a `<target>_copy` table, then swap
    #     it with the live `<target>` table via DBUtil.exchange_tb.
    #   * any other date_type: load into a per-period table, then register it
    #     under the yearly master table via DBUtil.add_pg_part.
    site_name = CommonUtil.get_sys_arg(1, None)
    date_type = CommonUtil.get_sys_arg(2, None)
    date_info = CommonUtil.get_sys_arg(3, None)

    # The last CLI argument doubles as an optional mode flag
    # ("test" or "month_append"); any other value means a normal run.
    test_flag = CommonUtil.get_sys_arg(len(sys.argv) - 1, None)
    print(f"执行参数为{sys.argv}")

    # Fail fast with a clear message. Previously a missing date_info crashed
    # later with an opaque AttributeError on `.split`, and a missing site_name
    # would have been formatted into table names as the literal "None".
    if not site_name or not date_type or not date_info:
        raise ValueError(
            "Missing required arguments; expected: "
            "site_name date_type date_info [test|month_append]"
        )

    if test_flag == 'test':
        db_type = 'postgresql_test'
        print("导出到测试库中")
    else:
        db_type = "postgresql_cluster"
        print("导出到PG集群库库中")

    year_str = CommonUtil.safeIndex(date_info.split("-"), 0, None)
    suffix = str(date_info).replace("-", "_")
    base_tb = f"{site_name}_st_theme_detail"

    # Decide the export mode once so the pre-export and post-export steps
    # below can never disagree about which branch is active.
    swap_mode = date_type in (DateTypes.last30day.name, DateTypes.month_week.name)

    # Acquire the database engine; it is always disposed in the `finally` below.
    engine = DBUtil.get_db_engine(db_type, site_name)
    try:
        if swap_mode:
            export_tb_target = f"{base_tb}_last30day"
            export_tb_copy = f"{export_tb_target}_copy"
            export_tb = export_tb_copy
            # Create the copy table if it does not exist yet, then empty it.
            sql = f"""
               create table if not exists {export_tb_copy} 
              (
                  like {export_tb_target} including indexes including comments
              );
              truncate table {export_tb_copy};
              """
            DBUtil.engine_exec_sql(engine, sql)
        else:
            export_master_tb = f"{base_tb}_{year_str}"
            export_tb = f"{base_tb}_{suffix}"
            next_month = CommonUtil.get_next_val(date_type, date_info)
            # For idempotency, drop any per-period table left over from a
            # previous run of the same period before recreating it.
            sql = f"""
                     drop table if exists {export_tb};
                     create table if not exists {export_tb} 
                            (
                            like {export_master_tb}  including comments
                     );
                """
            DBUtil.engine_exec_sql(engine, sql)

        # "month_append" reads the Hive partition with date_type "month";
        # otherwise the partition uses the date_type given on the command line.
        partition_dict = {
            "site_name": site_name,
            "date_type": "month" if test_flag == "month_append" else date_type,
            "date_info": date_info,
        }

        # Build the shell command that exports the Hive table.
        sh_report = CommonUtil.build_export_sh(
            site_name=site_name,
            db_type=db_type,
            hive_tb="dws_st_theme",
            export_tb=export_tb,
            col=[
                "st_key",
                "search_term",
                "theme_ch",
                "theme_en",
                "theme_label_ch",
                "theme_label_en",
                "date_info",
                "created_time",
                "updated_time",
            ],
            partition_dict=partition_dict,
        )

        client = SSHUtil.get_ssh_client()
        try:
            # ignore_err=False: presumably raises when the remote command
            # fails, so the client must be closed on that path as well.
            SSHUtil.exec_command_async(client, sh_report, ignore_err=False)
        finally:
            client.close()

        if swap_mode:
            # Swap the freshly loaded copy table with the live target table
            # (per the original comment: build indexes and exchange table names).
            DBUtil.exchange_tb(engine,
                               source_tb_name=export_tb_copy,
                               target_tb_name=export_tb_target,
                               cp_index_flag=False)
            # NOTE(review): table_name is hard-coded to 'us_aba_last_30_day'
            # regardless of site_name — confirm this is intended for non-US sites.
            update_workflow_sql = f"""
                            replace INTO selection.workflow_everyday
                            (site_name, report_date, status, status_val, table_name, date_type, page, is_end, remark,export_db_type)
                            VALUES('{site_name}', '{datetime.now().date()}', '导出PG数据库完成', 14, 'us_aba_last_30_day', '30_day', 'ABA搜索词', '是', 'ABA搜索词最近30天表','{db_type}');
                        """
        else:
            # Register the per-period table under the yearly master table for
            # the range [date_info, next_month) (per the original comment:
            # create indexes and exchange the partition).
            DBUtil.add_pg_part(
                engine,
                source_tb_name=export_tb,
                part_master_tb=export_master_tb,
                part_val={
                    "from": [date_info],
                    "to": [next_month]
                },
                cp_index_flag=False,
            )
            update_workflow_sql = f"""
                        UPDATE selection.workflow_everyday SET status='导出PG数据库完成', status_val=14,is_end ='是',export_db_type = '{db_type}'  
                        WHERE site_name= '{site_name}' and date_type='{date_type}' and report_date= '{date_info}'  and page ='ABA搜索词'
                    """

        # Record export completion in the workflow table so that whatever
        # monitors the export scripts can tell when they have all finished.
        CommonUtil.modify_export_workflow_status(update_workflow_sql, site_name, date_type, date_info)
    finally:
        # Release the engine even when a step above fails.
        engine.dispose()

    print("success")