dwt_fb_asin_info.py 3.53 KB
Newer Older
chenyuanjie committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
import os
import sys

sys.path.append(os.path.dirname(sys.path[0]))

from utils.db_util import DBUtil
from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil, DateTypes

if __name__ == '__main__':
    # Export one period of the Hive table `dwt_fb_asin_info` into PostgreSQL.
    #
    # Positional arguments: site_name, date_type, date_info. If the last
    # command-line argument is the literal 'test', the test database is used.
    #
    # Flow: (re)create a staging table shaped like the master table, run the
    # generated export shell command over SSH, attach the staging table to the
    # master table as the partition [date_info, next period), then record
    # completion in the workflow table.
    site_name = CommonUtil.get_sys_arg(1, None)
    date_type = CommonUtil.get_sys_arg(2, None)
    date_info = CommonUtil.get_sys_arg(3, None)
    # The last command-line argument doubles as the test switch.
    test_flag = CommonUtil.get_sys_arg(len(sys.argv) - 1, None)
    print(f"执行参数为{sys.argv}")

    # Fail fast with a readable message instead of crashing later on
    # `date_info.split` (or exporting into a table literally named "None_...").
    # Assumes get_sys_arg returns the supplied default when the argument is
    # absent -- TODO confirm against CommonUtil.
    if not (site_name and date_type and date_info):
        sys.exit(f"usage: {sys.argv[0]} <site_name> <date_type> <date_info> [test]")

    # NOTE(review): presumably gates/schedules the export around working
    # hours; the exact behaviour lives in CommonUtil -- confirm there.
    CommonUtil.judge_is_work_hours(site_name=site_name, date_type=date_type, date_info=date_info,
                                   principal='chenyuanjie',
                                   priority=3, export_tools_type=1, belonging_to_process='店铺流程')

    if test_flag == 'test':
        db_type = 'postgresql_test'
        print("导出到测试库中")
    else:
        # Regular runs target the PostgreSQL cluster.
        db_type = "postgresql_cluster"
        print("导出到PG集群库库中")

    # Table names: the master table is keyed by the first '-'-separated
    # component of date_info; the staging table by the full date_info with
    # '-' replaced by '_'.
    year_str = CommonUtil.safeIndex(date_info.split("-"), 0, None)
    suffix = str(date_info).replace("-", "_")
    base_tb = f"{site_name}_fb_asin_info"
    export_master_tb = f"{base_tb}_{year_str}"
    export_tb = f"{base_tb}_{suffix}"
    # Upper bound of the partition range; derived from date_type, so it is not
    # necessarily a month.
    next_period = CommonUtil.get_next_val(date_type, date_info)

    # Idempotency: drop any staging table left over from a previous run of the
    # same period, then recreate it with the master table's structure.
    sql = f"""
         drop table if exists {export_tb};
         create table if not exists {export_tb} 
                (
                like {export_master_tb}  including comments
         );
    """

    # Acquire the database connection; it is disposed in `finally` so a failed
    # export does not leave it open.
    engine = DBUtil.get_db_engine(db_type, site_name)
    try:
        DBUtil.engine_exec_sql(engine, sql)

        # Build the shell command that exports the Hive partition.
        sh_report = CommonUtil.build_export_sh(
            site_name=site_name,
            db_type=db_type,
            hive_tb="dwt_fb_asin_info",
            export_tb=export_tb,
            col=[
                "seller_id",
                "account_name",
                "asin",
                "asin_title",
                "asin_launch_time",
                "is_asin_new",
                "asin_package_quantity",
                "is_pq_flag",
                "parent_asin",
                "created_time",
                "updated_time",
                "date_info",
                "asin_img_url",
                "asin_price",
                "asin_rating",
                "asin_total_comments",
                "asin_weight",
                "asin_volume"
            ],
            partition_dict={
                "site_name": site_name,
                "date_type": date_type,
                "date_info": date_info
            }
        )

        # Run the export remotely; always release the SSH client, even when
        # the command fails (ignore_err=False is passed, so failures are
        # expected to surface as exceptions -- confirm in SSHUtil).
        client = SSHUtil.get_ssh_client()
        try:
            SSHUtil.exec_command_async(client, sh_report, ignore_err=False)
        finally:
            client.close()

        # Create indexes and swap the staging table in as a partition of the
        # master table for the range [date_info, next_period).
        DBUtil.add_pg_part(
            engine,
            source_tb_name=export_tb,
            part_master_tb=export_master_tb,
            part_val={
                "from": [date_info],
                "to": [next_period]
            },
            cp_index_flag=False,
        )
    finally:
        # Close the database connection.
        engine.dispose()

    # Record completion in the export workflow table so that listeners can
    # tell whether all export scripts have finished.
    # NOTE(review): this update also runs for 'test' exports -- confirm that
    # marking the workflow as finished is intended in that case.
    update_workflow_sql = f"""                
                    update selection.workflow_progress set `status`='导出pg集群完成', status_val=6, over_date=CURRENT_TIME, is_end='是' where page='店铺Feedback' 
                    and `date_info`='{date_info}' and date_type='{date_type}' and site_name='{site_name}'
                """
    CommonUtil.modify_export_workflow_status(update_workflow_sql, site_name, date_type, date_info)

    print("success")