import os
import sys

sys.path.append(os.path.dirname(sys.path[0]))
from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil
from utils.common_util import DateTypes
from utils.hdfs_utils import HdfsUtils

# Users passed as ``msg_usr`` to the CommonUtil schema / row-count checks.
MSG_USERS = ('fangxingjun', 'pengyanbing', 'chenyuanjie')

# Sites for which a monthly source table name is defined by this script.
MONTH_SITES = ('us', 'uk', 'de')


def select_columns(st_type):
    """Return the comma-separated source column list for a search-term type.

    ``zr``/``sp`` tables carry ``page_row``, ``sb``/``tr`` tables carry
    ``data_type``; every other type uses the common column set.
    """
    if st_type in ("zr", "sp"):
        return "search_term,asin,page,page_row,created_time,updated_time,id"
    if st_type in ("sb", "tr"):
        return "search_term,asin,page,data_type,created_time,updated_time,id"
    return "search_term,asin,page,created_time,updated_time,id"


def zero_pad(value):
    """Return ``value`` (int or numeric string) as an at-least-2-digit string.

    The PostgreSQL partition suffixes are zero padded (01, 02, 03, ...).
    Going through ``int`` means an already padded input such as ``"05"``
    stays ``"05"`` instead of becoming ``"005"``.
    """
    return f"{int(value):02d}"


def week_db_type(site_name, date_info):
    """Return the database type that holds the weekly data.

    Only ``us`` weeks in the range ``2023-18`` (inclusive) to ``2023-34``
    (exclusive) are read from ``postgresql``; everything else is read from
    ``postgresql_14``.

    NOTE(review): the comparison is a plain string comparison, so it assumes
    ``date_info`` is a zero-padded ``YYYY-WW`` string -- confirm with callers.
    """
    if site_name == 'us' and '2023-18' <= date_info < '2023-34':
        return 'postgresql'
    return 'postgresql_14'


def build_query(cols, import_tb):
    """Return the import query selecting ``cols`` from ``import_tb``.

    The text ends with a literal ``\\$CONDITIONS`` placeholder (backslash
    included), presumably substituted by the import tool -- TODO confirm.
    """
    # "\\$" yields the same backslash + "$" text the script always emitted,
    # without relying on the deprecated invalid escape sequence "\$".
    return f"""
       select {cols}
        from {import_tb}
        where 1 = 1
        and \\$CONDITIONS
    """


def main():
    """Import one search-term rank partition from PostgreSQL into Hive/HDFS.

    Command-line arguments (all required), read via ``CommonUtil.get_sys_arg``:
        1. site_name
        2. st_type   (selects the Hive table ``ods_search_term_{st_type}``)
        3. date_type (week / month / month_week)
        4. date_info

    Raises:
        ValueError: an argument is missing or ``date_type`` is not supported.
        RuntimeError: the pre-import schema check reports a failure.
    """
    site_name = CommonUtil.get_sys_arg(1, None)
    st_type = CommonUtil.get_sys_arg(2, None)
    date_type = CommonUtil.get_sys_arg(3, None)
    date_info = CommonUtil.get_sys_arg(4, None)

    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    required_args = (
        ("site_name", site_name),
        ("st_type", st_type),
        ("date_type", date_type),
        ("date_info", date_info),
    )
    for arg_name, arg_value in required_args:
        if arg_value is None:
            raise ValueError(f"{arg_name} 不能为空！")

    hive_tb = f"ods_search_term_{st_type}"
    cols = select_columns(st_type)

    # Split the date; d1/d2 become the two trailing parts of the table name.
    d1, d2 = CommonUtil.split_month_week_date(date_type, date_info)

    if date_type == DateTypes.week.name:
        db_type = week_db_type(site_name, date_info)
        import_tb = f"{site_name}_search_term_rank_{st_type}_{d1}_{zero_pad(d2)}"
    elif date_type in (DateTypes.month.name, DateTypes.month_week.name):
        if site_name not in MONTH_SITES:
            print(f"其他站点{date_type}数据暂未明确，请检查是否dateType传输有误")
            # Exit status stays 0, exactly as the previous bare ``exit()``.
            sys.exit()
        db_type = 'postgresql_14'
        import_tb = f"{site_name}_search_term_rank_{st_type}_month_{d1}_{zero_pad(d2)}"
    else:
        # Previously this fell through and crashed later with a NameError
        # because db_type / import_tb were never assigned.
        raise ValueError(f"unsupported date_type: {date_type!r}")

    query = build_query(cols, import_tb)

    print(f"当前链接的数据库为：{db_type}，同步的表为：{import_tb}")
    partition_dict = {
        "site_name": site_name,
        "date_type": date_type,
        "date_info": date_info,
    }
    hdfs_path = CommonUtil.build_hdfs_path(hive_tb, partition_dict=partition_dict)
    print(f"hdfs_path is {hdfs_path}")

    if st_type in ('er', 'tr'):
        # These types skip the pre-import schema check.
        empty_flag = False
        print(f"st_type类型为{st_type},符合不检测类型跳过检测！")
    else:
        empty_flag, check_flag = CommonUtil.check_schema_before_import(
            db_type=db_type,
            site_name=site_name,
            query=query,
            hive_tb_name=hive_tb,
            msg_usr=list(MSG_USERS),
            partition_dict=partition_dict,
        )
        if not check_flag:
            raise RuntimeError(f"导入hive表{hive_tb}表结构检查失败！请检查query是否异常！！")

    # Nothing is imported when the check sets empty_flag
    # (presumably meaning the source query returned no rows -- TODO confirm).
    if empty_flag:
        return

    # zr data volume is large, so its import is built with map_num=10 and
    # key='id' (multi-process sync per the original comment).
    extra_args = {"map_num": 10, "key": "id"} if st_type == 'zr' else {}
    sh = CommonUtil.build_import_sh(site_name=site_name,
                                    db_type=db_type,
                                    query=query,
                                    hdfs_path=hdfs_path,
                                    **extra_args)

    # Delete the target path before importing.
    HdfsUtils.delete_hdfs_file(hdfs_path)
    client = SSHUtil.get_ssh_client()
    try:
        SSHUtil.exec_command_async(client, sh, ignore_err=False)
        CommonUtil.after_import(hdfs_path=hdfs_path, hive_tb=hive_tb)
    finally:
        # Always release the SSH client, even when the import command fails.
        client.close()

    # Post-import consistency check (skipped for month_week).
    if date_type != 'month_week':
        CommonUtil.check_import_sync_num(db_type=db_type,
                                         partition_dict=partition_dict,
                                         import_query=query,
                                         hive_tb_name=hive_tb,
                                         msg_usr=list(MSG_USERS))


if __name__ == '__main__':
    main()
