#! /bin/env bash

# author: ffman
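# usage: sync the zr, sp, sb, ac, bs, er, tr tables of each site (MySQL -> HDFS via sqoop, then Hive partition repair)
# params: $1-->site_name_array ("all" or a space-separated list, e.g. "us uk"); $2-->data_type_array ("all" or a space-separated list, e.g. "zr sp"); $3-->year; $4-->week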
# version: 3.0
# create_date:2022-3-7
# update_date:2022-5-18


# ---------------------------------------------------------------------------
# Argument handling.
#   $1  site selector:      "all" or a whitespace-separated list ("us uk")
#   $2  data type selector: "all" or a whitespace-separated list ("zr sp")
#   $3  year
#   $4  week
# Sets the globals site_name_array, data_type_array, year, week and dt.
# ---------------------------------------------------------------------------
if (( $# < 4 )); then
        # Only warn: the script has always carried on with whatever it was given.
        echo "warning: expected 4 arguments (sites, data types, year, week), got $#" >&2
fi

if [[ "${1:-}" == all ]]; then
        site_name_array=(us uk de es fr it)
else
        # Split on whitespace WITHOUT pathname expansion (a bare ($1) would also
        # glob). With -d '' read consumes the whole value and reports
        # end-of-input as a non-zero status, hence the "|| true".
        IFS=$' \t\n' read -r -d '' -a site_name_array <<< "${1:-}" || true
fi

if [[ "${2:-}" == all ]]; then
        data_type_array=(zr sp sb ac bs er tr)
else
        IFS=$' \t\n' read -r -d '' -a data_type_array <<< "${2:-}" || true
fi


year=${3:-}
week=${4:-}
# dt is the partition value used in the HDFS path (dt=<year>-<week>).
dt=${year}-${week}
# [*] prints every selected site; a bare ${site_name_array} is element 0 only.
echo "site_name_array: ${site_name_array[*]}, year: ${year}, week: ${week}"

#######################################
# Import one MySQL table into HDFS with sqoop, LZO-compressed and
# tab-delimited, using a single mapper.
# Globals (read):
#   db           - MySQL database name appended to the JDBC URL
#   hdfs_path    - HDFS target directory (sqoop runs with --append)
#   mysql_table  - table selected by the free-form query
# Environment (optional overrides):
#   SQOOP_BIN             - sqoop executable (default: the 1.4.6 install)
#   SQOOP_MYSQL_PASSWORD  - MySQL password (default: the built-in value)
# Returns:
#   1 if a required global is empty, otherwise sqoop's exit status.
#######################################
import_data(){
    local sqoop_bin=${SQOOP_BIN:-/opt/module/sqoop-1.4.6/bin/sqoop}
    # NOTE(review): a credential is hard-coded here and is passed on the
    # command line. Prefer supplying SQOOP_MYSQL_PASSWORD from the environment,
    # or switch to sqoop's --password-file option.
    local mysql_password=${SQOOP_MYSQL_PASSWORD:-HmRCMUjt03M33Lze}

    # Refuse to build a malformed JDBC URL / query from empty inputs.
    if [[ -z "${db:-}" || -z "${hdfs_path:-}" || -z "${mysql_table:-}" ]]; then
        echo "import_data: db, hdfs_path and mysql_table must all be set" >&2
        return 1
    fi

    # \$CONDITIONS must reach sqoop literally; it is the placeholder sqoop
    # requires in every --query import.
    "${sqoop_bin}" import -D mapred.job.queue.name=default -D mapred.task.timeout=0 --append \
        --connect "jdbc:mysql://rm-wz9yg9bsb2zf01ea4yo.mysql.rds.aliyuncs.com:3306/${db}" \
        --username adv_yswg \
        --password "${mysql_password}" \
        --target-dir "${hdfs_path}" \
        --query "select * from ${mysql_table} where 1=1 and  \$CONDITIONS" \
        --fields-terminated-by '\t' \
        --compress \
        --compression-codec lzop \
        --m 1
}


#######################################
# For every selected (site, data type) pair: clear the target partition
# directory, import the MySQL table, build the LZO index and repair the Hive
# table metadata.
# Globals (read):    site_name_array, data_type_array, year, week, dt
# Globals (written): db, hdfs_path, mysql_table - left global on purpose,
#                    because import_data reads them
# Returns:
#   0 when every import succeeded, 1 when at least one import failed.
#######################################
sync_tables() {
    local site_name data_type
    local failures=0

    for site_name in "${site_name_array[@]}"; do
        echo "1. 当前连接的mysql数据库站点: ${site_name}"
        # The us site uses the unsuffixed database; every other site has its own.
        if [[ "${site_name}" == us ]]; then
            db=selection
        else
            db=selection_${site_name}
        fi
        echo "db: ${db}"

        for data_type in "${data_type_array[@]}"; do
            echo "2. 先删除已经存在的分区: ${site_name}, ${data_type}"
            hdfs_path=/home/ffman/ods/ods_search_term_rank_${data_type}/site_name=${site_name}/dt=${dt}
            # Quoted so the glob is handed to HDFS instead of being expanded
            # against the local filesystem by the shell. The import runs with
            # --append, so old files must go first to avoid duplicated rows.
            hdfs dfs -rm -r "${hdfs_path}/*"

            echo "3. 导入数据"
            mysql_table=${site_name}_search_term_rank_${data_type}_${year}_${week}
            echo "mysql_table: ${mysql_table}"
            if ! import_data; then
                # Do not index or register a partition whose import failed.
                echo "import failed for ${mysql_table}; skipping LZO index and metadata repair" >&2
                failures=$((failures + 1))
                continue
            fi

            echo "4. 建立lzo索引"
            hadoop jar \
                /opt/module/hadoop/share/hadoop/common/hadoop-lzo-0.4.20.jar \
                com.hadoop.compression.lzo.DistributedLzoIndexer \
                "${hdfs_path}"

            echo "5. 恢复外部表的元数据"
            hive -e "MSCK REPAIR TABLE big_data_selection.ods_search_term_rank_${data_type}"
        done
    done

    (( failures == 0 ))
}

# Surface import failures through the script's exit status.
sync_tables || exit 1
