#!/usr/bin/env bash
source /mnt/run_shell/sqoop_shell/templates.sh
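# templates.sh is assumed to provide the import_data helper invoked at the end of
# the loop (presumably a Sqoop free-form-query import that consumes the db, query
# and hdfs_path variables assembled below)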
# author: ffman
# usage: sync each site's store asin detail table
# params: $1-->site_name; $2-->date_type; $3-->date_info (YYYY-WW)
# version: 2.0
# create_date: 2022-11-15
# update_date: 2022-11-15
# Choose the site list from site_name ($1); the per-site database name is derived inside the loop
if [ "$1" == "all" ];
then
# site_name_array=(us uk de es fr it)
site_name_array=(uk de es fr it)
else
site_name_array=("$1")
fi
echo "site_name_array: ${site_name_array[*]}"
date_type=$2
date_info=$3
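# date_info is expected in the form YYYY-WW (e.g. 2022-46), matching the
# per-year/per-week sharding of the source MySQL tables below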
year=$(echo "$date_info" | cut -d '-' -f 1)
week=$(echo "$date_info" | cut -d '-' -f 2)
for site_name in "${site_name_array[@]}"
do
echo "1. Current MySQL database site: ${site_name}"
if [ "$site_name" == "us" ];
then
db=selection
else
db=selection_$site_name
fi
echo "db: ${db}"
cols=""
if [ "${week}" -ge 46 ] && [ "${year}" -ge 2022 ]
then
if [ "${site_name}" == "us" ]
then
cols=',sp_num,`describe`'
else
cols=',sp_num'
fi
fi
echo "site_name:${site_name}, year:${year}, week:${week}, cols:${cols}"
echo "2. First remove the existing partition data"
hive_table=ods_asin_detail
hdfs_path="/home/big_data_selection/ods/ods_asin_detail/site_name=${site_name}/date_type=$date_type/date_info=$date_info"
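# Clearing the partition directory first keeps the re-import idempotent: rerunning
# the same site/date replaces the files instead of duplicating rows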
hdfs dfs -rm -r "${hdfs_path}/*"
echo "3. Import the data"
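# Source MySQL tables are sharded per site/year/week, e.g. us_asin_detail_2022_46;
# 10#$week forces base-10 so weeks like "08"/"09" are not rejected as invalid octal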
import_table=${site_name}_asin_detail_${year}_$((10#$week))
query="select id,asin,img_url,title,title_len,price,rating,total_comments,buy_box_seller_type,page_inventory,category,volume,weight,rank,launch_time,created_at,updated_at,category_state,img_num,img_type,activity_type,one_two_val,three_four_val,five_six_val,eight_val,qa_num,one_star,two_star,three_star,four_star,five_star,low_star,together_asin,brand,ac_name,material,node_id,data_type ${cols} from ${import_table} where 1=1 and \$CONDITIONS"
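# $CONDITIONS is Sqoop's mandatory placeholder in free-form query imports; Sqoop
# substitutes it with split-range predicates so the mappers can read in parallel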
import_data
done