1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env bash
# Rebuild the dim_asin_detail Hive partition(s) for one or all sites:
# delete the existing partition data on HDFS, then submit the matching
# PySpark job to YARN to regenerate it.
#
# Usage: script.sh <site_name|all> <date_type> <date_info>
#   $1 - site name (us/uk/de/es/fr/it), or "all" to process every site
#   $2 - date type (e.g. month, last30day, week, ...)
#   $3 - date value identifying the partition
#
# NOTE: 'set -e' is intentionally omitted — 'hdfs dfs -rm -r' is expected
# to fail harmlessly when the partition does not exist yet.
set -u

date_type=$2
date_info=$3
hive_table=dim_asin_detail
hive_level=dim

# "all" expands to the full site list; otherwise run only the given site.
if [[ "$1" == all ]]; then
  site_name_array=(us uk de es fr it)
  # site_name_array=(uk de es fr it)
else
  site_name_array=("$1")
fi

for site_name in "${site_name_array[@]}"; do
  echo "site_name: ${site_name}","date_type: ${date_type}","date_info: ${date_info}"
  echo "hive_table: ${hive_table}"

  # Drop the existing partition data so the Spark job rewrites it from scratch.
  hdfs dfs -rm -r "/home/big_data_selection/${hive_level}/${hive_table}/site_name=${site_name}/date_type=${date_type}/date_info=${date_info}"

  # Larger date ranges need more executors.
  if [[ "$date_type" == month || "$date_type" == last30day ]]; then
    exec_num=30
  else
    exec_num=10
  fi

  # Submit to YARN.
  /opt/module/spark/bin/spark-submit \
    --master yarn \
    --driver-memory 4g \
    --executor-memory 20g \
    --executor-cores 4 \
    --num-executors "$exec_num" \
    --queue spark \
    "/opt/module/spark/demo/py_demo/${hive_level}/${hive_table}.py" "${site_name}" "${date_type}" "${date_info}"
done