"""
Search-by-image pipeline: commands to run, in order:
# 1. Refresh the images and download them to local disk (run on h7 and h5)
/mnt/opt/module/anaconda3/envs/pyspark/bin/python3.8 /opt/module/spark-3.2.0-bin-hadoop3.2/demo/py_demo/img_search/img_download.py us amazon_inv 200 1
# 2. Pick up newly added images (by default, those from the most recent 7 days)
/mnt/opt/module/anaconda3/envs/pyspark/bin/python3.8 /opt/module/spark-3.2.0-bin-hadoop3.2/demo/py_demo/img_search/img_local_path.py us amazon_inv
# 3. Extract image features -- can run on several machines (h5/h6/h7) at the same time; currently run on h5 only
/opt/module/anaconda3/envs/pyspark/bin/python3.8 /opt/module/spark-3.2.0-bin-hadoop3.2/demo/py_demo/img_search/img_extract_features.py us amazon_inv 1000 5
# 4. Import the image feature data into the ODS layer
/mnt/run_shell/sqoop_shell/import/img_features.sh us amazon_inv
# 5. Slice the feature data (DIM layer)
/mnt/run_shell/spark_shell/dim/img_dim_features_slice.sh us amazon_inv
# 6.1 Build the index mapping -- Doris, img_hdfs_index (load into the copy table first)
/opt/module/anaconda3/envs/pyspark/bin/python3.8 /opt/module/spark-3.2.0-bin-hadoop3.2/demo/py_demo/img_search/img_hdfs_index.py us amazon_inv
# 6.2 Build the index mapping -- Hive, img_dwd_id_index
/opt/module/spark/bin/spark-submit --master yarn --driver-memory 2g --executor-memory 4g --executor-cores 1 --num-executors 1 --queue spark /opt/module/spark/demo/py_demo/img_search/img_dwd_id_index_multiprocess.py us amazon_inv 3
# 7. Export the id-to-index mapping to Doris (copy table)
/opt/module/spark/bin/spark-submit --master yarn --driver-memory 20g --executor-memory 20g --executor-cores 4 --num-executors 2 --queue spark /opt/module/spark/demo/py_demo/img_search/img_id_index_to_doris.py us amazon_inv
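# 8. Delete the existing index-related files under the HDFS paths
# NOTE: steps 8-13 reference ${site_name} and ${img_type}, which this runbook never sets; define them first (steps 1-7 use the literal values "us" and "amazon_inv")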
hdfs dfs -rm -r /home/img_search/img_parquet/${site_name}/${img_type}/* 2>/dev/null || true
hdfs dfs -rm -r /home/img_search/img_tmp/${site_name}/${img_type}/* 2>/dev/null || true
hdfs dfs -rm -r /home/img_search/img_index/${site_name}/${img_type}/* 2>/dev/null || true
# 9. Upload the local parquet files to HDFS
hdfs dfs -put /mnt/data/img_data/img_parquet/${site_name}/${img_type}/*/*.parquet /home/img_search/img_parquet/${site_name}/${img_type}/
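# 10. Create the index (NOTE(review): unlike the other steps, this command passes no site / image-type arguments -- confirm that is intended)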
/mnt/opt/module/anaconda3/envs/pyspark/bin/python3.8 /opt/module/spark/demo/py_demo/img_search/img_create_index.py
# 11. Clear the local index directory, then pull the index file (knn.index) from HDFS to local disk
rm -rf /mnt/data/img_data/img_index/${site_name}/${img_type}/* 2>/dev/null || true
hdfs dfs -get /home/img_search/img_index/${site_name}/${img_type}/knn.index /mnt/data/img_data/img_index/${site_name}/${img_type}/
# 12. Bring the search API up by restarting img_search.service on hadoop7
ssh hadoop7 systemctl restart img_search.service
# 13. Swap the table names
/opt/module/anaconda3/envs/pyspark/bin/python3.8 /opt/module/spark/demo/py_demo/img_search/img_alter_table_name.py ${site_name} ${img_type}

"""