Commit 13f4a99e by chenyuanjie

关联流量-同步内部asin数据

parent 6c0b5a67
import os
import sys
sys.path.append(os.path.dirname(sys.path[0]))
from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil
from utils.hdfs_utils import HdfsUtils
if __name__ == '__main__':
site_name = CommonUtil.get_sys_arg(1, None)
assert site_name is not None, "site_name 不能为空!"
hive_table = "ods_self_asin_related_traffic"
partition_dict = {"site_name": site_name}
hdfs_path = CommonUtil.build_hdfs_path(hive_table, partition_dict=partition_dict)
print(f"hdfs_path is {hdfs_path}")
db_type = 'mysql'
import_table = f"{site_name}_self_asin_detail"
sql_query = f"""
select
id,
asin,
together_asin,
sp_initial_seen_asins_json,
sp_4stars_initial_seen_asins_json,
sp_delivery_initial_seen_asins_json,
compare_similar_asin_json,
result_list_json,
updated_at
from {import_table}
where site = '{site_name}'
and DATE(updated_at) >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
and \$CONDITIONS
"""
# 生成导出脚本
import_sh = CommonUtil.build_import_sh(
site_name=site_name, db_type=db_type, query=sql_query, hdfs_path=hdfs_path, map_num=25, key='id'
)
# 导入前先删除原始hdfs数据
HdfsUtils.delete_hdfs_file(hdfs_path)
# 创建ssh Client对象--用于执行cmd命令
client = SSHUtil.get_ssh_client()
SSHUtil.exec_command_async(client, import_sh, ignore_err=False)
# 创建lzo索引和修复元数据
CommonUtil.after_import(hdfs_path=hdfs_path, hive_tb=hive_table)
# 关闭链接
client.close()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment