Commit 957e48f8 by hejiangming

店铺新增字段计算 导出增加判断 空分区不执行

parent dac2671d
...@@ -6,7 +6,7 @@ sys.path.append(os.path.dirname(sys.path[0])) ...@@ -6,7 +6,7 @@ sys.path.append(os.path.dirname(sys.path[0]))
from utils.db_util import DBUtil from utils.db_util import DBUtil
from utils.ssh_util import SSHUtil from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil from utils.common_util import CommonUtil
from utils.hdfs_utils import HdfsUtils
if __name__ == '__main__': if __name__ == '__main__':
site_name = CommonUtil.get_sys_arg(1, None) site_name = CommonUtil.get_sys_arg(1, None)
date_type = CommonUtil.get_sys_arg(2, None) date_type = CommonUtil.get_sys_arg(2, None)
...@@ -36,6 +36,15 @@ if __name__ == '__main__': ...@@ -36,6 +36,15 @@ if __name__ == '__main__':
# 获取数据库连接 # 获取数据库连接
engine = DBUtil.get_db_engine(db_type, site_name) engine = DBUtil.get_db_engine(db_type, site_name)
# Pre-export check: verify that the Hive partition contains data files, so that an
# empty partition does not trigger the table swap and wipe the existing PG data.
hive_partition_path = f"/home/big_data_selection/dwt/dwt_aba_st_analytics/site_name={site_name}/date_type={date_type}/date_info={date_info}"
# NOTE(review): HdfsUtils.read_list is defined outside this view — presumably it returns
# the list of files under the path; confirm what it returns or raises when the path is missing.
hive_files = HdfsUtils.read_list(hive_partition_path)
if not hive_files:
print(f"[ERROR] Hive 分区无数据文件,路径:{hive_partition_path},跳过导出,请先检查 DWT 计算任务是否正常写入!")
# Dispose of the DB engine obtained above, then exit with a non-zero status.
engine.dispose()
sys.exit(1)
print(f"Hive 分区文件数:{len(hive_files)},路径:{hive_partition_path},继续导出")
# 保证幂等性,先删除原始表同周期的数据 # 保证幂等性,先删除原始表同周期的数据
sql = f""" sql = f"""
drop table if exists {export_tb}; drop table if exists {export_tb};
...@@ -92,7 +101,36 @@ if __name__ == '__main__': ...@@ -92,7 +101,36 @@ if __name__ == '__main__':
"rating_30_day_num", "rating_30_day_num",
"rating_90_day_num", "rating_90_day_num",
"rating_1_year_num", "rating_1_year_num",
"rating_lifetime_num" "rating_lifetime_num",
# Feature 1: sales display
"fb_shop_total_sales",
"fb_shop_home_sales",
"fb_shop_home_null_flag",
# Feature 2: FBM share
"fb_fbm_asin_num",
"fb_fbm_valid_asin_num",
"fb_fbm_rate",
"fb_fbm_valid_rate",
# Feature 3: Hot New list
"fb_nsr_asin_num",
"fb_nsr_rate",
# Feature 4: Best Seller list
"fb_bs_asin_num",
"fb_bs_rate",
# Feature 5: new-product sales share / new-product count share (traffic-selection definition)
"fb_new_asin_sales_rate",
# Feature 6: average profit rate of new / old products
"fb_new_ocean_profit_rate",
"fb_new_air_profit_rate",
"fb_old_ocean_profit_rate",
"fb_old_air_profit_rate",
# NOTE(review): this column sits under the feature-6 heading, but its name suggests it is
# the "new-product count share" named in the feature-5 heading — confirm the position is
# intentional before reordering, since the column order may matter to the export.
"fb_flow_new_asin_rate",
# Feature 7: overall shop rating (share of each star level)
"fb_star_5_pct",
"fb_star_4_pct",
"fb_star_3_pct",
"fb_star_2_pct",
"fb_star_1_pct"
], ],
partition_dict={ partition_dict={
"site_name": site_name, "site_name": site_name,
...@@ -117,8 +155,8 @@ if __name__ == '__main__': ...@@ -117,8 +155,8 @@ if __name__ == '__main__':
cp_index_flag=False, cp_index_flag=False,
) )
update_workflow_sql = f""" update_workflow_sql = f"""
update selection.workflow_progress set `status`='导出pg集群完成', status_val=6, over_date=CURRENT_TIME, is_end='是' where page='店铺Feedback' update selection.workflow_progress set `status`='导出pg集群完成', status_val=6, over_date=CURRENT_TIME, is_end='是' where page='店铺Feedback'
and `date_info`='{date_info}' and date_type='{date_type}' and site_name='{site_name}' and `date_info`='{date_info}' and date_type='{date_type}' and site_name='{site_name}'
""" """
CommonUtil.modify_export_workflow_status(update_workflow_sql, site_name, date_type, date_info) CommonUtil.modify_export_workflow_status(update_workflow_sql, site_name, date_type, date_info)
......
...@@ -33,9 +33,10 @@ if __name__ == '__main__': ...@@ -33,9 +33,10 @@ if __name__ == '__main__':
num, num,
created_at, created_at,
updated_at, updated_at,
regexp_replace(seller_address, E'[\\r\\n\\t]+', ' ', 'g') as seller_address, regexp_replace(seller_address, E'[\\r\\n\\t]+', ' ', 'g') as seller_address,
regexp_replace(seller_rating, E'[\\r\\n\\t]+', ' ', 'g') as seller_rating, regexp_replace(seller_rating, E'[\\r\\n\\t]+', ' ', 'g') as seller_rating,
regexp_replace(feedback_histogram, E'[\\r\\n\\t]+', ' ', 'g') as feedback_histogram regexp_replace(feedback_histogram, E'[\\r\\n\\t]+', ' ', 'g') as feedback_histogram,
regexp_replace(metadata_json, E'[\\r\\n\\t]+', ' ', 'g') as metadata_json
from {import_table} from {import_table}
where 1=1 where 1=1
and \$CONDITIONS and \$CONDITIONS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment