Commit 957e48f8 by hejiangming

店铺新增字段计算 导出增加判断 空分区不执行

parent dac2671d
......@@ -6,7 +6,7 @@ sys.path.append(os.path.dirname(sys.path[0]))
from utils.db_util import DBUtil
from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil
from utils.hdfs_utils import HdfsUtils
if __name__ == '__main__':
site_name = CommonUtil.get_sys_arg(1, None)
date_type = CommonUtil.get_sys_arg(2, None)
......@@ -36,6 +36,15 @@ if __name__ == '__main__':
# 获取数据库连接
engine = DBUtil.get_db_engine(db_type, site_name)
# 导出前校验 Hive 分区是否有数据,避免空分区触发交换导致 PG 数据被清空
hive_partition_path = f"/home/big_data_selection/dwt/dwt_aba_st_analytics/site_name={site_name}/date_type={date_type}/date_info={date_info}"
hive_files = HdfsUtils.read_list(hive_partition_path)
if not hive_files:
print(f"[ERROR] Hive 分区无数据文件,路径:{hive_partition_path},跳过导出,请先检查 DWT 计算任务是否正常写入!")
engine.dispose()
sys.exit(1)
print(f"Hive 分区文件数:{len(hive_files)},路径:{hive_partition_path},继续导出")
# 保证幂等性,先删除原始表同周期的数据
sql = f"""
drop table if exists {export_tb};
......@@ -92,7 +101,36 @@ if __name__ == '__main__':
"rating_30_day_num",
"rating_90_day_num",
"rating_1_year_num",
"rating_lifetime_num"
"rating_lifetime_num",
# 功能1:销量展示
"fb_shop_total_sales",
"fb_shop_home_sales",
"fb_shop_home_null_flag",
# 功能2:FBM 占比
"fb_fbm_asin_num",
"fb_fbm_valid_asin_num",
"fb_fbm_rate",
"fb_fbm_valid_rate",
# 功能3:Hot New 榜单
"fb_nsr_asin_num",
"fb_nsr_rate",
# 功能4:Best Seller 榜单
"fb_bs_asin_num",
"fb_bs_rate",
# 功能5:新品销量占比 / 新品数量占比(流量选品口径)
"fb_new_asin_sales_rate",
# 功能6:新品/老品平均利润率
"fb_new_ocean_profit_rate",
"fb_new_air_profit_rate",
"fb_old_ocean_profit_rate",
"fb_old_air_profit_rate",
"fb_flow_new_asin_rate",
# 功能7:店铺综合评分(各星级占比)
"fb_star_5_pct",
"fb_star_4_pct",
"fb_star_3_pct",
"fb_star_2_pct",
"fb_star_1_pct"
],
partition_dict={
"site_name": site_name,
......@@ -117,8 +155,8 @@ if __name__ == '__main__':
cp_index_flag=False,
)
update_workflow_sql = f"""
update selection.workflow_progress set `status`='导出pg集群完成', status_val=6, over_date=CURRENT_TIME, is_end='是' where page='店铺Feedback'
update_workflow_sql = f"""
update selection.workflow_progress set `status`='导出pg集群完成', status_val=6, over_date=CURRENT_TIME, is_end='是' where page='店铺Feedback'
and `date_info`='{date_info}' and date_type='{date_type}' and site_name='{site_name}'
"""
CommonUtil.modify_export_workflow_status(update_workflow_sql, site_name, date_type, date_info)
......
......@@ -33,9 +33,10 @@ if __name__ == '__main__':
num,
created_at,
updated_at,
regexp_replace(seller_address, E'[\\r\\n\\t]+', ' ', 'g') as seller_address,
regexp_replace(seller_rating, E'[\\r\\n\\t]+', ' ', 'g') as seller_rating,
regexp_replace(feedback_histogram, E'[\\r\\n\\t]+', ' ', 'g') as feedback_histogram
regexp_replace(seller_address, E'[\\r\\n\\t]+', ' ', 'g') as seller_address,
regexp_replace(seller_rating, E'[\\r\\n\\t]+', ' ', 'g') as seller_rating,
regexp_replace(feedback_histogram, E'[\\r\\n\\t]+', ' ', 'g') as feedback_histogram,
regexp_replace(metadata_json, E'[\\r\\n\\t]+', ' ', 'g') as metadata_json
from {import_table}
where 1=1
and \$CONDITIONS
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment