Commit 9ac6cf3e by chenyuanjie

fix

parent 1c7acf03
@@ -98,14 +98,14 @@ class DimKeepaAsinInfo(object):
 # 过滤脏数据:productType in (3,4,5) 且 title 为空的异常数据不做保留
 ~(F.col("product_type").isin(3, 4, 5) & F.col("title").isNull())
 ).cache()
-# 写入 Doris 需带 site_name 分区字段,并把 keepa_launch_time 转为 DATETIME(DDL 类型已改)
+# 写入 Doris 需带 site_name 分区字段,并把 keepa_launch_time / updated_time 转为 DATETIME(DDL 类型已改)
 # weight 字段在 Doris dwd_keepa_asin_detail 已废弃,select 不带
 self.df_to_doris = self.df_keepa_asin.select(
 F.lit(self.site_name).alias('site_name'),
 'asin', 'package_length', 'package_width', 'package_height', 'package_weight', 'item_weight',
 'listed_since', 'release_date', 'tracking_since',
 F.to_timestamp(F.col('keepa_launch_time')).alias('keepa_launch_time'),
-'updated_time'
+F.to_timestamp(F.col('updated_time')).alias('updated_time')
 )
# 读取历史数据 # 读取历史数据
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment