Commit 9a02c65a by wangjing

no message

parent 68c22cae
......@@ -98,6 +98,10 @@ class DwtStDetailWeek(object):
'dt_rank', F.row_number().over(window=window)
).filter('dt_rank=1').drop('dt_rank', 'updated_time').cache()
# 对数据列清洗 有些是\0的数据
for col in ['search_term', 'asin1', 'asin2', 'asin3', 'product_title1', 'product_title2', 'product_title3', 'brand1', 'brand2', 'brand3', 'category1', 'category2', 'category3']:
self.df_st_detail = self.df_st_detail.withColumn(col, F.regexp_replace(F.col(col), '\x00', ''))
self.df_st_detail_last_week = self.df_st_detail.filter(f"date_info = '{self.date_info_last_week}'")
for col in self.cols:
self.df_st_detail_last_week = self.df_st_detail_last_week.withColumnRenamed(
......
......@@ -103,7 +103,7 @@ if __name__ == '__main__':
"site_name": site_name,
"date_type": date_type,
"date_info": date_info
}
},num_mappers=2
)
client = SSHUtil.get_ssh_client()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment