Commit 20ea475b by chenyuanjie

流量选品-bug修复

parent ca34c459
......@@ -517,7 +517,7 @@ class DwtFlowAsin(Templates):
"package_quantity": 1, "is_movie_label": 0, "is_brand_label": 0, "is_alarm_brand": 0,
"title_matching_degree": 0.0, "asin_lqs_rating": 0.0, "follow_sellers_count": -1})
self.df_save = self.df_save.repartition(60).persist(StorageLevel.DISK_ONLY)
self.df_save = self.df_save.drop_duplicates(['asin']).filter(F.length(F.col("asin"))<=10)
self.df_save = self.df_save.drop_duplicates(['asin']).filter((F.col("asin").isNotNull()) & (F.col("asin") != "") & (F.length(F.col("asin")) <= 10))
print("数据量为:", self.df_save.count())
self.df_save.show(10, truncate=False)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment