Commit 2691d9d0 by chenyuanjie

店铺数据解析店铺评分

parent 47359d84
......@@ -110,6 +110,7 @@ class DwtFbBaseReport(object):
count_lifetime as count_lifetime_num,
country_name as fb_country_name,
seller_address,
seller_rating,
date_format(updated_at, 'yyyy-MM-dd HH:mm:ss') as fb_crawl_date
from ods_seller_account_feedback
where site_name = '{self.site_name}'
......@@ -134,6 +135,23 @@ class DwtFbBaseReport(object):
"""
self.df_fb_feedback = self.spark.sql(sqlQuery=sql)
self.df_fb_feedback = self.df_fb_feedback.drop_duplicates(['seller_id']).cache()
# 解析seller_rating: 格式为"30天|-|90天|-|1年|-|历史",切分后转double,-1改为0
_rating_split = F.split(F.col("seller_rating"), r"\|-\|")
self.df_fb_feedback = self.df_fb_feedback \
.withColumn("rating_30_day_num",
F.when(_rating_split.getItem(0).cast(DoubleType()) == -1.0, F.lit(0.0))
.otherwise(_rating_split.getItem(0).cast(DoubleType()))) \
.withColumn("rating_90_day_num",
F.when(_rating_split.getItem(1).cast(DoubleType()) == -1.0, F.lit(0.0))
.otherwise(_rating_split.getItem(1).cast(DoubleType()))) \
.withColumn("rating_1_year_num",
F.when(_rating_split.getItem(2).cast(DoubleType()) == -1.0, F.lit(0.0))
.otherwise(_rating_split.getItem(2).cast(DoubleType()))) \
.withColumn("rating_lifetime_num",
F.when(_rating_split.getItem(3).cast(DoubleType()) == -1.0, F.lit(0.0))
.otherwise(_rating_split.getItem(3).cast(DoubleType())))
print(sql)
# 获取我们内部的店铺与asin的数据库(从搜索词抓下来,店铺与asin的关系表)
......@@ -406,6 +424,10 @@ class DwtFbBaseReport(object):
F.lit(None).alias('usr_mask_progress'),
F.col('business_name'),
F.col('business_addr'),
F.col('rating_30_day_num'),
F.col('rating_90_day_num'),
F.col('rating_1_year_num'),
F.col('rating_lifetime_num'),
F.lit(self.site_name).alias('site_name'),
F.lit(self.date_type).alias('date_type'),
F.lit(self.date_info).alias('date_info')
......
......@@ -88,7 +88,11 @@ if __name__ == '__main__':
"usr_mask_type",
"usr_mask_progress",
"business_name",
"business_addr"
"business_addr",
"rating_30_day_num",
"rating_90_day_num",
"rating_1_year_num",
"rating_lifetime_num"
],
partition_dict={
"site_name": site_name,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment