Commit 10a8426c by wujicang

Update self_asin_redis.py

parent 764c8b08
...@@ -168,11 +168,7 @@ def export(): ...@@ -168,11 +168,7 @@ def export():
img_num, img_num,
date_format(updated_at, '%Y-%m-%d %H:%m:%S') updated_at, date_format(updated_at, '%Y-%m-%d %H:%m:%S') updated_at,
date_format(online_time, '%Y-%m-%d %H:%m:%S') online_time, date_format(online_time, '%Y-%m-%d %H:%m:%S') online_time,
case returns
when returns = 'Customers usually keep this item' then 1
when returns = 'Frequently returned item' then 2
else 0
end as is_high_return_rate
from {site_name}_self_asin_detail from {site_name}_self_asin_detail
where updated_at >= '{last_day}' where updated_at >= '{last_day}'
and updated_at <= '{next_day}' and updated_at <= '{next_day}'
...@@ -186,6 +182,14 @@ def export(): ...@@ -186,6 +182,14 @@ def export():
.where("row_number == 1") \ .where("row_number == 1") \
.drop("row_number") .drop("row_number")
asin_df = asin_df.withColumn("is_high_return_rate", F.expr("""
case
when returns = 'Customers usually keep this item' then 1
when returns = 'Frequently returned item' then 2
else 0
end
""")).drop("returns")
# 填充默认值 # 填充默认值
asin_df = na_fill(asin_df).cache() asin_df = na_fill(asin_df).cache()
if "redis" in export_type: if "redis" in export_type:
...@@ -305,11 +309,7 @@ def export_all(): ...@@ -305,11 +309,7 @@ def export_all():
img_num, img_num,
date_format(updated_at, '%Y-%m-%d %H:%m:%S') updated_at, date_format(updated_at, '%Y-%m-%d %H:%m:%S') updated_at,
date_format(online_time, '%Y-%m-%d %H:%m:%S') online_time, date_format(online_time, '%Y-%m-%d %H:%m:%S') online_time,
case returns
when returns = 'Customers usually keep this item' then 1
when returns = 'Frequently returned item' then 2
else 0
end as is_high_return_rate
from ( from (
select max(id) as max_id select max(id) as max_id
from {site_name}_self_asin_detail from {site_name}_self_asin_detail
...@@ -318,6 +318,13 @@ def export_all(): ...@@ -318,6 +318,13 @@ def export_all():
inner join {site_name}_self_asin_detail tmp2 on tmp1.max_id = tmp2.id inner join {site_name}_self_asin_detail tmp2 on tmp1.max_id = tmp2.id
""" """
asin_df = SparkUtil.read_jdbc(spark, DbTypes.mysql.name, site_name, query=query) asin_df = SparkUtil.read_jdbc(spark, DbTypes.mysql.name, site_name, query=query)
asin_df = asin_df.withColumn("is_high_return_rate", F.expr("""
case
when returns = 'Customers usually keep this item' then 1
when returns = 'Frequently returned item' then 2
else 0
end
""")).drop("returns")
# 填充默认值 # 填充默认值
asin_df = na_fill(asin_df) asin_df = na_fill(asin_df)
asin_df.toJSON().foreachPartition(functools.partial(save_to_redis_list, batch=1000, redis_key=redis_key, ttl=3600 * 24 * 7)) asin_df.toJSON().foreachPartition(functools.partial(save_to_redis_list, batch=1000, redis_key=redis_key, ttl=3600 * 24 * 7))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment