Commit 14af7a9b by wangjing

no message

parent ba46b2f2
...@@ -105,7 +105,7 @@ class DwtStDetailWeek(object): ...@@ -105,7 +105,7 @@ class DwtStDetailWeek(object):
) )
self.df_st_detail_last_week.cache() self.df_st_detail_last_week.cache()
print("1周前数据如下:") print("1周前数据如下:")
self.df_st_detail_last_week.show(10, True) # self.df_st_detail_last_week.show(10, True)
self.df_st_detail_last_4_week = self.df_st_detail.filter(f"date_info > '{self.date_info_4_week_ago}'") self.df_st_detail_last_4_week = self.df_st_detail.filter(f"date_info > '{self.date_info_4_week_ago}'")
for col in self.cols: for col in self.cols:
...@@ -114,7 +114,7 @@ class DwtStDetailWeek(object): ...@@ -114,7 +114,7 @@ class DwtStDetailWeek(object):
) )
self.df_st_detail_last_4_week.cache() self.df_st_detail_last_4_week.cache()
print("近4周数据如下:") print("近4周数据如下:")
self.df_st_detail_last_4_week.show(10, True) # self.df_st_detail_last_4_week.show(10, True)
self.df_st_detail_4_week_ago = self.df_st_detail.filter(f"date_info = '{self.date_info_4_week_ago}'") self.df_st_detail_4_week_ago = self.df_st_detail.filter(f"date_info = '{self.date_info_4_week_ago}'")
for col in self.cols: for col in self.cols:
...@@ -123,7 +123,7 @@ class DwtStDetailWeek(object): ...@@ -123,7 +123,7 @@ class DwtStDetailWeek(object):
) )
self.df_st_detail_4_week_ago.cache() self.df_st_detail_4_week_ago.cache()
print("4周前数据如下:") print("4周前数据如下:")
self.df_st_detail_4_week_ago.show(10, True) # self.df_st_detail_4_week_ago.show(10, True)
self.df_st_detail_12_week_ago = self.df_st_detail.filter(f"date_info = '{self.date_info_12_week_ago}'") self.df_st_detail_12_week_ago = self.df_st_detail.filter(f"date_info = '{self.date_info_12_week_ago}'")
for col in self.cols: for col in self.cols:
...@@ -132,11 +132,11 @@ class DwtStDetailWeek(object): ...@@ -132,11 +132,11 @@ class DwtStDetailWeek(object):
) )
self.df_st_detail_12_week_ago.cache() self.df_st_detail_12_week_ago.cache()
print("12周前数据如下:") print("12周前数据如下:")
self.df_st_detail_12_week_ago.show(10, True) # self.df_st_detail_12_week_ago.show(10, True)
self.df_st_detail = self.df_st_detail.filter(f"date_info = '{self.date_info}'").cache() self.df_st_detail = self.df_st_detail.filter(f"date_info = '{self.date_info}'").cache()
print("本周数据如下:") print("本周数据如下:")
self.df_st_detail.show(10, True) # self.df_st_detail.show(10, True)
sql = f""" sql = f"""
select rank, search_num as search_volume, rate as st_search_rate, date_info from ods_rank_search_rate_repeat where site_name = '{self.site_name}'; select rank, search_num as search_volume, rate as st_search_rate, date_info from ods_rank_search_rate_repeat where site_name = '{self.site_name}';
...@@ -147,14 +147,14 @@ class DwtStDetailWeek(object): ...@@ -147,14 +147,14 @@ class DwtStDetailWeek(object):
'date_info_rank', F.row_number().over(window=window) 'date_info_rank', F.row_number().over(window=window)
).filter('date_info_rank=1').drop('date_info_rank', 'date_info').cache() ).filter('date_info_rank=1').drop('date_info_rank', 'date_info').cache()
print("搜索词排名+搜索量+转化率如下:") print("搜索词排名+搜索量+转化率如下:")
self.df_st_rank.show(10, True) # self.df_st_rank.show(10, True)
sql = f""" sql = f"""
select st_key, search_term from ods_st_key where site_name = '{self.site_name}'; select st_key, search_term from ods_st_key where site_name = '{self.site_name}';
""" """
self.df_st_key = self.spark.sql(sql).cache() self.df_st_key = self.spark.sql(sql).cache()
print("搜索词key如下:") print("搜索词key如下:")
self.df_st_key.show(10, True) # self.df_st_key.show(10, True)
sql = f""" sql = f"""
select search_term, st_bsr_cate_1_id_new as category_id, st_bsr_cate_current_id_new as category_current_id, market_cycle_type, date_info from dwt_aba_st_analytics where site_name = '{self.site_name}' and date_type = 'month' and date_info <= '{self.year_month}'; select search_term, st_bsr_cate_1_id_new as category_id, st_bsr_cate_current_id_new as category_current_id, market_cycle_type, date_info from dwt_aba_st_analytics where site_name = '{self.site_name}' and date_type = 'month' and date_info <= '{self.year_month}';
...@@ -165,7 +165,7 @@ class DwtStDetailWeek(object): ...@@ -165,7 +165,7 @@ class DwtStDetailWeek(object):
'date_info_rank', F.row_number().over(window=window) 'date_info_rank', F.row_number().over(window=window)
).filter('date_info_rank=1').drop('date_info_rank', 'date_info').cache() ).filter('date_info_rank=1').drop('date_info_rank', 'date_info').cache()
print("分类、市场周期月数据如下:") print("分类、市场周期月数据如下:")
self.df_st_month.show(10, True) # self.df_st_month.show(10, True)
# 从pgsql获取特殊字符匹配字典表:match_character_dict # 从pgsql获取特殊字符匹配字典表:match_character_dict
pg_sql = f""" pg_sql = f"""
...@@ -196,7 +196,7 @@ class DwtStDetailWeek(object): ...@@ -196,7 +196,7 @@ class DwtStDetailWeek(object):
'search_term', 'rank_change_1_week_ago', 'rank_rate_1_week_ago' 'search_term', 'rank_change_1_week_ago', 'rank_rate_1_week_ago'
).cache() ).cache()
print("1周前排名变化数据如下:") print("1周前排名变化数据如下:")
self.df_st_detail_1_week_ago.show(10, True) # self.df_st_detail_1_week_ago.show(10, True)
self.df_st_detail_2_week_ago = self.df_st_detail_week.filter(F.col('date_info') == self.date_info_2_week_ago).withColumnRenamed( self.df_st_detail_2_week_ago = self.df_st_detail_week.filter(F.col('date_info') == self.date_info_2_week_ago).withColumnRenamed(
'rank_change_last_1_week', 'rank_change_2_week_ago' 'rank_change_last_1_week', 'rank_change_2_week_ago'
...@@ -206,7 +206,7 @@ class DwtStDetailWeek(object): ...@@ -206,7 +206,7 @@ class DwtStDetailWeek(object):
'search_term', 'rank_change_2_week_ago', 'rank_rate_2_week_ago' 'search_term', 'rank_change_2_week_ago', 'rank_rate_2_week_ago'
).cache() ).cache()
print("2周前排名变化数据如下:") print("2周前排名变化数据如下:")
self.df_st_detail_2_week_ago.show(10, True) # self.df_st_detail_2_week_ago.show(10, True)
self.df_st_detail_3_week_ago = self.df_st_detail_week.filter(F.col('date_info') == self.date_info_3_week_ago).withColumnRenamed( self.df_st_detail_3_week_ago = self.df_st_detail_week.filter(F.col('date_info') == self.date_info_3_week_ago).withColumnRenamed(
'rank_change_last_1_week', 'rank_change_3_week_ago' 'rank_change_last_1_week', 'rank_change_3_week_ago'
...@@ -216,7 +216,7 @@ class DwtStDetailWeek(object): ...@@ -216,7 +216,7 @@ class DwtStDetailWeek(object):
'search_term', 'rank_change_3_week_ago', 'rank_rate_3_week_ago' 'search_term', 'rank_change_3_week_ago', 'rank_rate_3_week_ago'
).cache() ).cache()
print("3周前排名变化数据如下:") print("3周前排名变化数据如下:")
self.df_st_detail_3_week_ago.show(10, True) # self.df_st_detail_3_week_ago.show(10, True)
self.df_st_detail_week.unpersist() self.df_st_detail_week.unpersist()
def handle_st_flag(self): def handle_st_flag(self):
...@@ -229,7 +229,7 @@ class DwtStDetailWeek(object): ...@@ -229,7 +229,7 @@ class DwtStDetailWeek(object):
'is_search_text', F.lit(1) 'is_search_text', F.lit(1)
).select('search_term', 'is_search_text').cache() ).select('search_term', 'is_search_text').cache()
print("热搜词如下:") print("热搜词如下:")
df_hot_search_term.show(10, True) # df_hot_search_term.show(10, True)
# 上升词:本周环比上周排名增长50%的搜索词 # 上升词:本周环比上周排名增长50%的搜索词
df_rising_search_term = self.df_st_detail.join( df_rising_search_term = self.df_st_detail.join(
...@@ -238,7 +238,7 @@ class DwtStDetailWeek(object): ...@@ -238,7 +238,7 @@ class DwtStDetailWeek(object):
"is_ascending_text", (((F.col('rank_last_week') - F.col('rank')) / F.col('rank_last_week')) >= 0.5).cast('int') "is_ascending_text", (((F.col('rank_last_week') - F.col('rank')) / F.col('rank_last_week')) >= 0.5).cast('int')
).select('search_term', 'is_ascending_text').cache() ).select('search_term', 'is_ascending_text').cache()
print("上升词如下:") print("上升词如下:")
df_rising_search_term.show(10, True) # df_rising_search_term.show(10, True)
# 新增词:本周环比上周新出现的搜索词 # 新增词:本周环比上周新出现的搜索词
df_first_search_term = self.df_st_detail.join( df_first_search_term = self.df_st_detail.join(
...@@ -247,7 +247,7 @@ class DwtStDetailWeek(object): ...@@ -247,7 +247,7 @@ class DwtStDetailWeek(object):
'is_first_text', F.lit(1) 'is_first_text', F.lit(1)
).select('search_term', 'is_first_text').cache() ).select('search_term', 'is_first_text').cache()
print("新增词如下:") print("新增词如下:")
df_first_search_term.show(10, True) # df_first_search_term.show(10, True)
# 高回报词:最近4周都出现且点击占比(总)>转化占比(总) # 高回报词:最近4周都出现且点击占比(总)>转化占比(总)
df_high_return_search_term = self.df_st_detail_last_4_week.groupBy(['search_term', 'date_info']).agg( df_high_return_search_term = self.df_st_detail_last_4_week.groupBy(['search_term', 'date_info']).agg(
...@@ -263,7 +263,7 @@ class DwtStDetailWeek(object): ...@@ -263,7 +263,7 @@ class DwtStDetailWeek(object):
'is_high_return_text', F.lit(1) 'is_high_return_text', F.lit(1)
).select('search_term', 'is_high_return_text').cache() ).select('search_term', 'is_high_return_text').cache()
print("高回报词如下:") print("高回报词如下:")
df_high_return_search_term.show(10, True) # df_high_return_search_term.show(10, True)
self.df_st_detail = self.df_st_detail.join( self.df_st_detail = self.df_st_detail.join(
df_hot_search_term, 'search_term', 'left' df_hot_search_term, 'search_term', 'left'
......
...@@ -14,7 +14,7 @@ if __name__ == '__main__': ...@@ -14,7 +14,7 @@ if __name__ == '__main__':
CommonUtil.judge_is_work_hours( CommonUtil.judge_is_work_hours(
site_name=site_name, date_type=date_type, date_info=date_info, site_name=site_name, date_type=date_type, date_info=date_info,
principal='chenyuanjie', priority=1, export_tools_type=1, belonging_to_process='ABA周增长' principal='hejiangming', priority=1, export_tools_type=1, belonging_to_process='ABA周增长'
) )
db_type = DbTypes.postgresql_cluster.name db_type = DbTypes.postgresql_cluster.name
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment