Commit 2a634fbb by Peng

使用会员账号下载卖家精灵bsr销量。可以获取到5w排名之后的数据。每个月1号开始跑。查询sql有调整。只获取当月的预估销量。不再使用上个月的。因为是在当月跑的b…

使用会员账号下载卖家精灵 BSR 销量,可以获取到排名 5 万之后的数据。每个月 1 号开始跑。查询 SQL 有调整:只获取当月的预估销量,不再使用上个月的,因为 BSR 是在当月跑的。之前是因为账号不够,所以在上个月末就开始跑数据。
parent 1461772e
...@@ -13,7 +13,7 @@ import math ...@@ -13,7 +13,7 @@ import math
class CalculateMean(BaseUtils): class CalculateMean(BaseUtils):
def __init__(self, site_name='us', nums_start=0, nums_step=10000000, year=2025, week=1, flag_uniformity=False): def __init__(self, site_name='us', nums_start=0, nums_step=10000000, year=2026, week=1, flag_uniformity=False):
super().__init__() super().__init__()
self.site_name = site_name self.site_name = site_name
self.engine = self.mysql_connect() self.engine = self.mysql_connect()
...@@ -51,34 +51,42 @@ class CalculateMean(BaseUtils): ...@@ -51,34 +51,42 @@ class CalculateMean(BaseUtils):
print(_year_month) print(_year_month)
print(f"读取 {self.site_name}_one_category") print(f"读取 {self.site_name}_one_category")
# ---- 6 月份 ---- # # ---- 6 月份 ----
sql_6 = f""" # sql_6 = f"""
SELECT * from {self.site_name}_one_category WHERE id in ( select max(id) from {self.site_name}_one_category where `year_month`='2025_5' and orders=0 and rank>50000 GROUP BY `name`) # SELECT * from {self.site_name}_one_category WHERE id in ( select max(id) from {self.site_name}_one_category where `year_month`='2026_5' and orders=0 and rank>50000 GROUP BY `name`)
UNION # UNION
select * from {self.site_name}_one_category where `year_month`='2025_8' and rank<=50000 # select * from {self.site_name}_one_category where `year_month`='2026_8' and rank<=50000
""" # """
print('查询原始表6:', sql_6) # print('查询原始表6:', sql_6)
self.df_sum_6 = self.engine.read_sql(sql_6) # self.df_sum_6 = self.engine.read_sql(sql_6)
#
# ---- 7 月份 ---- # # ---- 7 月份 ----
sql_7 = f""" # sql_7 = f"""
SELECT * from {self.site_name}_one_category WHERE id in ( select max(id) from {self.site_name}_one_category where `year_month`='2025_5' and orders=0 and rank>50000 GROUP BY `name`) # SELECT * from {self.site_name}_one_category WHERE id in ( select max(id) from {self.site_name}_one_category where `year_month`='2026_5' and orders=0 and rank>50000 GROUP BY `name`)
UNION # UNION
select * from {self.site_name}_one_category where `year_month`='2025_9' and rank<=50000 # select * from {self.site_name}_one_category where `year_month`='2026_9' and rank<=50000
""" # """
print('查询原始表7:', sql_7) # print('查询原始表7:', sql_7)
self.df_sum_7 = self.engine.read_sql(sql_7) # self.df_sum_7 = self.engine.read_sql(sql_7)
#
# 合并后直接靠 keep='last' 留 7 月 # # 合并后直接靠 keep='last' 留 7 月
self.df_sum = pd.concat([self.df_sum_6, self.df_sum_7], ignore_index=True) # self.df_sum = pd.concat([self.df_sum_6, self.df_sum_7], ignore_index=True)
print("合并前总行数:", len(self.df_sum)) # print("合并前总行数:", len(self.df_sum))
self.df_sum.drop_duplicates(subset=['rank', 'name'], keep='last', inplace=True) # self.df_sum.drop_duplicates(subset=['rank', 'name'], keep='last', inplace=True)
print("合并去重后总行数:", len(self.df_sum)) # print("合并去重后总行数:", len(self.df_sum))
print(f"读取 {self.site_name}_one_category")
sql = f"select * from {self.site_name}_one_category where `year_month`='{_year_month}';"
print('查询原始表:', sql)
self.df_sum = self.engine.read_sql(sql)
print("self.df_sum.shape1:", self.df_sum.shape)
time.sleep(2)
# 排序、后续处理 # 排序、后续处理
self.df_sum.sort_values(by=['name', 'rank'], inplace=True) self.df_sum.sort_values(by=['name', 'rank'], inplace=True)
# # 删除重复行,保留最后一行
self.df_sum.drop_duplicates(['name', 'rank'], keep='last', inplace=True)
print("self.df_sum.shape2:", self.df_sum.shape)
print(self.df_sum[['year_month', 'rank', 'orders']].head(10).to_string()) print(self.df_sum[['year_month', 'rank', 'orders']].head(10).to_string())
self.cate_list = list(set(self.df_sum.name)) self.cate_list = list(set(self.df_sum.name))
sql_select = f"SELECT `year_month` from selection.week_20_to_30 WHERE `week`={int(self.week)} and `year`={self.year}" sql_select = f"SELECT `year_month` from selection.week_20_to_30 WHERE `week`={int(self.week)} and `year`={self.year}"
print(sql_select, 'sql_select:') print(sql_select, 'sql_select:')
...@@ -161,7 +169,7 @@ class CalculateMean(BaseUtils): ...@@ -161,7 +169,7 @@ class CalculateMean(BaseUtils):
with engine_us_mysql.begin() as conn: with engine_us_mysql.begin() as conn:
time_strftime = time.strftime("%Y-%m-%d %X", time.localtime()) time_strftime = time.strftime("%Y-%m-%d %X", time.localtime())
update_workflow_progress = f"update workflow_progress set status_val=3,status='抓取结束' where page='ASIN销量' and date_info='2025-{week}' and site_name='{site_name}' and date_type='week'" update_workflow_progress = f"update workflow_progress set status_val=3,status='抓取结束' where page='ASIN销量' and date_info='2026-{week}' and site_name='{site_name}' and date_type='week'"
print(update_workflow_progress) print(update_workflow_progress)
conn.execute(update_workflow_progress) conn.execute(update_workflow_progress)
...@@ -182,21 +190,21 @@ if __name__ == '__main__': ...@@ -182,21 +190,21 @@ if __name__ == '__main__':
# week = '04' # week = '04'
print("week 周:", week) print("week 周:", week)
time.sleep(2) time.sleep(2)
handle_obj_us = CalculateMean(site_name='us', year=2025, week=week) handle_obj_us = CalculateMean(site_name='us', year=2026, week=week)
handle_obj_us.run() handle_obj_us.run()
handle_obj_us.sendMessage(week, site_name='us') handle_obj_us.sendMessage(week, site_name='us')
# handle_obj_uk = CalculateMean(site_name='uk', year=2025, week=week) # handle_obj_uk = CalculateMean(site_name='uk', year=2026, week=week)
# handle_obj_uk.run() # handle_obj_uk.run()
# handle_obj_uk.sendMessage(week, site_name='uk') # handle_obj_uk.sendMessage(week, site_name='uk')
# handle_obj_de = CalculateMean(site_name='de', year=2025, week=week) # handle_obj_de = CalculateMean(site_name='de', year=2026, week=week)
# handle_obj_de.run() # handle_obj_de.run()
# handle_obj_de.sendMessage(week, site_name='de') # handle_obj_de.sendMessage(week, site_name='de')
# handle_obj_fr = CalculateMean(site_name='fr', year=2025, week=week) # handle_obj_fr = CalculateMean(site_name='fr', year=2026, week=week)
# handle_obj_fr.run() # handle_obj_fr.run()
# handle_obj_fr.sendMessage(week, site_name='fr') # handle_obj_fr.sendMessage(week, site_name='fr')
# handle_obj_es = CalculateMean(site_name='es', year=2025, week=week) # handle_obj_es = CalculateMean(site_name='es', year=2026, week=week)
# handle_obj_es.run() # handle_obj_es.run()
# handle_obj_es.sendMessage(week, site_name='es') # handle_obj_es.sendMessage(week, site_name='es')
# handle_obj_it = CalculateMean(site_name='it', year=2025, week=week) # handle_obj_it = CalculateMean(site_name='it', year=2026, week=week)
# handle_obj_it.run() # handle_obj_it.run()
# handle_obj_it.sendMessage(week, site_name='it') # handle_obj_it.sendMessage(week, site_name='it')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment