Commit 2a634fbb by Peng

使用会员账号下载卖家精灵bsr销量。可以获取到5w排名之后的数据。每个月1号开始跑。查询sql有调整。只获取当月的预估销量。不再使用上个月的。因为是在当月跑的b…

使用会员账号下载卖家精灵 BSR 销量,可以获取到排名 5w 之后的数据。每个月 1 号开始跑。查询 SQL 有调整:只获取当月的预估销量,不再使用上个月的,因为 BSR 是在当月跑的。之前是因为账号不够,所以在上个月末就开始跑数据。
parent 1461772e
......@@ -13,7 +13,7 @@ import math
class CalculateMean(BaseUtils):
def __init__(self, site_name='us', nums_start=0, nums_step=10000000, year=2025, week=1, flag_uniformity=False):
def __init__(self, site_name='us', nums_start=0, nums_step=10000000, year=2026, week=1, flag_uniformity=False):
super().__init__()
self.site_name = site_name
self.engine = self.mysql_connect()
......@@ -51,34 +51,42 @@ class CalculateMean(BaseUtils):
print(_year_month)
print(f"读取 {self.site_name}_one_category")
# ---- 6 月份 ----
sql_6 = f"""
SELECT * from {self.site_name}_one_category WHERE id in ( select max(id) from {self.site_name}_one_category where `year_month`='2025_5' and orders=0 and rank>50000 GROUP BY `name`)
UNION
select * from {self.site_name}_one_category where `year_month`='2025_8' and rank<=50000
"""
print('查询原始表6:', sql_6)
self.df_sum_6 = self.engine.read_sql(sql_6)
# ---- 7 月份 ----
sql_7 = f"""
SELECT * from {self.site_name}_one_category WHERE id in ( select max(id) from {self.site_name}_one_category where `year_month`='2025_5' and orders=0 and rank>50000 GROUP BY `name`)
UNION
select * from {self.site_name}_one_category where `year_month`='2025_9' and rank<=50000
"""
print('查询原始表7:', sql_7)
self.df_sum_7 = self.engine.read_sql(sql_7)
# 合并后直接靠 keep='last' 留 7 月
self.df_sum = pd.concat([self.df_sum_6, self.df_sum_7], ignore_index=True)
print("合并前总行数:", len(self.df_sum))
self.df_sum.drop_duplicates(subset=['rank', 'name'], keep='last', inplace=True)
print("合并去重后总行数:", len(self.df_sum))
# # ---- 6 月份 ----
# sql_6 = f"""
# SELECT * from {self.site_name}_one_category WHERE id in ( select max(id) from {self.site_name}_one_category where `year_month`='2026_5' and orders=0 and rank>50000 GROUP BY `name`)
# UNION
# select * from {self.site_name}_one_category where `year_month`='2026_8' and rank<=50000
# """
# print('查询原始表6:', sql_6)
# self.df_sum_6 = self.engine.read_sql(sql_6)
#
# # ---- 7 月份 ----
# sql_7 = f"""
# SELECT * from {self.site_name}_one_category WHERE id in ( select max(id) from {self.site_name}_one_category where `year_month`='2026_5' and orders=0 and rank>50000 GROUP BY `name`)
# UNION
# select * from {self.site_name}_one_category where `year_month`='2026_9' and rank<=50000
# """
# print('查询原始表7:', sql_7)
# self.df_sum_7 = self.engine.read_sql(sql_7)
#
# # 合并后直接靠 keep='last' 留 7 月
# self.df_sum = pd.concat([self.df_sum_6, self.df_sum_7], ignore_index=True)
# print("合并前总行数:", len(self.df_sum))
# self.df_sum.drop_duplicates(subset=['rank', 'name'], keep='last', inplace=True)
# print("合并去重后总行数:", len(self.df_sum))
print(f"读取 {self.site_name}_one_category")
sql = f"select * from {self.site_name}_one_category where `year_month`='{_year_month}';"
print('查询原始表:', sql)
self.df_sum = self.engine.read_sql(sql)
print("self.df_sum.shape1:", self.df_sum.shape)
time.sleep(2)
# 排序、后续处理
self.df_sum.sort_values(by=['name', 'rank'], inplace=True)
# # 删除重复行,保留最后一行
self.df_sum.drop_duplicates(['name', 'rank'], keep='last', inplace=True)
print("self.df_sum.shape2:", self.df_sum.shape)
print(self.df_sum[['year_month', 'rank', 'orders']].head(10).to_string())
self.cate_list = list(set(self.df_sum.name))
sql_select = f"SELECT `year_month` from selection.week_20_to_30 WHERE `week`={int(self.week)} and `year`={self.year}"
print(sql_select, 'sql_select:')
......@@ -161,7 +169,7 @@ class CalculateMean(BaseUtils):
with engine_us_mysql.begin() as conn:
time_strftime = time.strftime("%Y-%m-%d %X", time.localtime())
update_workflow_progress = f"update workflow_progress set status_val=3,status='抓取结束' where page='ASIN销量' and date_info='2025-{week}' and site_name='{site_name}' and date_type='week'"
update_workflow_progress = f"update workflow_progress set status_val=3,status='抓取结束' where page='ASIN销量' and date_info='2026-{week}' and site_name='{site_name}' and date_type='week'"
print(update_workflow_progress)
conn.execute(update_workflow_progress)
......@@ -182,21 +190,21 @@ if __name__ == '__main__':
# week = '04'
print("week 周:", week)
time.sleep(2)
handle_obj_us = CalculateMean(site_name='us', year=2025, week=week)
handle_obj_us = CalculateMean(site_name='us', year=2026, week=week)
handle_obj_us.run()
handle_obj_us.sendMessage(week, site_name='us')
# handle_obj_uk = CalculateMean(site_name='uk', year=2025, week=week)
# handle_obj_uk = CalculateMean(site_name='uk', year=2026, week=week)
# handle_obj_uk.run()
# handle_obj_uk.sendMessage(week, site_name='uk')
# handle_obj_de = CalculateMean(site_name='de', year=2025, week=week)
# handle_obj_de = CalculateMean(site_name='de', year=2026, week=week)
# handle_obj_de.run()
# handle_obj_de.sendMessage(week, site_name='de')
# handle_obj_fr = CalculateMean(site_name='fr', year=2025, week=week)
# handle_obj_fr = CalculateMean(site_name='fr', year=2026, week=week)
# handle_obj_fr.run()
# handle_obj_fr.sendMessage(week, site_name='fr')
# handle_obj_es = CalculateMean(site_name='es', year=2025, week=week)
# handle_obj_es = CalculateMean(site_name='es', year=2026, week=week)
# handle_obj_es.run()
# handle_obj_es.sendMessage(week, site_name='es')
# handle_obj_it = CalculateMean(site_name='it', year=2025, week=week)
# handle_obj_it = CalculateMean(site_name='it', year=2026, week=week)
# handle_obj_it.run()
# handle_obj_it.sendMessage(week, site_name='it')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment