Commit 61cd5e9d by fangxingjun

no message

parent 6b7f0ae7
import math
import os
import sys
import time
import traceback
import pandas as pd
sys.path.append(os.path.dirname(sys.path[0]))
from utils.secure_db_client import get_remote_engine
......@@ -106,6 +109,50 @@ def export_data(site_name, date_type, date_info):
)
def _build_id_ranges(min_id, max_id, num_groups):
    """Split the inclusive id span [min_id, max_id] into consecutive ranges.

    Each range is ``math.ceil(span / num_groups)`` ids wide, so at most
    ``num_groups`` ranges are produced. The last range may extend past
    ``max_id``; its end is not clamped.

    Args:
        min_id: Smallest id (int).
        max_id: Largest id (int), expected to be >= ``min_id``.
        num_groups: Upper bound on the number of ranges; must be >= 1.

    Returns:
        A list of ``(start, end)`` tuples with inclusive bounds.

    Raises:
        ValueError: If ``num_groups`` is smaller than 1.
    """
    if num_groups < 1:
        raise ValueError(f"num_groups must be >= 1, got {num_groups}")
    group_size = math.ceil((max_id - min_id + 1) / num_groups)
    return [
        (start, start + group_size - 1)
        for start in range(min_id, max_id + 1, group_size)
    ]


def get_minid_maxid(site_name, date_type, date_info, num_groups=150):
    """Rebuild the id-range rows in ``{site_name}_syn_asin_all_minid_maxid``.

    Reads MIN(id)/MAX(id) of the rows with ``state in (1,2)`` from the
    PostgreSQL table ``{site_name}_all_syn_st_month_{date_info}`` (dashes in
    ``date_info`` replaced by underscores), splits that id span into at most
    ``num_groups`` ranges, deletes the rows with ``state <4`` from the MySQL
    table ``{site_name}_syn_asin_all_minid_maxid`` and appends one row per
    range.

    Args:
        site_name: Site code used to pick the connections and as the table
            name prefix.
        date_type: Unused here; kept so the signature matches the other
            entry points of this script.
        date_info: Period string such as ``2022-1``; stored with every row.
        num_groups: Maximum number of id ranges to generate (default 150).

    Returns:
        None. When the source query yields no ids, nothing is deleted or
        written.
    """
    engine = get_remote_engine(
        site_name=site_name,
        db_type="postgresql_14",
    )
    engine_mysql = get_remote_engine(
        site_name=site_name,
        db_type="mysql",
    )

    # Query the smallest and largest id.
    print(site_name, ' 查询最小和最大 id')
    query = f"SELECT MIN(id) AS min_id, MAX(id) AS max_id FROM {site_name}_all_syn_st_month_{date_info.replace('-', '_')} where state in (1,2)"
    print(query)
    result = engine.read_sql(query)
    if result.shape[0] == 0:
        return

    min_id = result['min_id'].values[0]
    max_id = result['max_id'].values[0]
    # An aggregate without GROUP BY returns one row even when no rows match,
    # with NULL for MIN/MAX; bail out instead of failing in the arithmetic
    # below, and leave the existing range rows untouched.
    if pd.isna(min_id) or pd.isna(max_id):
        print(site_name, 'no rows with state in (1,2); id ranges left unchanged')
        return

    # Build the ranges before touching the target table, so a failure here
    # cannot leave it emptied.
    id_list = []
    for group_start, group_end in _build_id_ranges(int(min_id), int(max_id), num_groups):
        print(f"Group: {group_start} - {group_end}")
        id_list.append([f'{group_start}-{group_end}', date_info])
    print(id_list)

    # NOTE(review): 'yaer_month' looks like a typo but presumably matches the
    # column name of the target table -- confirm before renaming.
    df_asin_img_video = pd.DataFrame(data=id_list, columns=['minid_maxid', 'yaer_month'])
    delete_sql = f'DELETE from {site_name}_syn_asin_all_minid_maxid where state <4'
    print('delete_sql::', delete_sql)
    engine_mysql.execute(delete_sql)
    engine_mysql.to_sql(df_asin_img_video, f'{site_name}_syn_asin_all_minid_maxid', if_exists='append')
if __name__ == '__main__':
# site_name = 'us'
# date_type = 'month'
......@@ -115,4 +162,5 @@ if __name__ == '__main__':
date_info = sys.argv[3] # 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
export_data(site_name, date_type, date_info)
if site_name in ["us", "uk", "de"] and date_type == "month":
get_minid_maxid(site_name, date_type, date_info)
update_workflow_manager(site_name, date_type, date_info)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment