Commit 61cd5e9d by fangxingjun

no message

parent 6b7f0ae7
import math
import os import os
import sys import sys
import time import time
import traceback import traceback
import pandas as pd
sys.path.append(os.path.dirname(sys.path[0])) sys.path.append(os.path.dirname(sys.path[0]))
from utils.secure_db_client import get_remote_engine from utils.secure_db_client import get_remote_engine
...@@ -106,6 +109,50 @@ def export_data(site_name, date_type, date_info): ...@@ -106,6 +109,50 @@ def export_data(site_name, date_type, date_info):
) )
def get_minid_maxid(site_name, date_type, date_info, num_groups=150):
    """Split the monthly sync table's id span into ranges and store them in MySQL.

    Reads MIN(id) and MAX(id) over rows with ``state in (1,2)`` from
    ``{site_name}_all_syn_st_month_{date_info with '-' replaced by '_'}``
    through the "postgresql_14" engine, cuts the inclusive span
    [min_id, max_id] into consecutive ranges of equal width, and writes one
    row per range to ``{site_name}_syn_asin_all_minid_maxid`` through the
    "mysql" engine. Rows of that table with ``state < 4`` are deleted before
    the new ranges are appended.

    Args:
        site_name: Site code; used as the table-name prefix and passed to
            ``get_remote_engine``.
        date_type: Not used by this function; kept so existing callers that
            pass it keep working.
        date_info: Period string such as "2022-1"; dashes are replaced with
            underscores to form the source table suffix, and the value is
            stored unchanged next to every range.
        num_groups: Target number of ranges (default 150, the value that was
            previously hard-coded). Must be a positive integer.

    Returns:
        None. When the query yields no usable min/max id the function prints
        a notice and returns without modifying the MySQL table.
    """
    engine = get_remote_engine(
        site_name=site_name,
        db_type="postgresql_14",
    )
    engine_mysql = get_remote_engine(
        site_name=site_name,
        db_type="mysql",
    )

    # Query the smallest and largest id.
    print(site_name, ' 查询最小和最大 id')
    query = f"SELECT MIN(id) AS min_id, MAX(id) AS max_id FROM {site_name}_all_syn_st_month_{date_info.replace('-', '_')} where state in (1,2)"
    print(query)
    result = engine.read_sql(query)

    # An aggregate query without GROUP BY returns one row even when nothing
    # matches, with NULL in both columns, so a row-count check alone cannot
    # detect "no data"; the values themselves have to be inspected.
    if result.shape[0] == 0:
        print(site_name, 'no rows returned for min/max id query; skip')
        return
    raw_min = result['min_id'].values[0]
    raw_max = result['max_id'].values[0]
    if pd.isna(raw_min) or pd.isna(raw_max):
        print(site_name, 'min/max id is NULL (no matching rows); skip')
        return

    # Convert to plain ints so range() and the arithmetic below do not depend
    # on the scalar type the driver happened to return.
    min_id = int(raw_min)
    max_id = int(raw_max)

    # Width of each range; rounded up so that at most num_groups ranges cover
    # the whole span.
    group_size = math.ceil((max_id - min_id + 1) / num_groups)

    id_list = []
    for group_start in range(min_id, max_id + 1, group_size):
        # The end of the last range may exceed max_id.
        group_end = group_start + group_size - 1
        print(f"Group: {group_start} - {group_end}")
        id_list.append([f'{group_start}-{group_end}', date_info])
    print(id_list)

    # NOTE(review): 'yaer_month' is spelled exactly as in the original code;
    # presumably it matches the target table's column name — confirm before
    # renaming.
    df_asin_img_video = pd.DataFrame(data=id_list, columns=['minid_maxid', 'yaer_month'])

    # Remove previously stored ranges (state < 4) before appending new ones.
    delete_sql = f'DELETE from {site_name}_syn_asin_all_minid_maxid where state <4'
    print('delete_sql::', delete_sql)
    engine_mysql.execute(delete_sql)
    engine_mysql.to_sql(df_asin_img_video, f'{site_name}_syn_asin_all_minid_maxid', if_exists='append')
if __name__ == '__main__': if __name__ == '__main__':
# site_name = 'us' # site_name = 'us'
# date_type = 'month' # date_type = 'month'
...@@ -115,4 +162,5 @@ if __name__ == '__main__': ...@@ -115,4 +162,5 @@ if __name__ == '__main__':
date_info = sys.argv[3] # 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1 date_info = sys.argv[3] # 参数3:年-周/年-月/年-季/年-月-日, 比如: 2022-1
export_data(site_name, date_type, date_info) export_data(site_name, date_type, date_info)
if site_name in ["us", "uk", "de"] and date_type == "month": if site_name in ["us", "uk", "de"] and date_type == "month":
get_minid_maxid(site_name, date_type, date_info)
update_workflow_manager(site_name, date_type, date_info) update_workflow_manager(site_name, date_type, date_info)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment