import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
import pandas as pd
from utils.db_connect import BaseUtils
import math
from urllib.parse import quote


class count_all_syn_st_id(BaseUtils):
    """Split the id span of a site's monthly table into ranges and build URLs.

    ``get_minid_maxid`` reads the smallest and largest ``id`` from
    ``{site}_all_syn_st_month_{YYYY_MM}`` and stores contiguous id ranges in
    ``{site}_syn_asin_all_minid_maxid``. ``build_urls`` produces the first
    three search-result page URLs for a search term on the configured site.
    """

    # Host name for each supported site code; ``site_url`` is derived from it.
    _SITE_HOSTS = {
        'us': 'www.amazon.com',
        'uk': 'www.amazon.co.uk',
        'de': 'www.amazon.de',
        'fr': 'www.amazon.fr',
        'es': 'www.amazon.es',
        'it': 'www.amazon.it',
    }

    # Upper bound on the number of id ranges produced per run.
    _NUM_GROUPS = 150

    def __init__(self, site_name=None, month=None, data_info=None, engine_db_num=14):
        """Store the site/month configuration and open database connections.

        Args:
            site_name: Site code. For 'us', 'uk', 'de', 'fr', 'es' and 'it'
                the attributes ``site_url`` and ``host`` are set; for any
                other value they are left unset.
            month: Month number (1-12) as an int or numeric string. Required.
            data_info: Ignored. The value is always rebuilt as
                ``'2025-<month>'``; the parameter is kept only so existing
                callers keep working.
            engine_db_num: Connections are only opened when this is 14.

        Raises:
            TypeError: If ``month`` is None.
            ValueError: If ``month`` is not a number in the range 1-12.
        """
        super().__init__()
        if month is None:
            raise TypeError("month is required (1-12), got None")
        month_number = int(month)
        if not 1 <= month_number <= 12:
            raise ValueError(f"month must be in 1-12, got {month!r}")
        if month_number < 10:
            # Format from the parsed number so an already padded value such
            # as '07' stays '07' instead of becoming '007'.
            month = f'{month_number:02d}'
        self.site_name = site_name  # site code
        # Months below 10 are stored as a zero-padded string; 10-12 are kept
        # exactly as passed in.
        self.month = month
        if engine_db_num == 14:
            # mysql_connect / pg_connect are presumably provided by BaseUtils.
            self.engine = self.mysql_connect()
            self.engine_pg = self.pg_connect()
        # NOTE: for any other engine_db_num no connection attributes exist,
        # so get_minid_maxid cannot run on such an instance.
        self.engine_db_num = engine_db_num
        host = self._SITE_HOSTS.get(site_name)
        if host is not None:
            self.site_url = f'https://{host}/'  # site base URL
            self.host = host
        # NOTE(review): the year is hard-coded to 2025 - confirm this is
        # intended before running for another year.
        data_info = f'2025-{month}'
        self.data_info = data_info
        self.table_data_info = data_info.replace('-', '_')

    @staticmethod
    def _split_id_range(min_id, max_id, num_groups):
        """Return contiguous (start, end) pairs covering min_id..max_id.

        At most ``num_groups`` pairs are produced. Every pair spans the same
        number of ids, so the end of the last pair may exceed ``max_id``.
        """
        group_size = math.ceil((max_id - min_id + 1) / num_groups)
        return [
            (start, start + group_size - 1)
            for start in range(min_id, max_id + 1, group_size)
        ]

    def get_minid_maxid(self):
        """Rebuild the id-range rows for this site and month.

        Reads MIN(id)/MAX(id) of the rows with ``state in (1,2)`` from the
        monthly table, splits that span into ranges, deletes the rows with
        ``state <4`` from ``{site}_syn_asin_all_minid_maxid`` and appends the
        new ranges there. Nothing is deleted or written when the monthly
        table has no matching rows.
        """
        # Query the smallest and largest id.
        print(self.site_name, ' 查询最小和最大 id')
        query = f"SELECT MIN(id) AS min_id, MAX(id) AS max_id FROM {self.site_name}_all_syn_st_month_{self.table_data_info} where state in (1,2)"
        print(query)
        result = self.engine_pg.read_sql(query)
        if result.shape[0] == 0:
            return
        min_id = result['min_id'].values[0]
        max_id = result['max_id'].values[0]
        # An aggregate over zero rows still yields one row, holding NULLs;
        # there is nothing to split in that case.
        if pd.isna(min_id) or pd.isna(max_id):
            print(self.site_name, 'no rows with state in (1,2), nothing to split')
            return

        group_ranges = self._split_id_range(int(min_id), int(max_id), self._NUM_GROUPS)
        id_list = []
        for group_start, group_end in group_ranges:
            print(f"Group: {group_start} - {group_end}")
            id_list.append([f'{group_start}-{group_end}', self.table_data_info])
        print(id_list)

        # Column names must match the target table, including its spelling.
        df_ranges = pd.DataFrame(data=id_list, columns=['minid_maxid', 'yaer_month'])
        with self.engine.begin() as conn:
            delete_sql = f'DELETE from {self.site_name}_syn_asin_all_minid_maxid where state <4'
            print('delete_sql::', delete_sql)
            conn.execute(delete_sql)
        self.engine.to_sql(df_ranges, f'{self.site_name}_syn_asin_all_minid_maxid', if_exists='append')

    def get_data_from_database(self, connection, query):
        """Run ``query`` on ``connection`` and return the result as a DataFrame."""
        return pd.read_sql(query, connection)

    def build_urls(self, search_term):
        """Return ``[search_term, url]`` pairs for result pages 1, 2 and 3.

        The term is percent-encoded with no characters exempted, so a space
        becomes ``%20`` and ``/ ' & # ( )`` are escaped as well. Requires
        ``self.site_url``, which is only set for the supported site codes.
        """
        # safe='' makes quote() escape every reserved character itself, so no
        # further replacement of individual characters is needed.
        encoded_term = quote(search_term, safe='')
        return [
            [search_term, f"{self.site_url}s?k={encoded_term}&page={page_number}"]
            for page_number in (1, 2, 3)
        ]


if __name__ == '__main__':
    # Script entry point: rebuild the id ranges for each listed site.
    import time
    # Choose the database by engine number: 14 is the crawler database, 6 is
    # the one used for crawling "me" search terms. Crawlers normally use 14;
    # adjust as needed.
    month = 7
    # NOTE: engine_db_num is not passed to the constructor below, which
    # therefore runs with its own default value.
    engine_db_num = 14
    for site in ['de']:
    # Alternative site list: for site in ['us']:
        # Currently a zero-second sleep between sites.
        time.sleep(0)
        count_all_syn_st_id(site_name=site,month=month).get_minid_maxid()

