import pandas as pd
from urllib.parse import quote
from datetime import datetime
import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from utils.db_connect import BaseUtils
def get_data_from_database(engine_pg, query):
    """Run ``query`` through ``engine_pg.read_sql`` and return its result unchanged.

    Args:
        engine_pg: Database handle that provides a ``read_sql(query)`` method.
        query: SQL text to execute.

    Returns:
        Whatever ``read_sql`` returns; the caller in this file treats it as
        a pandas DataFrame.
    """
    return engine_pg.read_sql(query)


def db_read_data(engine_pg):
    """Expand pending search terms into Amazon URLs and write them to the sync table.

    Steps, in order:
      1. Select ``search_term`` from ``us_search_term_month_merchantwords``
         where ``state=1`` and drop duplicate terms.
      2. Call ``build_urls`` for every term and collect the resulting
         ``[search_term, url]`` pairs into a DataFrame, adding a
         ``date_info`` column holding the current local date as
         ``YYYY-MM-DD``.
      3. Keep only the rows whose URL is at most 450 characters long.
      4. Append those rows to ``us_merchantwords_search_term_month_syn_2025``,
         set ``state`` to 3 on every ``state=1`` row of the source table,
         then delete the ``state=3`` rows from the sync table.

    Args:
        engine_pg: Database handle exposing ``read_sql``, ``to_sql`` and
            ``begin`` in the way they are used below.

    NOTE(review): ``to_sql`` is called on ``engine_pg`` rather than on the
    ``conn`` yielded by ``begin()``; whether the insert shares the same
    transaction as the two statements after it cannot be told from this
    file -- confirm against the engine wrapper.

    NOTE(review): the update targets every ``state=1`` row, so terms whose
    URLs were dropped by the length filter are still marked ``state=3``.
    """
    select_sql = "SELECT search_term FROM us_search_term_month_merchantwords WHERE state=1"
    print(select_sql)
    pending_terms = get_data_from_database(engine_pg, select_sql)
    pending_terms.drop_duplicates(subset=['search_term'], inplace=True)
    print('us_search_term_month_merchantwords::', pending_terms.shape)

    # Flatten the per-term results into one list of [search_term, url] rows.
    url_rows = [
        pair
        for term in pending_terms['search_term']
        for pair in build_urls(term)
    ]
    df_search_term = pd.DataFrame(url_rows, columns=['search_term', 'url'])
    print(df_search_term.shape)

    df_search_term['date_info'] = datetime.now().strftime("%Y-%m-%d")
    print('date_info::', df_search_term['date_info'])

    # Boolean mask of the rows to KEEP: URL length of 450 characters or fewer.
    url_fits = df_search_term['url'].str.len() <= 450
    data_df = df_search_term[url_fits]

    print('pg6 写入数据 merchantwords')
    update_sql = "update us_search_term_month_merchantwords set state =3 where state=1"
    deletesql = "DELETE from us_merchantwords_search_term_month_syn_2025 where state =3"
    with engine_pg.begin() as conn:
        engine_pg.to_sql(data_df, 'us_merchantwords_search_term_month_syn_2025', if_exists="append")
        # Same order as before: mark the source rows, then purge the sync table.
        for statement in (update_sql, deletesql):
            print(statement)
            conn.execute(statement)


def build_urls(search_term):
    """Return ``[search_term, url]`` pairs for pages 1-3 of an Amazon search.

    The keyword is percent-encoded as UTF-8 with nothing left unescaped
    apart from unreserved characters (letters, digits and ``_.-~``). A space
    therefore becomes ``%20``, ``/`` becomes ``%2F``, ``&`` becomes ``%26``,
    ``#`` becomes ``%23``, and ``'``, ``(``, ``)`` become ``%27``, ``%28``,
    ``%29``.

    Args:
        search_term: Raw keyword text.

    Returns:
        A list of three two-element lists ``[search_term, url]`` in
        ascending page order. The first element is the original,
        un-encoded keyword.
    """
    # safe='' is explicit because quote()'s default would leave '/' unescaped.
    # Its second positional parameter is `safe`, not an encoding; UTF-8 is
    # already the default encoding for str input.
    encoded_term = quote(search_term, safe='')
    return [
        [search_term, f"https://www.amazon.com/s?k={encoded_term}&page={page_number}"]
        for page_number in range(1, 4)
    ]
if __name__ == '__main__':
    # Script entry point: obtain a database connection and pass it to the sync job.
    engine_pg = BaseUtils(site_name='us').pg_connect_6()
    db_read_data(engine_pg)