import sys
import os
import pandas as pd
sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
# from utils.templates import Templates
# from ..utils.templates import Templates
from utils.db_util import DbTypes, DBUtil


class ImportStToPg14(object):
    """Load one month's distinct search terms from MySQL into PostgreSQL 14.

    The terms are the union of the weekly ``{site}_brand_analytics_{year}``
    tables (for every week the calendar table maps to the month) and the
    monthly ``{site}_brand_analytics_month_{year}`` table. Terms that already
    exist in ``{site}_search_term_month`` with ``state != 1`` for the same
    ``date_info`` are skipped; existing ``state = 1`` rows for that
    ``date_info`` are deleted and replaced by the new set.
    """

    def __init__(self, site_name='us', date_type='month', date_info='2024-01'):
        """Create the DB engines and normalise ``date_info`` to a year-month.

        Args:
            site_name: Site prefix used in the table names (e.g. ``'us'``).
            date_type: ``'week'`` triggers a calendar lookup that converts
                ``date_info`` to its month; any other value leaves it as is.
            date_info: ``'YYYY-MM'`` value, or a year-week value when
                ``date_type == 'week'``.
        """
        self.site_name = site_name
        self.date_type = date_type
        self.date_info = date_info
        self.engine_mysql = DBUtil.get_db_engine(db_type=DbTypes.mysql.name, site_name=self.site_name)
        self.engine_pg14 = DBUtil.get_db_engine(db_type=DbTypes.postgresql_14.name, site_name=self.site_name)
        self.df_st_week = pd.DataFrame()
        self.df_st_month = pd.DataFrame()
        self.df_st_month_state = pd.DataFrame()
        self.df_save = pd.DataFrame()
        # If date_type='week' was passed, convert date_info to the month value.
        self.fetch_year_month_by_week()
        # year stays a string (used in table names); month becomes an int.
        date_parts = self.date_info.split("-")
        self.year, self.month = date_parts[0], int(date_parts[1])

    def fetch_year_month_by_week(self):
        """Replace a year-week ``date_info`` with the matching ``year_month``.

        Does nothing unless ``date_type == 'week'``.

        Raises:
            ValueError: the calendar table has no row for the given year-week.
        """
        if self.date_type != 'week':
            return
        sql = f"select `year_month` from selection.date_20_to_30 WHERE year_week='{self.date_info}' and week_day=1;"
        df = pd.read_sql(sql, con=self.engine_mysql)
        if df.empty:
            # Fail with a clear message instead of an IndexError on [0].
            raise ValueError(
                f"no year_month found in selection.date_20_to_30 for year_week='{self.date_info}'"
            )
        self.date_info = df['year_month'].tolist()[0]

    def read_data(self):
        """Read the weekly, monthly and already-tracked search terms.

        Populates ``df_st_week``, ``df_st_month`` and ``df_st_month_state``.

        Raises:
            ValueError: the calendar table maps no week to ``date_info``.
        """
        # 1. Read the date_20_to_30 table to get the weeks belonging to the month.
        sql_get_week = f"select year_week, year, week from selection.date_20_to_30 WHERE `year_month`='{self.date_info}' and week_day=1"
        df_week = pd.read_sql(sql_get_week, con=self.engine_mysql)
        print(df_week.shape, "sql_get_week:", sql_get_week)
        if df_week.empty:
            # pd.concat([]) would otherwise raise an opaque
            # "No objects to concatenate" ValueError further down.
            raise ValueError(
                f"no weeks found in selection.date_20_to_30 for year_month='{self.date_info}'"
            )
        # 2. Read the search terms of each of those weeks; de-duplication
        #    across weeks happens later in handle_data.
        df_st_week_list = []
        for year, week in zip(df_week['year'], df_week['week']):
            # Each weekly query is capped by its LIMIT clause.
            sql = f"select DISTINCT(search_term) from {self.site_name}_brand_analytics_{year} WHERE `week`={week} limit 1500000;"
            df_st_week = pd.read_sql(sql, con=self.engine_mysql)
            print(year, week, df_st_week.shape, "sql:", sql)
            df_st_week_list.append(df_st_week)
        self.df_st_week = pd.concat(df_st_week_list)
        # 3. Read the monthly search-term table from MySQL.
        sql_get_month = f"SELECT DISTINCT(search_term) from {self.site_name}_brand_analytics_month_{self.year} WHERE year={self.year} and `month`={self.month};"
        self.df_st_month = pd.read_sql(sql_get_month, con=self.engine_mysql)
        print(self.df_st_month.shape, "sql_get_month:", sql_get_month)
        # 4. Read the search-term state table from pg14 (rows whose state is not 1).
        sql_get_month_state = f"SELECT DISTINCT(search_term) from {self.site_name}_search_term_month WHERE date_info='{self.date_info}' and state !=1;"
        print("sql_get_month_state:", sql_get_month_state)
        self.df_st_month_state = pd.read_sql(sql_get_month_state, con=self.engine_pg14)
        print(self.df_st_month_state.shape, "sql_get_month_state:", sql_get_month_state)

    def handle_data(self):
        """Build ``df_save``: unique terms not already tracked with state != 1.

        The result has the columns ``search_term``, ``month`` and
        ``date_info``.
        """
        merged = pd.concat([self.df_st_week, self.df_st_month])
        merged = merged.drop_duplicates(['search_term'])
        df_count_before = merged.shape[0]
        already_tracked = merged['search_term'].isin(self.df_st_month_state['search_term'])
        # .assign returns a new frame, so the columns are never written onto a
        # slice of `merged` (avoids pandas' SettingWithCopy pitfall).
        self.df_save = merged.loc[~already_tracked].assign(
            month=self.month,
            date_info=self.date_info,
        )
        df_count_after = self.df_save.shape[0]
        print(f"df_count_before:{df_count_before}, df_count_after:{df_count_after}")

    def save_data(self):
        """Replace the state=1 rows for ``date_info`` with ``df_save``.

        The delete and the insert share one transaction, so a failed insert
        rolls the delete back instead of leaving the table without its rows.
        """
        with self.engine_pg14.begin() as conn:
            sql_delete = f"delete from {self.site_name}_search_term_month where date_info='{self.date_info}' and state=1;"
            print("sql_delete:", sql_delete)
            # NOTE(review): executing a plain SQL string presumably relies on
            # SQLAlchemy 1.x behaviour - confirm against the installed version.
            conn.execute(sql_delete)
            # Reuse the open connection so the insert joins this transaction.
            self.df_save.to_sql(f"{self.site_name}_search_term_month", con=conn, index=False, if_exists="append")

    def run(self):
        """Run the full pipeline: read, transform, then save."""
        self.read_data()
        self.handle_data()
        self.save_data()


if __name__ == '__main__':
    # Expected command line: <script> <site_name> <date_type> <date_info>
    if len(sys.argv) < 4:
        # Exit with a usage hint (status 1) rather than an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <site_name> <date_type> <date_info>  (e.g. us month 2024-01)")
    site_name = sys.argv[1]  # arg 1: site
    date_type = sys.argv[2]  # arg 2: date type, e.g. month
    date_info = sys.argv[3]  # arg 3: value for that type, e.g. 2024-01
    handle_obj = ImportStToPg14(site_name=site_name, date_type=date_type, date_info=date_info)
    handle_obj.run()
