import os
import sys
import datetime as dt
import pandas as pd
import time

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from utils.common_util import CommonUtil
from utils.db_util import DBUtil
from utils.DolphinschedulerHelper import DolphinschedulerHelper


def get_month_week_lastday_dict(date_month_info):
    """Map every week belonging to a month to the date of its ``week_day = 7`` row.

    A week is selected when ``date_20_to_30`` contains a row for it with
    ``week_day = 1`` and ``year_month = date_month_info``. For each selected
    week the ``week_day = 7`` row supplies the date (presumably the last day
    of that week -- confirm against the calendar table's convention).

    Args:
        date_month_info: Value compared against the ``year_month`` column;
            the callers in this file pass a ``'%Y-%m'`` string.

    Returns:
        dict: ``{year_week: 'YYYY-MM-DD'}``; empty when no week matches.
    """
    engine = DBUtil.get_db_engine('mysql', 'us')
    sql = f"""
                select DATE_FORMAT(`date`, '%Y-%m-%d') as `date`, year_week from date_20_to_30 dt 
                where `year_week` in (select year_week from date_20_to_30 where `year_month`='{date_month_info}' and week_day = 1) 
                and week_day = 7;
    """
    try:
        result = DBUtil.engine_exec_sql(engine, sql)
        rows = result.fetchall()
        columns = result.keys()
    finally:
        # Release the engine even when the query raises.
        engine.dispose()
    # Load the rows into a DataFrame so columns can be addressed by name.
    df = pd.DataFrame(rows, columns=columns)
    month_week_lastday_dict = dict(zip(df['year_week'], df['date']))
    print(month_week_lastday_dict)
    return month_week_lastday_dict


def judge_asin_workflow_progress(site_name, date_month_info):
    """Decide whether the monthly ASIN Dolphin workflow may start for a site.

    Both conditions must hold:

    1. For every week returned by ``get_month_week_lastday_dict`` and for
       each of the pages '流量选品' and '反查搜索词', ``workflow_progress``
       holds a ``30_day`` row with ``status_val = 6`` whose ``date_info``
       equals that week's date.
    2. Exactly one ``month`` row exists in ``workflow_progress`` for table
       ``{site_name}_search_term`` and ``date_month_info`` (the monthly
       report marker).

    Args:
        site_name: Site code used in the ``site_name`` filter.
        date_month_info: Month label; the callers in this file pass a
            ``'%Y-%m'`` string.

    Returns:
        bool: True when both conditions hold.
    """
    progress_name_list = ['流量选品', '反查搜索词']
    week_lastday_dict = get_month_week_lastday_dict(date_month_info)
    lastday_list = list(week_lastday_dict.values())
    # Every (page, week) combination that is expected to be finished.
    progress_df = pd.DataFrame(
        [
            [site_name, progress, week, date]
            for progress in progress_name_list
            for week, date in week_lastday_dict.items()
        ],
        columns=['site_name', 'page', 'year_week', 'date_info'],
    )
    engine = DBUtil.get_db_engine('mysql', 'us')
    try:
        if lastday_list:
            sql = f"""
            select site_name,page,date_type,date_info, 1 as finish_flag from workflow_progress  
                    where site_name = '{site_name}'
                    and date_type ='30_day' 
                    and page in ({CommonUtil.list_to_insql(progress_name_list)})
                    and status_val = 6 
                    and date_info in ({CommonUtil.list_to_insql(lastday_list)})
        """
            result = DBUtil.engine_exec_sql(engine, sql)
            complete_df = pd.DataFrame(result.fetchall(), columns=result.keys())
            print("======================================================================")
            # Left join: expected rows without a finished record get NaN in finish_flag.
            check_df = progress_df.merge(complete_df, on=["site_name", "page", "date_info"], how='left')
            print(check_df)
            is_finish_flag = bool(check_df['finish_flag'].notna().all())
        else:
            # With no weeks, `.all()` on an empty column would be vacuously
            # True and the date filter would be built from an empty list, so
            # treat "no weeks found" as "not finished" without querying.
            is_finish_flag = False
        if not is_finish_flag:
            print(f"{site_name}站点的，当月对应周的流程暂未执行完成！")

        # Check whether the monthly report record exists.
        sql = f"""
            select site_name,page,table_name,date_type,date_info from workflow_progress  
                    where site_name = '{site_name}'
                    and date_type ='month' 
                    and table_name = '{site_name}_search_term'
                    and date_info = '{date_month_info}'
    """
        result = DBUtil.engine_exec_sql(engine, sql)
        # Count fetched rows; `rowcount` is not reliable for SELECT statements.
        num_rows = len(result.fetchall())
    finally:
        # Release the engine even when one of the queries raises.
        engine.dispose()

    # Exactly one row means the monthly report is available.
    month_report_flag = num_rows == 1
    if not month_report_flag:
        print(f"暂未检测到{site_name}站点的{date_month_info}月度报告！")

    # The Dolphin workflow may start only when both conditions are met.
    dolphin_start_flag = is_finish_flag and month_report_flag
    return dolphin_start_flag


# 　判断店铺月度流程是否可执行
def judge_fb_workflow_progress(site_name, date_month_info):
    """Check whether both shop pages have a qualifying monthly record.

    Looks in ``workflow_progress`` for ``month`` rows of the pages
    '店铺Feedback' and '店铺产品' with ``status_val >= 3`` for the given site
    and month, and requires that every one of those pages is represented.

    Args:
        site_name: Site code used in the ``site_name`` filter.
        date_month_info: Month label compared against ``date_info``; the
            caller in this file passes a ``'%Y-%m'`` string.

    Returns:
        bool: True when each page in the list has at least one matching row.
    """
    progress_name_list = ['店铺Feedback', '店铺产品']
    engine = DBUtil.get_db_engine('mysql', 'us')
    sql = f"""
                select site_name,page,date_type,date_info, 1 as finish_flag from workflow_progress  
                        where site_name = '{site_name}'
                        and date_type ='month' 
                        and page in ({CommonUtil.list_to_insql(progress_name_list)})
                        and status_val >= 3 
                        and date_info = '{date_month_info}'
            """
    try:
        result = DBUtil.engine_exec_sql(engine, sql)
        complete_df = pd.DataFrame(result.fetchall(), columns=result.keys())
    finally:
        # Always release the engine, including when the query raises.
        engine.dispose()
    num_rows = complete_df.shape[0]
    print("数据行数:", num_rows)

    print("======================================================================")
    print(complete_df)
    # Count distinct pages rather than raw rows: two rows for one page and
    # none for the other must not pass. The SQL filter already restricts
    # `page` to progress_name_list, so the distinct count is sufficient.
    is_finish_flag = bool(complete_df['page'].nunique() == len(progress_name_list))
    if not is_finish_flag:
        print(f"{site_name}站点的，当月【店铺Feedback】和【店铺产品】暂未抓取完成！")
    return is_finish_flag


def judge_dolphin_progress(process_df_name, site_name, date_info):
    """Report whether a Dolphin launch record already exists for this month.

    Searches ``workflow_progress`` for ``month`` rows with page
    '海豚调度流程' whose ``table_name`` equals ``process_df_name`` for the
    given site and ``date_info``.

    Args:
        process_df_name: Dolphin process definition name, stored in the
            ``table_name`` column of the launch record.
        site_name: Site code used in the ``site_name`` filter.
        date_info: Month label compared against ``date_info``.

    Returns:
        bool: True when at least one launch record exists, meaning the
        process must not be triggered again.
    """
    engine = DBUtil.get_db_engine('mysql', 'us')
    sql = f"""
                select site_name,page,table_name,date_type,date_info from workflow_progress  
                        where site_name = '{site_name}'
                        and page = '海豚调度流程'
                        and date_type ='month' 
                        and table_name = '{process_df_name}'
                        and date_info = '{date_info}'
        """
    try:
        result = DBUtil.engine_exec_sql(engine, sql)
        # Count fetched rows; `rowcount` is not reliable for SELECT statements.
        num_rows = len(result.fetchall())
    finally:
        # Release the engine even when the query raises.
        engine.dispose()
    # Any existing record counts as "already executed"; requiring exactly one
    # would re-trigger the process if duplicate records were ever present.
    dolphin_execute_flag = num_rows >= 1
    if dolphin_execute_flag:
        print(f"【海豚流程】：{process_df_name},{date_info}月的已经执行过！")
    return dolphin_execute_flag


def save_dolphin_progress_record(process_df_name, site_name, date_info):
    """Write the launch record for a monthly Dolphin process.

    Issues a ``REPLACE INTO selection.workflow_progress`` with page
    '海豚调度流程', ``date_type`` 'month', ``status_val`` 1 and the process
    name in ``table_name``. ``judge_dolphin_progress`` looks for this record
    to avoid starting the same process twice.

    Args:
        process_df_name: Dolphin process definition name.
        site_name: Site code the process was started for.
        date_info: Month label the process was started for.
    """
    engine = DBUtil.get_db_engine('mysql', 'us')
    save_sql = f"""
            REPLACE INTO selection.workflow_progress (site_name, page, table_name, date_type, date_info, status, status_val, is_end, over_date) 
            VALUES('{site_name}', '海豚调度流程', '{process_df_name}', 'month', '{date_info}', '当月流程启动记录', 1, '是', CURRENT_TIME)
    """
    try:
        DBUtil.engine_exec_sql(engine, save_sql)
    finally:
        # Release the engine even when the write raises.
        engine.dispose()


def execute_asin_ukde_month_dolphin_api(date_info):
    """Start the monthly ASIN Dolphin process for the uk and de sites when ready.

    For each site: skip with a message when ``judge_asin_workflow_progress``
    reports the prerequisites are not met, skip silently when
    ``judge_dolphin_progress`` reports the process was already started, and
    otherwise start the process and save a launch record.

    Args:
        date_info: Month label passed to the checks and to the process as
            its ``date_info`` start parameter.
    """
    for site_name in ('uk', 'de'):
        process_df_name = f"{site_name}-月流程-ABA+反查(旧版)+流量选品(旧版)-api"

        if not judge_asin_workflow_progress(site_name, date_info):
            print(f"{site_name}站点的{date_info}月流程未达成启动条件！")
            continue

        # Already launched for this month: nothing more to do for this site.
        if judge_dolphin_progress(process_df_name, site_name, date_info):
            continue

        launch_params = {
            "site_name": site_name,
            "date_type": 'month',
            "date_info": date_info,
        }
        DolphinschedulerHelper.start_process_instance(
            project_name="big_data_selection",
            process_df_name=process_df_name,
            startParams=launch_params,
            warning_Type="ALL",
        )
        # Persist a launch record so later polls do not start it again.
        save_dolphin_progress_record(process_df_name, site_name, date_info)
        print(f"【{site_name}】站点的【{date_info}】流程启动成功！")


def execute_feedback_month_dolphin_api(date_info):
    """Start the monthly shop Feedback Dolphin process for us, de and uk when ready.

    For each site: skip with a message when ``judge_fb_workflow_progress``
    reports the prerequisites are not met, skip silently when
    ``judge_dolphin_progress`` reports the process was already started, and
    otherwise start the process and save a launch record.

    Args:
        date_info: Month label passed to the checks and to the process as
            its ``date_info`` start parameter.
    """
    # The same process definition is used for every site.
    process_df_name = "店铺Feedback流程（新）-api"

    for site_name in ('us', 'de', 'uk'):
        if not judge_fb_workflow_progress(site_name, date_info):
            print(f"{site_name}站点的{date_info}月流程未达成启动条件！")
            continue

        # Already launched for this site and month: nothing more to do.
        if judge_dolphin_progress(process_df_name, site_name, date_info):
            continue

        launch_params = {
            "site_name": site_name,
            "date_type": 'month',
            "date_info": date_info,
        }
        DolphinschedulerHelper.start_process_instance(
            project_name="big_data_selection",
            process_df_name=process_df_name,
            startParams=launch_params,
            warning_Type="ALL",
        )
        # Persist a launch record so later polls do not start it again.
        save_dolphin_progress_record(process_df_name, site_name, date_info)
        print(f"【{site_name}】站点的{date_info}月流程启动成功！")


def get_current_and_last_month(current_time):
    """Return the '%Y-%m' labels of ``current_time``'s month and the month before.

    Args:
        current_time: A ``datetime.datetime`` (or ``datetime.date``).

    Returns:
        tuple: ``(current_month_label, previous_month_label)``.
    """
    date_info_current = current_time.strftime('%Y-%m')
    # One day before the 1st of this month is always inside the previous
    # month, whatever the month lengths are.
    last_day_of_last_month = current_time.replace(day=1) - dt.timedelta(days=1)
    date_info_last = last_day_of_last_month.strftime('%Y-%m')
    return date_info_current, date_info_last


if __name__ == '__main__':
    monitor_process = CommonUtil.get_sys_arg(1, None)  # arg 1: which workflow to monitor
    date_info_current, date_info_last = get_current_and_last_month(dt.datetime.now())

    # Poll once per hour, at most 22 times.
    max_check_count = 22
    for check_count in range(1, max_check_count + 1):
        if monitor_process == "流量选品+反查":
            # The ASIN workflow is evaluated for the previous month.
            execute_asin_ukde_month_dolphin_api(date_info=date_info_last)
        elif monitor_process == "店铺流程":
            # The shop workflow is evaluated for the current month.
            execute_feedback_month_dolphin_api(date_info=date_info_current)
        else:
            print("输入的监听流程有误，请检查！！")
            break
        # Nothing follows the final check, so do not sleep after it.
        if check_count < max_check_count:
            time.sleep(3600)  # wait one hour before the next check
