import os
import sys
from datetime import datetime, timedelta

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from multiprocessing import Pool
from amazon_spider.search_term_pg import search_temp_pg
from threading_spider.db_connectivity import connect_db
import time
import random
from amazon_spider.VPS_IP import pppoe_ip
from threading_spider.post_to_dolphin import DolphinschedulerHelper
from utils.secure_db_client import get_remote_engine
from utils.db_connect import BaseUtils

def db_engine(site_name, db_type):
    """Return a remote database engine for a site and backend alias.

    Thin wrapper that forwards both arguments unchanged to
    ``get_remote_engine``.

    Args:
        site_name: Site code forwarded as ``site_name`` (the original note
            says it selects the database, e.g. "selection").
        db_type: Server-side alias of the backend. Aliases listed by the
            original author:

            * ``"mysql"`` - Aliyun MySQL
            * ``"postgresql_14"`` - PG14 crawler database, internal network
            * ``"postgresql_14_outer"`` - PG14 crawler database, external network
            * ``"postgresql_15"`` - PG15 production database, internal network
            * ``"postgresql_15_outer"`` - PG15 production database, external network
            * ``"postgresql_cluster"`` - PG cluster, internal network
            * ``"postgresql_cluster_outer"`` - PG cluster, external network
            * ``"doris"`` - Doris cluster, internal network

    Returns:
        Whatever ``get_remote_engine`` returns for these arguments.
    """
    return get_remote_engine(site_name=site_name, db_type=db_type)


def select_search_term_state(month, site):
    """Check whether any search-term rows for the month are still pending.

    Runs a ``LIMIT 1`` query against ``{site}_search_term_month_syn`` on the
    ``postgresql_14_outer`` engine, looking for a row with ``state in (1,2)``
    and ``date_info='2025-{month}'``. The query is attempted up to five times;
    a failed attempt is logged and the next one is tried.

    Args:
        month: Month part interpolated after ``2025-`` in ``date_info``.
        site: Site code used both as the table-name prefix and to pick the
            engine.

    Returns:
        ``[1]`` when at least one matching row exists, ``None`` when there is
        none, and ``1`` when all five attempts raised.
    """
    # NOTE(review): the year is hard-coded to 2025 in the query text.
    # Built once, outside the try, so the error handler can always print it.
    sql_read = f"SELECT id FROM {site}_search_term_month_syn where state in (1,2) and date_info='2025-{month}' LIMIT 1"
    for _ in range(5):
        try:
            engine_pg = db_engine(site, 'postgresql_14_outer')
            df = engine_pg.read_sql(sql_read)
            return [1] if not df.empty else None
        except Exception as e:
            print(e, '报错11。', sql_read)
    # Reached only after every attempt failed. Previously this return sat
    # inside the loop body, so the first failure ended the function and the
    # remaining attempts never ran.
    return 1


def db_cursor_connect_update(sql, site):
    """Execute a write statement on the 'us' MySQL engine, best effort.

    Opens a transaction via ``engine.begin()`` and executes ``sql`` inside it.
    The whole sequence is attempted up to three times; the first success
    stops the loop. If every attempt fails the function returns normally
    without raising.

    Args:
        sql: Statement passed as-is to ``conn.execute``.
        site: Site code; used only in the failure log line (the engine is
            always requested for ``'us'``).

    Returns:
        None.
    """
    for _ in range(3):
        try:
            engine_us_mysql = db_engine('us', 'mysql')
            print('更新sql：', sql)
            with engine_us_mysql.begin() as conn:
                conn.execute(sql)
            break
        except Exception as e:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # still stop the process, and include the error in the log.
            print(site, 'db_cursor_connect 报错：', sql, e)


def select_sate_mysql(site, num=None, month=None, week=None):
    """Check crawl progress for a site and, once finished, close out the workflow.

    Behaviour depends on ``num``:

    * ``num == 1``: reads ``status_val`` from ``workflow_progress`` for the
      given week. If it is 1 or 2, the row is updated to ``status_val=3``, a
      notification is sent, and ``select_search_term_state`` is polled. When
      the last poll reports no pending rows, the Dolphinscheduler process
      ``ALL站点-asin同步-pg-api`` is started, a second notification is sent
      and ``True`` is returned. When the last poll does not return ``None``,
      the function falls through and returns ``None``. If the status is
      anything other than 1 or 2, ``False`` is returned.
    * ``num == 3``: polls ``select_search_term_state`` once. If it reports no
      pending rows, the ``num == 1`` path is run (its result is ignored) and
      ``False`` is returned; otherwise ``True`` is returned.
    * any other ``num``: returns ``None``.

    Args:
        site: Site code; used in SQL filters, message titles and scheduler
            parameters.
        num: Mode selector, see above.
        month: Month part interpolated after ``2025-`` for the monthly
            ``date_info`` and used in message text.
        week: Week part interpolated after ``2025-`` for the weekly
            ``date_info`` in ``workflow_progress``.

    Returns:
        ``True``, ``False`` or ``None`` as described above.
    """
    db_class = connect_db(site)
    print('month::', month)
    if num == 1:
        sql_select_ = f"select status_val from workflow_progress where date_info='2025-{week}' and date_type='week' and page='反查搜索词' and site_name='{site}'"
        print(sql_select_)
        engine_us_mysql = db_engine('us', 'mysql')
        df = engine_us_mysql.read_sql(sql_select_)
        # NOTE(review): indexes row 0 without checking that the query returned
        # any rows.
        if int(df.status_val[0]) in (1, 2):
            redis_client = BaseUtils().redis_db()
            lock_key = "ALL站点-asin同步-pg-api_lock"
            # NOTE(review): no acquire() call on this lock is visible in this
            # function, so the locked()/release() checks below act on a lock
            # this code never took - confirm whether an acquire was intended.
            lock = redis_client.lock(lock_key, timeout=15)  # original note says 10-second timeout; the value passed is 15
            update_workflow_progress = f"update workflow_progress set status_val=3,status='抓取结束' where page='反查搜索词' and date_info='2025-{week}' and site_name='{site}' and date_type='week'"
            print('update_workflow_progress: 修改状态3 ', update_workflow_progress)
            db_cursor_connect_update(update_workflow_progress, site)
            account = 'pengyanbing'
            title = site + '站点 搜索词'
            content = f'{month} 月 搜索词 已结束,请确认下一步流程!时间:'
            db_class.send_mg(account, title, content)
            # Poll up to 11 times, sleeping 180 s after each check; stop early
            # once more than 8 polls have reported no pending rows.
            ii = 0
            for i in range(11):
                id_tuple = select_search_term_state(month, site)
                time.sleep(180)
                if id_tuple is None:
                    ii += 1
                    if ii > 8:
                        break

            # Only the result of the last poll decides whether to schedule the
            # downstream process.
            if id_tuple is None:
                DolphinschedulerHelper.start_process_instance_common(
                    project_name="big_data_selection",
                    process_df_name='ALL站点-asin同步-pg-api',
                    startParams={
                        "site_name": f"{site}",
                        "date_type": "month",
                        "date_info": f'2025-{month}'
                    }
                )
                account = 'pengyanbing,chenyuanjie,hezhe,wangrui4,fangxingjun,chenjianyun,zhouyuchen'
                title = site + '站点 搜索词'
                content = f'{month} 月 搜索词 已结束,成功调度 ALL站点-asin同步-pg-api'
                db_class.send_mg(account, title, content)
                if lock.locked():
                    lock.release()
                return True
            if lock.locked():
                lock.release()
        else:
            print('5555555555555555555555555555555555')
            return False

    if num == 3:
        # The multi-process search-term crawl has finished; finally run the
        # single-process pass.
        id_tuple = select_search_term_state(month, site)
        if id_tuple is None:
            select_sate_mysql(site, num=1, month=month, week=week)
            return False
        else:
            return True


def long_time_task(site, proxy_name, month):
    """Worker entry point: build a ``search_temp_pg`` crawler and run it.

    Args:
        site: Site code passed as ``site_name``.
        proxy_name: Forwarded unchanged as ``proxy_name``.
        month: Forwarded unchanged as ``month``.
    """
    print("当前 抓取 站点 ", site)
    crawler = search_temp_pg(
        site_name=site,
        read_size=300,
        proxy_name=proxy_name,
        month=month,
    )
    crawler.run_pol()


if __name__ == '__main__':
    # Script entry point. Expects two integer command-line arguments, month
    # and week. For each site it repeatedly runs three crawler worker
    # processes, calls pppoe_ip() (the log text says this switches IP), and
    # stops once select_sate_mysql(..., num=3) returns False.
    pppoe_ip()
    site_list = ['us', 'de', 'uk']
    month = int(sys.argv[1])
    week = int(sys.argv[2])
    proxy_name = None
    # Values below 10 become zero-padded strings; larger values stay ints.
    if month < 10:
        month = '0' + str(month)
    if week < 10:
        week = '0' + str(week)
    print(month, week)
    for site in site_list:
        while True:
            # NOTE(review): the variable name and log text say "five minutes"
            # but the offset actually applied is one minute - confirm which
            # is intended.
            five_minutes_later = datetime.now() + timedelta(minutes=1)
            print('后五分钟时间', five_minutes_later)
            p = Pool(3)
            for _ in range(3):
                p.apply_async(
                    long_time_task,
                    args=(site, proxy_name, month),
                    # Without this, an exception raised in a worker is stored
                    # on the discarded AsyncResult and never surfaces.
                    error_callback=lambda exc: print('子进程报错：', repr(exc)),
                )
            print('等待所有子进程运行完成')
            # After close() no further tasks can be submitted to the pool.
            p.close()
            # Wait for every worker process to finish.
            p.join()
            print('所有进程运行完毕！')
            print('所有进程运行完毕！开始切换ip')
            print('所有进程运行完毕！开始切换ip')
            print('所有进程运行完毕！开始切换ip')
            print('所有进程运行完毕！开始切换ip')
            # If the round finished before the deadline, wait a random
            # 150-220 s first; pppoe_ip() is called either way.
            if datetime.now() <= five_minutes_later:
                time.sleep(random.uniform(150, 220))
            pppoe_ip()
            # select_sate_mysql may return None as well as a bool, so test
            # for False explicitly.
            if select_sate_mysql(site, num=3, month=month, week=week) is False:
                break
