import random
import socket
import threading
import time
import json
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from loguru import logger
from py_spider.utils.secure_db_client import get_remote_engine
from inv_img_double_search import AmazonImageSearch
from mysql_db_new import sql_try_again


def process_single_task(task_id, url,search_mode,site_name):
    """Run one image-search crawl and write its outcome back to the task row.

    Args:
        task_id: Primary key (``id``) of the row in ``us_inv_img_result``.
        url: Image URL handed to ``AmazonImageSearch.search``.
        search_mode: Forwarded unchanged as the ``search_mode`` keyword.
        site_name: Site used to build the crawler and passed to
            ``sql_try_again`` as ``site``.

    Returns:
        None. On success the row is updated to ``state = 3`` with the
        JSON-encoded crawl result and the current local timestamp.

    Any exception (crawler, JSON encoding, or database) is caught and logged;
    it is never re-raised, and in that case the row is not updated here.
    """
    thread_name = f"Thread-{threading.get_ident()}"
    began_at = time.time()
    update_stmt = "UPDATE us_inv_img_result SET state = 3, result_data = %s, updated_at = %s WHERE id = %s"

    try:
        logger.info(f"[{thread_name}] ▶ 开始爬取 ID: {task_id}")

        # Step 1: run the crawler.
        payload = AmazonImageSearch(site_name=site_name).search(url, search_mode=search_mode)

        # Step 2: persist the result. It is stored as-is whether or not the
        # search succeeded at the business level; the consumer (Flask, per the
        # original note) decides how to interpret it.
        row = (
            json.dumps(payload, ensure_ascii=False),
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            task_id,
        )
        sql_try_again("sql_execute", update_stmt, data=[row], site=site_name)

        cost_time = time.time() - began_at
        logger.success(f"[{thread_name}]  任务 {task_id} 完成 耗时: {cost_time:.2f}s ")
    except Exception as e:
        logger.error(f"[{thread_name}]  任务 {task_id} 崩溃: {e}")

def worker_loop(max_workers=5):
    """Poll ``us_inv_img_result`` forever and dispatch pending tasks to a thread pool.

    Each iteration reads the oldest row with ``state = 1``, flips it to
    ``state = 2`` (crawling) with a fresh ``updated_at``, and submits
    ``process_single_task`` to the pool.

    A task is only claimed while a pool thread is free. Without this gate the
    loop would keep marking rows as "crawling" and pile them up in the
    executor's unbounded queue, so rows would sit in state 2 long before any
    work started and could not be picked up by any other worker process.

    Args:
        max_workers: Size of the thread pool and therefore the maximum number
            of tasks claimed-but-unfinished at any time. Defaults to 5.

    Returns:
        Never returns normally; runs until the process is stopped.
    """
    logger.info("VPS Worker 已启动，监听任务中...")

    # Thread pool plus one permit per pool thread. A permit is taken before a
    # task is claimed and handed back when that task's future completes.
    executor = ThreadPoolExecutor(max_workers=max_workers)
    free_slots = threading.BoundedSemaphore(max_workers)

    while True:
        # Block here (instead of polling) while every pool thread is busy.
        free_slots.acquire()
        handed_off = False
        try:
            logger.info('正在轮询中')
            # Random jitter so that several workers do not poll in lock-step.
            time.sleep(random.uniform(0.1, 0.5))

            # 1. Fetch the oldest pending task (lowest id first).
            sql_get = "SELECT id, img_url,search_mode,site_name FROM us_inv_img_result WHERE state = 1 ORDER BY id ASC LIMIT 1"
            pending = sql_try_again("read_sql", sql_get).to_dict("records")
            if not pending:
                time.sleep(2)  # nothing to do; back off before polling again
                continue

            task = pending[0]
            task_id = task['id']
            url = task['img_url']
            search_mode = task['search_mode']
            site_name = task['site_name']
            logger.info(f"领取任务: {task_id}")

            # 2. Mark the row as state 2 (crawling), stamped with this host's local time.
            # NOTE(review): the UPDATE is guarded by "AND state = 1", but its
            # outcome is not inspected here, so two workers that read the same
            # row may both go on to process it. Confirm whether sql_try_again
            # exposes the affected-row count and skip the task when it is 0.
            now_time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            up_two_sql = "UPDATE us_inv_img_result SET state = 2, updated_at = %s WHERE id = %s AND state = 1"
            sql_try_again("sql_execute", up_two_sql, data=[(now_time_str, task_id)])

            # 3. Hand the task to the pool; the permit travels with the future.
            logger.info(f"提交任务: {task_id}-> 线程池")
            future = executor.submit(process_single_task, task_id, url, search_mode, site_name)
            future.add_done_callback(lambda _done: free_slots.release())
            handed_off = True
        except Exception as e:
            # NOTE(review): this message says "database connection failed" but
            # the handler catches every error raised in the iteration.
            logger.error(f"数据库连接失败: {e}")
            time.sleep(2)
        finally:
            # No task was dispatched this round, so return the unused permit.
            if not handed_off:
                free_slots.release()


# Script entry point: start the polling worker only when this file is run
# directly, not when it is imported. worker_loop() loops forever.
if __name__ == "__main__":
    worker_loop()