import sys
import os
from datetime import datetime, timedelta

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from multiprocessing import Pool
from amazon_spider.search_term_pg_week import search_temp_pg
from amazon_spider.search_term_mysql import search_temp_mysql
from threading_spider.db_connectivity import connect_db
import time
import pandas as pd
import random
import requests
import socket
from amazon_spider.VPS_IP import pppoe_ip
from threading_spider.post_to_dolphin import DolphinschedulerHelper


def _pending_search_term_ids(engine_pg, site, week):
    """Probe ``{site}_search_term_syn`` for rows of ``week`` in state 1 or 2.

    The query uses ``LIMIT 1``, so the returned list holds at most one id;
    an empty list means no row in state 1 or 2 is left for that week.
    """
    sql_read = f'SELECT id FROM {site}_search_term_syn where state in (1,2) and week={week} LIMIT 1'
    print(sql_read)
    df = pd.read_sql(sql_read, con=engine_pg)
    return list(df.id)


def select_sate_mysql(site, num=None, week=None, year=2024):
    """Check or advance the weekly '反查搜索词' workflow state for one site.

    The behaviour depends on ``num``:

    * ``None`` -- start gate. Reads the ``workflow_progress`` row for page
      'ABA搜索词'. Only when its ``status_val`` is 2 is the '反查搜索词' row
      read; if that one is 1 or 2, a row still at 1 is moved to
      ``status_val=2`` / '抓取中', the change is committed and ``True`` is
      returned. Every other outcome (missing row, other status, database
      error) returns ``False``.
    * ``1`` -- finalisation. When no search-term row of the week is in state
      1 or 2 and the '反查搜索词' row is at ``status_val`` 2, the row is set
      to 3 / '抓取结束', a notification is sent through ``db_class.send_mg``
      and the DolphinScheduler process 'ALL站点-asin同步-pg-api' is started.
      Returns ``True`` when no such search-term row exists (whether or not
      the update ran) and ``False`` otherwise.
    * ``3`` -- loop check. Returns ``True`` while rows in state 1 or 2
      remain; otherwise runs the ``num=1`` finalisation and returns ``False``.

    Any other ``num`` does nothing and returns ``None``.

    Args:
        site: Site code used in table names and ``site_name`` filters.
        num: Mode selector described above.
        week: Week number (int or numeric string); zero-padded to two digits.
        year: Year used to build ``date_info`` (``'<year>-<week>'``).
            Defaults to 2024, the value that used to be hard-coded.

    Raises:
        TypeError: If ``week`` is ``None``.
    """
    db_class = connect_db(site)
    engine_pg = db_class.pg_db()  # PostgreSQL engine
    cursor_us, connect_us = db_class.us_mysql_db()  # MySQL cursor and connection
    # NOTE(review): neither the cursor/connection nor the engine is released
    # here; confirm whether connect_db manages their lifetime.
    print('week::', week)
    if week is None:
        # int(None) would raise the same exception type with an opaque message.
        raise TypeError('week is required (a week number such as 5 or "05")')
    week = f'{int(week):02d}'
    date_info = f'{year}-{week}'

    if num is None:
        select_state1_sql = f"select status_val from workflow_progress where site_name='{site}' and date_info='{date_info}' and date_type='week' and page='ABA搜索词';"
        print(select_state1_sql)
        cursor_us.execute(select_state1_sql)
        site_sate1_list = cursor_us.fetchone()
        try:
            if site_sate1_list is None or site_sate1_list[0] != 2:
                # The ABA search-term import has not reached status 2 yet.
                return False
            print(f"{site} 站点  {week} 周 搜索词 已完成导入到  search_term 执行下一步")
            state1_sql = f"select status_val from workflow_progress where site_name='{site}' and date_info='{date_info}' and date_type='week' and page='反查搜索词';"
            print(state1_sql)
            cursor_us.execute(state1_sql)
            sate1_list = cursor_us.fetchone()
            print('查看状态：', sate1_list)
            if sate1_list is None:
                return False
            if sate1_list[0] not in (1, 2):
                print(33333333333336666666666666666666666666)
                return False
            # Only a row still at status 1 is modified; a row already at 2
            # is left as is, but the crawl is still allowed to start.
            update_workflow_progress_2 = f"update workflow_progress set status_val=2,status='抓取中' where page='反查搜索词' and date_info='{date_info}' and site_name='{site}' and date_type='week' and status_val=1"
            print('修改状态2：：', update_workflow_progress_2)
            cursor_us.execute(update_workflow_progress_2)
            connect_us.commit()
            return True
        except Exception as exc:
            # Stay best-effort (report "not ready"), but show the cause and
            # let KeyboardInterrupt / SystemExit propagate.
            print('select_sate_mysql failed:', repr(exc))
            return False

    if num == 1:
        id_tuple = _pending_search_term_ids(engine_pg, site, week) or None
        print('id_tuple：', id_tuple)
        if id_tuple is not None:
            print('5555555555555555555555555555555555')
            return False
        status_sql = f"select status_val from workflow_progress where site_name='{site}' and date_info='{date_info}' and date_type='week' and page='反查搜索词';"
        print('status_sql:', status_sql)
        cursor_us.execute(status_sql)
        status_list = cursor_us.fetchone()
        print(status_list)
        if status_list and status_list[0] == 2:
            update_workflow_progress = f"update workflow_progress set status_val=3,status='抓取结束' where page='反查搜索词' and date_info='{date_info}' and site_name='{site}' and date_type='week'"
            print('update_workflow_progress: 修改状态3 ', update_workflow_progress)
            cursor_us.execute(update_workflow_progress)
            connect_us.commit()
            # Notify the listed accounts, then start the follow-up process.
            account = 'pengyanbing,chenyuanjie,hezhe,wangrui4,fangxingjun'
            title = site + '站点 搜索词'
            content = str(week) + ' 周 搜索词 已结束,请确认下一步流程!时间:'
            db_class.send_mg(account, title, content)
            DolphinschedulerHelper.start_process_instance_common(
                project_name="big_data_selection",
                process_df_name='ALL站点-asin同步-pg-api',
                startParams={
                    "site_name": f"{site}",
                    "date_type": "week",
                    "date_info": date_info
                }
            )
        return True

    if num == 3:
        if _pending_search_term_ids(engine_pg, site, week):
            return True
        # Nothing left in state 1 or 2: run the finalisation step and tell
        # the caller to stop looping.
        select_sate_mysql(site, num=1, week=week, year=year)
        return False

    # Unsupported mode: keep the historical "no result" behaviour.
    return None

def get_ip_address():
    """Return the local IP address the OS selects for reaching 'baidu.com'.

    A UDP socket is connected to the host and the socket's own address is
    read back. The socket is closed before returning so the file descriptor
    is not leaked.

    Returns:
        The local address as a string.

    Raises:
        OSError: If the host name cannot be resolved or the connect fails.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(('baidu.com', 0))
        return s.getsockname()[0]


def long_time_task(site, proxy_name, week):
    """Run one crawl pass for ``site`` inside a worker process.

    The 'us' site is crawled with ``search_temp_pg``; every other site uses
    ``search_temp_mysql``. Both are built with ``read_size=300`` and started
    through ``run_pol()``.
    """
    print("当前 抓取 站点 ", site)
    spider_options = dict(site_name=site, read_size=300, proxy_name=proxy_name, week=week)
    if site != 'us':
        search_temp_mysql(**spider_options).run_pol()
        # The completion message is only printed for non-'us' sites.
        print('结束')
        return
    search_temp_pg(**spider_options).run_pol()


def _report_worker_error(exc):
    """Print an exception raised by a pool worker (called in the parent)."""
    print('worker process failed:', repr(exc))


def start_run(week):
    """Crawl the search terms of every configured site for ``week``.

    For each site that passes the ``select_sate_mysql`` start gate, rounds of
    three worker processes are run until ``select_sate_mysql(num=3)`` reports
    that nothing is left. ``pppoe_ip()`` is called once up front and again
    after every round (judging by the log messages it switches the outbound
    IP -- confirm against ``amazon_spider.VPS_IP``).

    Args:
        week: Week number forwarded to the spiders and the state checks.
    """
    pppoe_ip()
    proxy_name = None
    site_list = ['us', 'de', 'uk']
    for site in site_list:
        if not select_sate_mysql(site, week=week):
            continue
        while True:
            # NOTE(review): the name and the log text say "five minutes" but
            # the threshold is one minute; confirm which one is intended.
            five_minutes_later = datetime.now() + timedelta(minutes=1)
            print('后五分钟时间', five_minutes_later)
            # The context manager terminates the workers if the parent is
            # interrupted while waiting; on the normal path they have already
            # been joined by then.
            with Pool(3) as p:
                for _ in range(3):
                    # Without error_callback a failing worker would be
                    # dropped silently, since the AsyncResult is never read.
                    p.apply_async(
                        long_time_task,
                        args=(site, proxy_name, week),
                        error_callback=_report_worker_error,
                    )
                print('等待所有子进程运行完成')
                # No further tasks may be submitted after close().
                p.close()
                # Wait for all worker processes to finish.
                p.join()
            print('所有进程运行完毕！')
            for _ in range(4):
                print('所有进程运行完毕！开始切换ip')
            # A round that finished within the threshold is followed by a
            # random 200-300 s pause before the IP switch.
            if datetime.now() <= five_minutes_later:
                time.sleep(random.uniform(200, 300))
            pppoe_ip()
            if not select_sate_mysql(site, num=3, week=week):
                break


if __name__ == '__main__':
    # The week number is the first command-line argument. Fail fast with a
    # usage message when it is missing or not an integer: continuing with
    # week=None would only crash later inside select_sate_mysql, after
    # start_run has already called pppoe_ip().
    try:
        week = int(sys.argv[1])
    except (IndexError, ValueError):
        sys.exit(f'usage: {sys.argv[0]} <week>  (week must be an integer)')
    start_run(week)
