import time

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool
from sqlalchemy import text
from sqlalchemy.orm import sessionmaker
import platform
import traceback
class ConnectSpider:
    def __init__(self):
        """Store the default PostgreSQL settings and build ``self.db_engine``.

        The engine is created from the ``pg_*`` attributes assigned here.
        """
        # NOTE(review): credentials are committed in source — consider moving
        # them to environment variables or a secret store.
        self.pg_host = "61.145.136.61"
        self.pg_port = 54328
        self.pg_db = "selection"
        self.pg_user = "postgres"
        self.pg_pwd = "fazAqRRVV9vDmwDNRNb593ht5TxYVrfTyHJSJ3BS"

        credentials = f"{self.pg_user}:{self.pg_pwd}"
        location = f"{self.pg_host}:{self.pg_port}/{self.pg_db}"
        self.db_engine = create_engine(f"postgresql://{credentials}@{location}")

    def con(self):
        """Create a new PostgreSQL engine for the ``selection`` database.

        The target depends on the operating system: Windows hosts use
        ``192.168.10.223:5432``; every other platform uses
        ``61.145.136.61:54328``. The engine uses ``NullPool`` (no connection
        pooling) and a 10 second connect timeout.

        Engine creation is attempted a bounded number of times. The previous
        unbounded ``while True`` retry never terminated when the failure was
        persistent (e.g. a rejected keyword argument or a missing driver).

        Returns:
            A new SQLAlchemy ``Engine``.

        Raises:
            Exception: Whatever ``create_engine`` raised on the last attempt.
        """
        # NOTE(review): credentials are committed in source — consider moving
        # them to environment variables or a secret store.
        if platform.system().lower() == 'windows':
            pg_conn = {
                "pg_port": 5432,
                "pg_db": "selection",
                "pg_user": "postgres",
                "pg_pwd": "fazAqRRVV9vDmwDNRNb593ht5TxYVrfTyHJSJ3BS",
                "pg_host": "192.168.10.223",
            }
        else:
            pg_conn = {
                "pg_port": 54328,
                "pg_db": "selection",
                "pg_user": "postgres",
                "pg_pwd": "fazAqRRVV9vDmwDNRNb593ht5TxYVrfTyHJSJ3BS",
                "pg_host": "61.145.136.61",
            }
        url = (
            f"postgresql+psycopg2://{pg_conn['pg_user']}:{pg_conn['pg_pwd']}"
            f"@{pg_conn['pg_host']}:{pg_conn['pg_port']}/{pg_conn['pg_db']}"
        )

        max_attempts = 3
        retry_delay = 0.2  # seconds between attempts
        for attempt in range(1, max_attempts + 1):
            try:
                # ``encoding`` is intentionally not passed: SQLAlchemy 2.0
                # rejects it, and utf-8 was already the default on 1.x.
                return create_engine(
                    url,
                    connect_args={"connect_timeout": 10},
                    poolclass=NullPool,
                )
            except Exception as e:
                print("pg_connect 14 t11111111111111111111111:", e, f"\n{traceback.format_exc()}")
                if attempt == max_attempts:
                    # Give up instead of spinning forever on a persistent error.
                    raise
                time.sleep(retry_delay)

    def mysql(self):
        """Create a SQLAlchemy engine for the ``selection`` MySQL database.

        Uses the ``pymysql`` driver with the ``utf8mb4`` charset.

        Returns:
            A new SQLAlchemy ``Engine``.
        """
        # NOTE(review): credentials are committed in source — consider moving
        # them to environment variables or a secret store.
        settings = {
            "user": "adv_yswg",
            "pwd": "Gd1pGJog1ysLMLBdML8w81",
            "host": "rm-wz9yg9bsb2zf01ea4yo.mysql.rds.aliyuncs.com",
            "port": 3306,
            "db": "selection",
            "charset": 'utf8mb4',
        }
        # Assemble the database connection string.
        url = "mysql+pymysql://{user}:{pwd}@{host}:{port}/{db}?charset={charset}".format(**settings)
        return create_engine(url)

    def get_asin(self, batch_size, offset):
        db_engine = self.con()
        table_name = "us_asin_detail_month_2024"
        asin_list = []

        # 构建带有偏移量的查询语句
        query = f"SELECT asin FROM {table_name} LIMIT {batch_size} OFFSET {offset}"
        df_status = pd.read_sql(query, con=db_engine)
        # 将新获取的ASIN添加到列表中
        new_asins = df_status['asin'].tolist()
        asin_list.extend(new_asins)
        return asin_list

    def get_cookie(self):
        db_engine = self.con()
        table_name = "detail_cookies_wj"
        query = f"SELECT cookie_value FROM {table_name} ORDER BY RANDOM() LIMIT 1"
        df_status = pd.read_sql(query, con=db_engine)
        try:
            cookie_str = df_status['cookie_value'].iloc[0]
        except IndexError:
            print("没有找到cookie记录")
            return None
        return cookie_str
    # Test
    def us_asin_split_wj(self, i):
        """Mark the ``i``-th row of ``us_asin_split_wj`` with ``STATE = 3`` as state 20.

        All rows whose ``STATE`` is 3 are read, the row at positional index
        ``i`` is selected, its ``state`` is set to 20 and its ``minid_maxid``
        value is returned. Read and update run in one transaction that is
        committed on success; the connection is always closed.

        Args:
            i: Positional index into the rows currently in state 3.

        Returns:
            The ``minid_maxid`` value of the selected row, or ``None`` when
            index ``i`` is out of range.
        """
        table_name = "us_asin_split_wj"
        query = f"SELECT minid_maxid,id FROM {table_name} WHERE STATE = 3 "
        # The id is sent as a bound parameter instead of being formatted into
        # the SQL text.
        sql_update = text(f"UPDATE {table_name} SET state = 20 WHERE id = :id_")

        # Engine.begin() commits on success, rolls back on error and closes
        # the connection in every case.
        with self.con().begin() as connection:
            df_status = pd.read_sql(query, con=connection)
            try:
                minid_maxid = df_status['minid_maxid'].iloc[i]
                id_ = df_status['id'].iloc[i]
            except IndexError:
                print("没有找到记录")
                return None
            # pandas returns numpy scalars; convert to a native Python value
            # so the DBAPI driver can adapt the parameter.
            id_param = id_.item() if hasattr(id_, "item") else id_
            connection.execute(sql_update, {"id_": id_param})
        return minid_maxid

    def asin_split50_test_wj(self, i):
        """Mark the ``i``-th row of ``asin_split50_test_wj`` with ``STATE = 3`` as state 20.

        All rows whose ``STATE`` is 3 are read, the row at positional index
        ``i`` is selected, its ``state`` is set to 20 and its ``minid_maxid``
        value is returned. Read and update run in one transaction that is
        committed on success; the connection is always closed, including on
        the "no such row" path.

        Args:
            i: Positional index into the rows currently in state 3.

        Returns:
            The ``minid_maxid`` value of the selected row, or ``None`` when
            index ``i`` is out of range.
        """
        table_name = "asin_split50_test_wj"
        query = f"SELECT minid_maxid,id FROM {table_name} WHERE ( STATE = 3 ) "
        # The id is sent as a bound parameter instead of being formatted into
        # the SQL text.
        sql_update = text(f"UPDATE {table_name} SET state = 20 WHERE id = :id_")

        # Engine.begin() commits on success, rolls back on error and closes
        # the connection in every case.
        with self.con().begin() as connection:
            df_status = pd.read_sql(query, con=connection)
            try:
                minid_maxid = df_status['minid_maxid'].iloc[i]
                id_ = df_status['id'].iloc[i]
            except IndexError:
                print("没有找到记录")
                return None
            # pandas returns numpy scalars; convert to a native Python value
            # so the DBAPI driver can adapt the parameter.
            id_param = id_.item() if hasattr(id_, "item") else id_
            connection.execute(sql_update, {"id_": id_param})
        return minid_maxid


    def ebay_asin_split_wj(self, i):
        """Mark the ``i``-th row of ``ebay_asin_split_wj`` with ``STATE = 3`` as state 20.

        All rows whose ``STATE`` is 3 are read, the row at positional index
        ``i`` is selected, its ``state`` is set to 20 and its ``minid_maxid``
        value is returned. Read and update run in one transaction that is
        committed on success (a plain ``connect()`` block would discard the
        UPDATE on SQLAlchemy 2.0, which no longer autocommits).

        Args:
            i: Positional index into the rows currently in state 3.

        Returns:
            The ``minid_maxid`` value of the selected row, or ``None`` when
            index ``i`` is out of range.
        """
        table_name = "ebay_asin_split_wj"
        query = f"SELECT minid_maxid,id FROM {table_name} WHERE ( STATE = 3 ) "
        # The id is sent as a bound parameter instead of being formatted into
        # the SQL text.
        sql_update = text(f"UPDATE {table_name} SET state = 20 WHERE id = :id_")

        # Engine.begin() commits on success, rolls back on error and closes
        # the connection in every case.
        with self.con().begin() as connection:
            df_status = pd.read_sql(query, con=connection)
            try:
                minid_maxid = df_status['minid_maxid'].iloc[i]
                id_ = df_status['id'].iloc[i]
            except IndexError:
                print("没有找到记录")
                return None
            # pandas returns numpy scalars; convert to a native Python value
            # so the DBAPI driver can adapt the parameter.
            id_param = id_.item() if hasattr(id_, "item") else id_
            connection.execute(sql_update, {"id_": id_param})
        return minid_maxid


    def us_all_syn_st_month_2024_05(self, start_id, limit):
        """Read a range of rows from ``us_all_syn_st_month_2024_05`` and set their state to 20.

        Rows with ``id`` between ``start_id`` and ``start_id + limit - 1``
        (inclusive) are selected and their ``state`` is set to 20 in the same
        transaction, which is committed on success. The connection is always
        closed.

        Args:
            start_id: First id of the range.
            limit: Number of consecutive ids covered by the range.

        Returns:
            list[str]: One ``"<asin>|-|-|-|-|-|<id>"`` string per selected
            row; empty when the range matches nothing.
        """
        table_name = "us_all_syn_st_month_2024_05"
        # BETWEEN keeps the range selection simple.
        query = f"SELECT asin, id FROM {table_name} WHERE id BETWEEN {start_id} AND {start_id + limit - 1};"
        # The selected ids are passed as one tuple bound to the IN clause.
        sql_update = text(f"UPDATE {table_name} SET state = 20 WHERE id IN :ids")

        # Engine.begin() commits on success, rolls back on error and closes
        # the connection in every case.
        with self.con().begin() as connection:
            df_status = pd.read_sql(query, con=connection)
            ids = tuple(df_status['id'])
            # "IN ()" is a syntax error, so skip the UPDATE for an empty range.
            if ids:
                connection.execute(sql_update, {"ids": ids})

        return [f"{asin}|-|-|-|-|-|{row_id}" for asin, row_id in zip(df_status['asin'], ids)]

    # Change state to 21
    def upstate_to_21(self, success_id):
        """Set ``state`` to 21 for the given ids in ``us_all_syn_st_month_2024_05``.

        The UPDATE runs inside a transaction that is committed on success;
        the connection is closed even when the statement fails.

        Args:
            success_id: Iterable of row ids. An empty iterable is a no-op.

        Returns:
            None
        """
        success_id = tuple(success_id)
        if not success_id:
            # "IN ()" is a syntax error; there is nothing to update.
            return

        table_name = "us_all_syn_st_month_2024_05"
        sql_update = text(f"UPDATE {table_name} SET state = 21 WHERE id IN :success_id")
        with self.con().begin() as connection:
            connection.execute(sql_update, {"success_id": success_id})
        print('成功更新为21')

    def upstate_to_3(self, failed_id):
        """Set ``state`` to 3 for the given ids in ``us_all_syn_st_month_2024_05``.

        The UPDATE runs inside a transaction that is committed on success;
        the connection is closed even when the statement fails.

        Args:
            failed_id: Iterable of row ids. An empty iterable is a no-op.

        Returns:
            None
        """
        failed_id = tuple(failed_id)
        if not failed_id:
            # "IN ()" is a syntax error; there is nothing to update.
            return

        table_name = "us_all_syn_st_month_2024_05"
        sql_update = text(f"UPDATE {table_name} SET state = 3 WHERE id IN :failed_id")
        with self.con().begin() as connection:
            connection.execute(sql_update, {"failed_id": failed_id})
        print('成功更新为3')



    def set_asins(self):
        table_name = "us_asin_detail_month_2024"
        start_id = 36566683
        end_id = 36566693
        # end_id = 37666666

        query = f"SELECT asin, id FROM {table_name} WHERE id BETWEEN {start_id} AND {end_id} and date_info = '2024-05'"
        df_status = pd.read_sql(query, con=self.db_engine)

        # 将新获取的ASIN和ID分别添加到列表中
        asin_list = df_status['asin'].tolist()
        id_list = df_status['id'].tolist()

        return asin_list, id_list

    def creat_table(self, item):
        """Append one record to ``ebay_asin_split_wj`` through ``self.db_engine``.

        Args:
            item: Record providing ``minid_maxid``, ``state``, ``year_month``
                and ``updated_at``.

        Any exception raised while saving is caught and printed.
        """
        frame = pd.DataFrame(
            [item],
            columns=['minid_maxid', 'state', 'year_month', 'updated_at'],
        )
        try:
            with self.db_engine.begin() as txn:
                frame.to_sql("ebay_asin_split_wj", con=txn, if_exists='append', index=False)
                print('保存成功')
        except Exception as e:
            print(f"数据保存失败: {e}")

    def mysql_cookies(self):
        sql_engine = self.mysql()
        table_name = "us_cookies"

        query = f"SELECT cookies FROM {table_name} WHERE created_time >= '2024-06-11 00:00:00' AND created_time <= '2024-06-12 23:59:59' LIMIT 10000;"
        result_df = pd.read_sql(query, sql_engine)
        cookie_list = result_df['cookies'].tolist()
        return cookie_list

    def save_asin_detail(self, item):
        """Append one ASIN detail record to ``us_asin_detail_month_2024_wj``.

        The row is written through ``self.db_engine`` inside a transaction.
        Any exception raised while saving is caught and printed together with
        the record's ASIN.

        Args:
            item: One record, either a mapping keyed by the column names
                below or a sequence of values in that column order.
        """
        # NOTE(review): the previous version also built an engine via
        # self.con() but never used it; writes have always gone through
        # self.db_engine. Confirm that is the intended target.
        table_name = "us_asin_detail_month_2024_wj"
        columns = ['asin', 'img_url', 'title', 'title_len', 'price', 'rating', 'total_comments', 'buy_box_seller_type',
                   'page_inventory', 'category', 'volume', 'weight', 'rank', 'launch_time', 'img_num', 'img_type',
                   'brand', 'node_id', 'buy_sales', 'date_info', 'created_time']
        # Build a single-row DataFrame.
        df = pd.DataFrame([item], columns=columns)
        try:
            with self.db_engine.begin() as connection:
                df.to_sql(table_name, con=connection, if_exists='append', index=False)
                print('保存成功')
        except Exception as e:
            # Read the ASIN from the DataFrame rather than from ``item`` so
            # that reporting the failure cannot itself raise for sequence
            # rows or for mappings without an 'asin' key.
            failed_asin = df['asin'].iloc[0]
            print(f"数据保存失败: {e} {failed_asin}")

    def save_cookies(self, item):
        table_name = "detail_cookies_wj"

        if 'cookie_value' not in item or item['cookie_value'] is None:
            print("cookie_value缺失或为空，保存失败！")
            return

        columns = ['cookie_value', 'created_time']
        df = pd.DataFrame([item], columns=columns)
        try:
            # 不再使用index参数，假设数据库表已有自增ID设置
            df.to_sql(
                name=table_name,
                con=self.db_engine,
                if_exists='append',
                index=False  # 不使用DataFrame的索引作为数据库的id
            )
            print("cookie保存成功！")
        except Exception as e:
            print(f"cookie保存失败: {e}")

    def save_ebay_cookies(self,item):
        table_name = "ebay_cookies_wj"
        # print(f"成功连接到{table_name}数据库")
        columns = ['cookie_value', 'created_time']
        df = pd.DataFrame([item], columns=columns)

        try:
            df.to_sql(
                name=table_name,
                con=self.db_engine,
                if_exists='append',
                index=False  # 不使用DataFrame的索引作为数据库的id
            )
            print("cookie保存成功！")
        except Exception as e:
            print(f"cookie保存失败: {e}")

    def get_ebay_cookies(self):
        table_name = "ebay_cookies_wj"
        query = f"SELECT cookie_value FROM ebay_cookies_wj WHERE created_time > '2024-06-22 00:00:00' ;"
        result_df = pd.read_sql(query, self.db_engine)
        cookie_list = result_df['cookie_value'].tolist()
        return cookie_list

    def save_ebay_asins(self,items_to_save):
        db_engine = self.con()
        table_name = "ebay_asins_wj"
        columns = ['asin', 'created_time','state']
        df = pd.DataFrame(items_to_save, columns=columns)
        try:
            df.to_sql(
                name=table_name,
                con= db_engine,
                if_exists='append',
                index=False  # 不使用DataFrame的索引作为数据库的id
            )
            print("asin保存成功！")
        except Exception as e:
            print(f"asin保存失败: {e}")

    def save_ebay_search_term_wj(self,item):
        table_name = "ebay_search_term_wj"
        columns = ['search_term', 'created_time', 'state']
        df = pd.DataFrame([item], columns=columns)
        try:
            df.to_sql(
                name=table_name,
                con=self.db_engine,
                if_exists='append',
                index=False  # 不使用DataFrame的索引作为数据库的id
            )
            print("search_term保存成功！")
        except Exception as e:
            print(f"search_term保存失败: {e}")

    def get_ebay_search_term(self):
        """Read up to 10000 distinct search terms for week 23 from the MySQL table ``us_brand_analytics_2024``.

        Returns:
            list: The ``search_term`` values.
        """
        engine = self.mysql()
        sql = "select DISTINCT(search_term) from {} WHERE `week`=23 limit 10000;".format(
            "us_brand_analytics_2024"
        )
        terms = pd.read_sql(sql, engine)
        return terms['search_term'].tolist()


    def get_ebay_asins(self, start_id, limit):
        """Read a range of rows from ``ebay_asin_wj`` and set their state to 20.

        Rows with ``id`` between ``start_id`` and ``start_id + limit - 1``
        (inclusive) whose ``state`` is not ``'21'`` are selected and their
        ``state`` is set to 20 in the same transaction, which is committed on
        success. The connection is always closed.

        Args:
            start_id: First id of the range.
            limit: Number of consecutive ids covered by the range.

        Returns:
            list[str]: One ``"<asin>|-|-|-|-|-|<id>"`` string per selected
            row; empty when nothing matches.
        """
        table_name = "ebay_asin_wj"
        query = f"SELECT asin, id FROM {table_name} WHERE id BETWEEN {start_id} AND {start_id + limit - 1} and state != '21';"
        sql_update = text(f"UPDATE {table_name} SET state = 20 WHERE id IN :ids")

        # Engine.begin() commits on success, rolls back on error and closes
        # the connection in every case.
        with self.con().begin() as connection:
            df_status = pd.read_sql(query, con=connection)
            ids = tuple(df_status['id'])
            # "IN ()" is a syntax error, so skip the UPDATE when nothing matched.
            if ids:
                connection.execute(sql_update, {"ids": ids})

        return [f"{asin}|-|-|-|-|-|{row_id}" for asin, row_id in zip(df_status['asin'], ids)]

    def get_ebay_asins_21(self):
        """Read every ``ebay_asin_wj`` row in state ``'3'`` and set its state to 20.

        Select and update run in one transaction that is committed on
        success; the connection is always closed.

        Returns:
            list[str]: One ``"<asin>|-|-|-|-|-|<id>"`` string per selected
            row; empty when nothing matches.
        """
        table_name = "ebay_asin_wj"
        # SQL equality is a single "="; PostgreSQL has no "==" operator.
        query = f"SELECT asin, id FROM {table_name} WHERE state = '3' "
        sql_update = text(f"UPDATE {table_name} SET state = 20 WHERE id IN :ids")

        # Engine.begin() commits on success, rolls back on error and closes
        # the connection in every case.
        with self.con().begin() as connection:
            df_status = pd.read_sql(query, con=connection)
            ids = tuple(df_status['id'])
            # "IN ()" is a syntax error, so skip the UPDATE when nothing matched.
            if ids:
                connection.execute(sql_update, {"ids": ids})

        return [f"{asin}|-|-|-|-|-|{row_id}" for asin, row_id in zip(df_status['asin'], ids)]

    def get_ebay_asins11111(self):
        db_engine = self.con()
        table_name = "ebay_asin_wj"
        asin_list = []

        query = f"SELECT asin FROM {table_name} LIMIT 1"
        df_status = pd.read_sql(query, con=db_engine)
        new_asins = df_status['asin'].tolist()
        asin_list.extend(new_asins)
        return asin_list


    def save_ebay_details(self, item):
        """Append one detail record to ``ebay_asin_details_month_2024_wj``.

        The row is written inside a transaction on a fresh engine from
        ``self.con()``. Exceptions raised while saving are caught and printed.

        Args:
            item: Record providing the columns listed in ``field_names``.
        """
        engine = self.con()
        field_names = [
            'asin', 'img_url', 'title', 'title_len', 'price', 'rating',
            'total_comments', 'category', 'volume', 'weight', 'img_num',
            'date_info', 'created_time',
        ]
        # Single-row DataFrame for the record.
        frame = pd.DataFrame([item], columns=field_names)
        try:
            with engine.begin() as txn:
                frame.to_sql("ebay_asin_details_month_2024_wj", con=txn, if_exists='append', index=False)
                print('保存成功')
        except Exception as e:
            print(f"数据保存失败: {e}")

    def ebay_upstate_to_3(self, failed_id):
        """Set ``state`` to ``'3'`` for the given rows of ``ebay_asin_wj``.

        The ids are sent as one bound parameter inside a ``text()`` statement
        (a plain SQL string is not executable on SQLAlchemy 2.0), and the
        UPDATE runs in a transaction that is committed on success.

        Args:
            failed_id: Sequence of row-like entries whose first element is
                the id (each entry is read as ``entry[0]``). An empty
                sequence is a no-op.

        Returns:
            None
        """
        print('开始更新为3..............')
        # numpy scalars are converted to native Python values so the DBAPI
        # driver can adapt them.
        ids = tuple(
            fid[0].item() if hasattr(fid[0], "item") else fid[0]
            for fid in failed_id
        )
        if not ids:
            # "IN ()" is a syntax error; there is nothing to update.
            return

        table_name = "ebay_asin_wj"
        sql_update = text(f"UPDATE {table_name} SET state = '3' WHERE id IN :ids")
        with self.con().begin() as connection:
            connection.execute(sql_update, {"ids": ids})
            print(sql_update, "执行中...")

        print('成功更新为3')

    def ebay_upstate_to_4(self, invalid_id):
        """Set ``state`` to ``'4'`` for the given rows of ``ebay_asin_wj``.

        The ids are sent as one bound parameter inside a ``text()`` statement
        (a plain SQL string is not executable on SQLAlchemy 2.0), and the
        UPDATE runs in a transaction that is committed on success.

        Args:
            invalid_id: Sequence of row-like entries whose first element is
                the id (each entry is read as ``entry[0]``). An empty
                sequence is a no-op.

        Returns:
            None
        """
        print('开始更新为4..............')
        # numpy scalars are converted to native Python values so the DBAPI
        # driver can adapt them.
        ids = tuple(
            fid[0].item() if hasattr(fid[0], "item") else fid[0]
            for fid in invalid_id
        )
        if not ids:
            # "IN ()" is a syntax error; there is nothing to update.
            return

        table_name = "ebay_asin_wj"
        sql_update = text(f"UPDATE {table_name} SET state = '4' WHERE id IN :ids")
        with self.con().begin() as connection:
            connection.execute(sql_update, {"ids": ids})
            print(sql_update, "执行中...")

        print('成功更新为4')


    def ebay_upstate_to_21(self, success_id):
        """Set ``state`` to ``'21'`` for the given rows of ``ebay_asin_wj``.

        Every supplied id is updated. The previous version inspected the
        length of the first entry rather than of the list, so a list of
        1-tuples only ever updated the first id, and an empty list raised
        ``IndexError``.

        Args:
            success_id: Sequence of ids. Entries may be plain scalars or
                row-like values (tuple/list/row) whose first element is the
                id — NOTE(review): confirm the shape callers actually pass.
                An empty sequence is a no-op.

        Returns:
            None
        """
        print('开始更新为21..............')
        ids = []
        for sid in success_id:
            # Row-like entries carry the id in position 0; strings are ids.
            if hasattr(sid, "__len__") and not isinstance(sid, (str, bytes)):
                sid = sid[0]
            # numpy scalars are converted to native Python values so the
            # DBAPI driver can adapt them.
            ids.append(sid.item() if hasattr(sid, "item") else sid)
        if not ids:
            # "IN ()" is a syntax error; there is nothing to update.
            return

        table_name = "ebay_asin_wj"
        sql_update = text(f"UPDATE {table_name} SET state = '21' WHERE id IN :ids")
        with self.con().begin() as connection:
            connection.execute(sql_update, {"ids": tuple(ids)})
            print(sql_update, "执行中...")

        print('成功更新为21')

    def save_ebay_details_bulk(self, items_list):
        """Append a batch of detail records to ``ebay_asin_details_month_2024_wj``.

        Each record is reduced to the columns in ``field_names`` (looked up
        by key) and the batch is written in one transaction. Exceptions are
        not caught here.

        Args:
            items_list: Iterable of mappings that contain every column name.

        Raises:
            KeyError: When a record lacks one of the columns.
        """
        engine = self.con()
        field_names = ['asin', 'img_url', 'title', 'title_len', 'price', 'rating', 'total_comments', 'category', 'volume',
                       'weight', 'img_num', 'date_info', 'created_time']
        # One value list per record, ordered like ``field_names``.
        asin_detail_list = [[record[name] for name in field_names] for record in items_list]
        print(asin_detail_list,'asin_detail_listasin_detail_listasin_detail_list')
        with engine.begin() as txn:
            # Turn the collected rows into a DataFrame and append it.
            frame = pd.DataFrame(asin_detail_list, columns=field_names)
            frame.to_sql("ebay_asin_details_month_2024_wj", con=txn, if_exists='append', index=False)
            print(f'批量保存{len(items_list)}条数据成功')

    def pic_data(self, item):
        """Append one picture record to ``pic_data_wj``.

        The row is written inside a transaction on a fresh engine from
        ``self.con()``. Exceptions raised while saving are caught and printed.

        Args:
            item: Record providing ``pic_name``, ``downloadUrl``, ``state``
                and ``created_time``.
        """
        engine = self.con()
        # Single-row DataFrame for the record.
        frame = pd.DataFrame(
            [item],
            columns=['pic_name', 'downloadUrl', 'state', 'created_time'],
        )
        try:
            with engine.begin() as txn:
                frame.to_sql("pic_data_wj", con=txn, if_exists='append', index=False)
                print('保存成功')
        except Exception as e:
            print(f"数据保存失败: {e}")








