import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from amazon_params import py_ja3
from sqlalchemy import create_engine
import pandas as pd
from queue import Queue
import threading
import time
import random
import urllib3
import uuid

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from amazon_spider.VPS_IP import is_internet_available
import requests

sess = requests.Session()
from datetime import datetime
import traceback
from utils.db_connect import BaseUtils
from amazon_spider.VPS_IP import pppoe_ip


class Amazon_Img():
    def __init__(self, site_name):
        self.site_name = site_name
        self.asin_img_queue = Queue()
        self.asin_state_list = []
        self.asin_not_find = []
        self.asin_imgurl_null = []
        self.asin_img_err = []
        self.asin_img_err_6 = [] # url错误
        self.asin_items = {}
        self.topic_asin_html = f'{site_name}_inv_image'

    def mysql_reconnect(self):
        """Create a fresh Kafka producer and a fresh SQLAlchemy engine.

        Despite its name this method also rebuilds ``self.kafuka_producer``.
        The ``us`` site uses the ``selection`` database, every other site
        uses ``selection_<site_name>``.  The engine is stored in
        ``self.engine_pg`` (the attribute name says "pg" but the URL is
        MySQL via pymysql).
        """
        # This method runs once per batch and again on every database error.
        # Release the pooled connections of the engine built by the previous
        # call explicitly instead of leaving that to garbage collection.
        previous_engine = getattr(self, 'engine_pg', None)
        if previous_engine is not None:
            try:
                previous_engine.dispose()
            except Exception as e:
                # Best effort only: a failed cleanup must not block reconnecting.
                print('failed to dispose previous engine:', e)

        # NOTE(review): the meaning of bootstrap_servers=True / acks=True is
        # defined by BaseUtils.kafuka_connect, which is not visible here.
        # NOTE(review): a producer created by an earlier call is not closed
        # here; init_list() closes it at the end of a normal batch.
        self.kafuka_producer = BaseUtils().kafuka_connect(bootstrap_servers=True, acks=True)

        database = 'selection' if self.site_name == 'us' else f'selection_{self.site_name}'
        # NOTE(review): database credentials are hard-coded in source; they
        # should be moved to configuration or environment variables.
        self.engine_pg = create_engine(
            'mysql+pymysql://XP_Yswg2025_PY:Gd1pGJog1ysLMLBdML8w81'
            '@rm-wz9yg9bsb2zf01ea4yo.mysql.rds.aliyuncs.com:3306/'
            f'{database}?charset=utf8mb4')

        print(self.engine_pg)

    def downlad_img(self):
        while True:
            if self.asin_img_queue.empty() == False:
                querys = self.asin_img_queue.get()
                query = querys.split('|-|')
                print('请求：：：', query)
                id_segment = query[0]
                id = query[1]
                img_id = query[2]
                img_type = query[3]
                img_url = query[4]
                headers = {}
                img_id_num = f'{id_segment}_{id}_{img_id}_{img_type}'
                if '.com' not in img_url:
                    self.asin_img_err.append(id)
                    continue
                try:
                    ua = f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(90, 114)}.0.{random.randint(1000, 5000)}.{random.randint(1, 181)} Safari/537.36'
                    headers['user-agent'] = ua
                    headers['accept'] = 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8'
                    headers[
                        'accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
                    alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n']
                    k = ""
                    for i in (0, random.randint(0, 5)):
                        k += random.choice(alphabet)
                    headers[k] = str(uuid.uuid4())
                    # sess.mount("'https://m.media-amazon.com", py_ja3.DESAdapter())
                    r = sess.get(img_url, headers=headers, timeout=30, verify=False)  # 获取网页
                except Exception as e:
                    print('========================请求报错：', e)
                    if 'getaddrinfo failed' in str(e) or 'Failed to establish a new connection' in str(e) or\
                        'No connection adapters were found for' in str(e):
                        print('改成 6 ')
                        self.asin_img_err_6.append(id)
                    else:
                        self.asin_not_find.append(id)
                    continue
                try:
                    # 获取当前时间
                    current_time = datetime.now()
                    # 格式化时间为 'YYYYMMDDHH' 格式
                    formatted_time = current_time.strftime('%Y%m%d%H')
                    # asin_upper = asin.upper()
                    # print(asin, "存储路径", f"{asin_img_path}{asin_upper}.jpg")
                    # path_1 = fr"/run/{formatted_time}/mnt/data/img_data/{self.site_name}{asin_img_path}"
                    # if os.path.exists(path_1) == False:  # 判断路径是否存在
                    #     os.makedirs(path_1)
                    # with open(rf"D:\新建文件夹\html_selenium_files/{self.site_name}_{asin_upper}.jpg",
                    #           'wb') as f:  # 打开写入到path路径里-二进制文件，返回的句柄名为f
                    #     f.write(r.content)  # 往f里写入r对象的二进制文件
                    # us_self_asin_image
                    # img_content = f"{self.site_name}|-||=|-|=||-|{asin}|-||=|-|=||-|{r.content}"sd
                    img_content = f"{self.site_name}|-||=|-|=||-|{img_id_num}|-||=|-|=||-|{r.content}"
                    self.send_kafka(html_data=img_content, topic=self.topic_asin_html)
                    self.asin_state_list.append(id)
                except Exception as e:
                    print('++++++++++++++++++++++++++存储报错=====', e)
                    self.asin_not_find.append(id)
            else:
                break


    def on_send_success(self, record_metadata):
        print(f"消息发送成功: {record_metadata.topic}-{record_metadata.partition}-{record_metadata.offset}")

    def on_send_error(self, excp):
        print("消息发送失败", excp)

    def send_kafka(self, items=None, html_data=None, topic=None, asin=None):
        if not html_data:
            return  # 如果没有数据发送，直接返回

        for i in range(3):  # 尝试发送两次
            try:
                future = self.kafuka_producer.send(topic, html_data)
                future.add_callback(self.on_send_success).add_errback(self.on_send_error)
                break
            except Exception as e:
                print("发送消息时遇到错误", e, f"\n{traceback.format_exc()}")
                if i >= 0:
                    self.kafuka_producer = BaseUtils().kafuka_connect(bootstrap_servers=None, acks=True)
                if i >= 1:
                    self.asin_not_find.append(asin)  # 记录发送失败的 ASIN

        self.kafuka_producer.flush()  # 最后确保所有消息都被发送

    def update_asin_state(self, state=2, asin_list=None):
        """Set ``state`` on the given rows of ``<site_name>_inv_img_info``.

        The update is retried indefinitely: on any exception the connections
        are rebuilt via ``mysql_reconnect`` and the statement is retried after
        five seconds.

        :param state: new value for the ``state`` column.
        :param asin_list: iterable of row ids; ``None`` entries are ignored.
            When it is ``None`` or contains no usable id, nothing is executed.
        """
        if asin_list is None:
            asin_list = []
        # Drop None entries: rendered into the "in (...)" list they would
        # make the statement fail on every retry.
        row_ids = [row_id for row_id in asin_list if row_id is not None]
        print(state, '修改状态::', len(row_ids))
        if not row_ids:
            return

        # Quote every id the same way, so one id and many ids share one code
        # path (the ids come from this program's own queue entries).
        id_clause = ', '.join(f"'{row_id}'" for row_id in row_ids)
        sql_update = f"update {self.site_name}_inv_img_info set state={state} where id in ({id_clause});"

        while True:
            try:
                if not is_internet_available():
                    self.mysql_reconnect()
                print('修改状态')
                # engine.begin() commits on success and rolls back on error.
                with self.engine_pg.begin() as conn:
                    print(sql_update)
                    conn.execute(sql_update)
                break
            except Exception as e:
                print(e, '444444444444')
                self.mysql_reconnect()
                time.sleep(5)

    def read_img_url(self):
        """Claim up to 5000 rows with ``state=1`` and return them as queue entries.

        Inside one transaction the rows are selected, kept in
        ``self.df_read`` and switched to ``state=2``.  On any exception the
        connections are rebuilt via ``mysql_reconnect`` and the read is
        retried after five seconds, indefinitely.

        :return: list of strings
            ``id_segment|-|id|-|img_id|-|img_type|-|img_url`` (for example
            the first four fields of ``1|-|1|-|2472|-|product_audit``), or
            ``[]`` when no row has ``state=1``.
        """
        table = f'{self.site_name}_inv_img_info'
        sql_read = f'SELECT id,id_segment,img_id,img_type,img_url  FROM {table} where state=1  LIMIT 5000;'
        # Field order of one queue entry, as split again by downlad_img.
        entry_fields = ['id_segment', 'id', 'img_id', 'img_type', 'img_url']

        while True:
            try:
                if not is_internet_available():
                    self.mysql_reconnect()
                with self.engine_pg.begin() as conn:
                    # NOTE(review): the SELECT takes no row lock (no
                    # "FOR UPDATE"); confirm whether several processes can
                    # run against the same table at once.
                    print(sql_read)
                    rows = conn.execute(sql_read)
                    self.df_read = pd.DataFrame(
                        rows, columns=['id', 'id_segment', 'img_id', 'img_type', 'img_url'])
                    if self.df_read.shape[0] == 0:
                        return []

                    claimed_ids = list(self.df_read['id'])
                    print('更改状态 2 ', len(claimed_ids))
                    # Quote every id the same way so one id and many ids
                    # share one code path.
                    id_clause = ', '.join(f"'{row_id}'" for row_id in claimed_ids)
                    sql_update = f"""UPDATE {table} a set state=2 where a.id in ({id_clause})"""
                    conn.execute(sql_update)

                    # Build the entries row by row.  NULL values become ''
                    # instead of propagating NaN through string
                    # concatenation, which would put a float into the queue;
                    # an empty img_url is then rejected by downlad_img's
                    # '.com' check.
                    asin_img_list = [
                        '|-|'.join('' if pd.isna(value) else str(value) for value in row)
                        for row in self.df_read[entry_fields].itertuples(index=False, name=None)
                    ]
                    return asin_img_list
            except Exception as e:
                print("读取数据出bug并等待5s继续", e)
                self.mysql_reconnect()
                # Matches the logged "5s" and the delay in update_asin_state.
                time.sleep(5)

    def init_list(self):
        self.asin_img_queue = Queue()
        self.asin_state_list = []
        self.asin_not_find = []
        self.asin_imgurl_null = []
        self.asin_items = {}
        self.asin_img_err = []
        self.asin_img_err_6 = []
        self.kafuka_producer.close()

    def run(self):
        while True:
            # pppoe_ip()
            self.mysql_reconnect()
            asin_img_list = self.read_img_url()
            if asin_img_list:
                for asin_img in asin_img_list:
                    self.asin_img_queue.put(asin_img)
                html_thread = []
                for i in range(50):
                    thread2 = threading.Thread(target=self.downlad_img)
                    html_thread.append(thread2)
                for ti in html_thread:
                    ti.start()
                for t2 in html_thread:
                    t2.join()
                if self.asin_state_list:
                    self.update_asin_state(state=3, asin_list=self.asin_state_list)
                    self.asin_state_list = []
                if self.asin_not_find:
                    self.update_asin_state(state=1, asin_list=self.asin_not_find)
                    self.asin_not_find = []
                if self.asin_img_err:
                    self.update_asin_state(state=4, asin_list=self.asin_img_err)
                    self.asin_img_err = []
                if self.asin_img_err_6:
                    self.update_asin_state(state=6, asin_list=self.asin_img_err_6)
                    self.asin_img_err_6 = []
            else:
                break
            self.init_list()


# Script entry point: process the 'us' site until no rows are left to download.
if __name__ == '__main__':
    spider = Amazon_Img('us')
    spider.run()
