import sys
import os
sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from amazon_params import py_ja3
# import requests
from curl_cffi import requests,Curl
from sqlalchemy import create_engine
import pandas as pd
from queue import Queue
import threading
import time
import random
import os
from amazon_params.params import DB_CONN_DICT, PG_CONN_DICT
import urllib3
import re
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from utils.requests_param import Requests_param_val
# sess = requests.Session()
import traceback
class Amazon_Img():
    """Download competitor-ASIN product images and track progress in MySQL.

    Rows of ``us_self_asin_compet_amazon`` carry a ``state`` column that this
    class uses as follows:

    * 1 -- pending; also the value a failed download is rolled back to
    * 2 -- claimed by the batch currently being processed
    * 3 -- image downloaded and written to disk
    * 5 -- the row has no usable ``img_url``

    One batch (up to 1000 rows) is claimed at a time, downloaded by a pool of
    worker threads, and then the outcome of every ASIN is written back.
    """

    # Separator between the ASIN and its image URL inside a queue item.
    _SEPARATOR = '|-|'
    # Images are written to <_IMG_ROOT>/<_IMG_SUBDIR>/<ASIN>.jpg
    _IMG_ROOT = '/mnt/data/img_data'
    _IMG_SUBDIR = 'amazon'
    # Number of download threads started for every batch.
    _WORKER_COUNT = 40

    # Static request headers; a randomised user-agent is added per request.
    _BASE_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache',
    }

    # NOTE(security): proxy credentials are hard-coded in source control.
    # They are kept here unchanged to preserve behaviour, but should be moved
    # to configuration / environment and rotated.
    _PROXY_URLS = (
        'http://t16208953323855:ib80eped@i537.kdltps.com:15818/',
        'http://t16244760032579:0gj5rbnp@n378.kdltps.com:15818/',
        'http://t17780866032960:57b65ww2@i633.kdltps.com:15818/',
        'http://t16450128695918:2vqy8epc@x783.kdltps.com:15818/',
        'http://t16450137765728:8cw2orjz@t357.kdltps.com:15818/',
    )

    def __init__(self, site_name):
        """Create the DB engine and the per-batch bookkeeping containers.

        :param site_name: site code; ``'us'`` selects the ``selection``
            database, any other value selects ``selection_<site_name>``.
        """
        self.site_name = site_name
        self.mysql_reconnect()
        self.asin_img_queue = Queue()   # items: "<asin>|-|<img_url>"
        self.asin_state_list = []       # ASINs stored successfully  -> state 3
        self.asin_not_find = []         # ASINs whose download failed -> state 1
        self.asin_imgurl_null = []      # ASINs without an image URL  -> state 5
        self.reuests_para_val = Requests_param_val(site_name=self.site_name, spider="seller_account_product")

    def mysql_reconnect(self):
        """(Re)create ``self.engine_pg``, retrying with a growing delay.

        Despite its name, ``engine_pg`` is a MySQL (pymysql) engine. A
        previously created engine is disposed first so that its connection
        pool is released instead of lingering until garbage collection.
        """
        previous = getattr(self, 'engine_pg', None)
        if previous is not None:
            try:
                previous.dispose()
            except Exception as e:
                print("error_mysql_dispose:", e)

        nums = 0
        while True:
            nums += 1
            try:
                db = 'selection' if self.site_name == 'us' else f'selection_{self.site_name}'
                self.engine_pg = create_engine(
                    f'mysql+pymysql://{DB_CONN_DICT["mysql_user"]}:'
                    f'{DB_CONN_DICT["mysql_pwd"]}@{DB_CONN_DICT["mysql_host"]}:'
                    f'{DB_CONN_DICT["mysql_port"]}/{db}?charset=utf8mb4')  # , pool_recycle=3600
                break
            except Exception as e:
                print("error_mysql_connect:", e, f"\n{traceback.format_exc()}")
                # Linear back-off: 20s, 40s, 60s, ...
                time.sleep(nums * 20)

    @staticmethod
    def _sql_in_list(values):
        """Render *values* as a parenthesised SQL list for an ``IN`` clause.

        Integers are emitted bare, everything else as a single-quoted string
        with embedded single quotes doubled, e.g. ``(1, 2)`` or ``('A', 'B')``.
        A one-element input yields ``('A')`` (no trailing comma).
        """
        rendered = []
        for value in values:
            if isinstance(value, int):
                rendered.append(str(value))
            else:
                rendered.append("'" + str(value).replace("'", "''") + "'")
        return "(" + ", ".join(rendered) + ")"

    @classmethod
    def _pack_queue_items(cls, df):
        """Build the ``"<asin>|-|<img_url>"`` work items from a batch frame.

        Missing values (``None``/NaN) become an empty string so every item is
        a ``str``; the worker treats an empty URL as "no image URL".
        """
        def as_text(value):
            return '' if pd.isna(value) else str(value)

        return [
            as_text(asin) + cls._SEPARATOR + as_text(url)
            for asin, url in zip(df['asin_compet'], df['img_url'])
        ]

    def _fetch_image(self, img_url):
        """GET *img_url* through a random proxy and return the response."""
        headers = dict(self._BASE_HEADERS)
        headers['user-agent'] = f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(90, 114)}.0.{random.randint(1000, 5000)}.{random.randint(1, 181)} Safari/537.36'
        proxy_url = random.choice(self._PROXY_URLS)
        proxy_ = {'http': proxy_url, 'https': proxy_url}
        # NOTE(review): the cacert path looks like an unreplaced placeholder;
        # certificate verification is disabled on the request anyway.
        curl = Curl(cacert="/path/to/your/cert")
        # The session is closed as soon as the (fully buffered) response is
        # available so curl handles are not left open per downloaded image.
        with requests.Session(curl=curl) as session:
            return session.get(img_url, headers=headers, proxies=proxy_, timeout=60, verify=False, impersonate="chrome110")

    def _store_image(self, asin, content):
        """Write *content* to ``<root>/<subdir>/<ASIN upper-cased>.jpg``."""
        asin_upper = asin.upper()
        print(asin, "存储路径", rf"{self._IMG_SUBDIR}/{asin_upper}.jpg")
        target_dir = os.path.join(self._IMG_ROOT, self._IMG_SUBDIR)
        # exist_ok avoids a check-then-create race between worker threads.
        os.makedirs(target_dir, exist_ok=True)
        with open(os.path.join(target_dir, f"{asin_upper}.jpg"), 'wb') as f:
            f.write(content)

    def downlad_img(self):
        """Worker loop: drain ``asin_img_queue`` and download each image.

        The outcome of every item is recorded in one of ``asin_state_list``
        (stored), ``asin_not_find`` (request or write failed) or
        ``asin_imgurl_null`` (no URL). Returns when the queue is empty.
        (The misspelled method name is kept for backward compatibility.)
        """
        from queue import Empty  # local import: the module only imports Queue

        while True:
            # get_nowait() instead of empty()+get(): with many workers another
            # thread may take the last item between the check and a blocking
            # get(), which would hang this thread (and run()'s join) forever.
            try:
                querys = self.asin_img_queue.get_nowait()
            except Empty:
                break

            if not isinstance(querys, str):
                print('skip malformed queue item:', repr(querys))
                continue

            # partition() tolerates a missing separator (URL becomes '').
            asin, _, img_url = querys.partition(self._SEPARATOR)
            print('请求：：：', [asin, img_url])

            if img_url in ('null', 'None', ''):
                self.asin_imgurl_null.append(asin)
                continue

            try:
                r = self._fetch_image(img_url)  # fetch the image bytes
            except Exception as e:
                print('========================请求报错：', e)
                self.asin_not_find.append(asin)
                continue

            # NOTE(review): the HTTP status is not checked, so an error page
            # returned with a non-200 status is still saved as the image.
            # Confirm the desired state for such rows before tightening this.
            try:
                self._store_image(asin, r.content)
            except Exception as e:
                print('++++++++++++++++++++++++++存储报错=====', e)
                self.asin_not_find.append(asin)
            else:
                self.asin_state_list.append(asin)

    def update_asin_state(self, state=2, asin_list=None):
        """Set ``state`` for the given ASINs of the current batch.

        Only rows still in state 2 (claimed) are touched. The statement is
        retried, after reconnecting, until it succeeds.

        :param state: new state value (1 = roll back, 3 = success, ...).
        :param asin_list: ASINs to update; ``None`` or empty is a no-op.
        """
        if not asin_list:
            return
        df = self.df_read.loc[self.df_read.asin_compet.isin(asin_list)]
        asin_tuple = tuple(df.asin_compet)
        print(state, '修改状态::', len(asin_tuple))
        if not asin_tuple:
            # Nothing matched; "in ()" would be invalid SQL and retry forever.
            return

        sql_update = (
            f"update us_self_asin_compet_amazon set state={state} "
            f"where asin_compet in {self._sql_in_list(asin_tuple)} and state=2;"
        )
        while True:
            try:
                print('修改状态')
                with self.engine_pg.begin() as conn:
                    # NOTE(review): executing a plain string requires
                    # SQLAlchemy 1.x; 2.x needs sqlalchemy.text().
                    conn.execute(sql_update)
                break
            except Exception as e:
                print(e, '444444444444')
                self.mysql_reconnect()
                time.sleep(5)

    def read_img_url(self):
        """Claim the next batch of pending rows and return their work items.

        Up to 1000 rows in state 1 are selected ``FOR UPDATE``, de-duplicated
        by ASIN (first row wins), and the kept rows are switched to state 2 in
        the same transaction. The batch is stored in ``self.df_read``.

        :return: list of ``"<asin>|-|<img_url>"`` strings, or ``[]`` when no
            pending rows are left.
        """
        sql_read = 'SELECT asin_compet,img_url,id  FROM us_self_asin_compet_amazon where state=1 LIMIT 1000 FOR UPDATE'
        while True:
            try:
                with self.engine_pg.begin() as conn:
                    print(sql_read)
                    a = conn.execute(sql_read)
                    self.df_read = pd.DataFrame(a, columns=['asin_compet', 'img_url', 'id'])
                    self.df_read.drop_duplicates(['asin_compet'], inplace=True)
                    if self.df_read.shape[0] == 0:
                        return []
                    # Only the de-duplicated rows are claimed; duplicate rows
                    # stay in state 1 and are picked up by a later batch.
                    index_tuple = tuple(self.df_read['id'])
                    print('更改状态 2 ', len(index_tuple))
                    sql_update = f"""UPDATE us_self_asin_compet_amazon a set state=2 where a.id in {self._sql_in_list(index_tuple)}"""
                    conn.execute(sql_update)
                    # Built explicitly (not via Series addition) so a NULL
                    # img_url yields '' rather than a float NaN queue item.
                    return self._pack_queue_items(self.df_read)
            except Exception as e:
                print("读取数据出bug并等待5s继续", e)
                self.mysql_reconnect()
                time.sleep(3)

    def run(self):
        """Process batches until no pending rows remain."""
        while True:
            asin_img_list = self.read_img_url()
            if not asin_img_list:
                break

            for asin_img in asin_img_list:
                self.asin_img_queue.put(asin_img)

            workers = [threading.Thread(target=self.downlad_img)
                       for _ in range(self._WORKER_COUNT)]
            for worker in workers:
                worker.start()
                time.sleep(0.125)  # stagger thread start-up
            for worker in workers:
                worker.join()

            # Write the outcome of this batch back and reset the collectors.
            if self.asin_state_list:
                self.update_asin_state(state=3, asin_list=self.asin_state_list)
                self.asin_state_list = []
            if self.asin_not_find:
                self.update_asin_state(state=1, asin_list=self.asin_not_find)
                self.asin_not_find = []
            if self.asin_imgurl_null:
                self.update_asin_state(state=5, asin_list=self.asin_imgurl_null)
                self.asin_imgurl_null = []


if __name__ == '__main__':
    # Script entry point: build the downloader for the US site and run it.
    downloader = Amazon_Img('us')
    downloader.run()
