
import sys
import os

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
import curl_cffi
from lxml import etree
from threading_spider.db_connectivity import connect_db
import json
import time
from utils.secure_db_client import get_remote_engine
import random


def get_cid():
    """Download the SellerSprite sales-estimator page and hand its category JSON on.

    The page is fetched with a Chrome-impersonating request, the text of the
    ``<script id="data-category">`` node is printed, decoded with
    ``json.loads`` and passed to ``save_site_category``.

    Raises:
        IndexError: if the page contains no ``data-category`` script node
            (same exception type as before, now with a diagnostic message).
    """
    # Original author's note: collects the BSR top-level category names and
    # category ids of all sites; they are stored under the "us" site.
    url = 'https://www.sellersprite.com/v2/tools/sales-estimator'
    headers = {

        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh-TW;q=0.9,zh;q=0.8",
        "Cache-Control": "no-cache",
        "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    }
    resp = curl_cffi.get(url, headers=headers, impersonate="chrome")
    html = etree.HTML(resp.text)
    # etree.HTML returns None for an empty document, so guard before querying.
    nodes = html.xpath("//script[@id='data-category']/text()") if html is not None else []
    if not nodes:
        raise IndexError(
            "data-category script not found in sales-estimator page "
            f"(HTTP status: {getattr(resp, 'status_code', 'unknown')})"
        )
    data_category = nodes[0]
    print(data_category)
    # NOTE(review): save_site_category is neither defined nor imported in the
    # visible part of this file - confirm where it comes from.
    save_site_category(json.loads(data_category))

def mysql_connect(site='us'):
    """Return the engine that ``get_remote_engine`` builds for ``site``.

    Args:
        site: site code forwarded as ``site_name`` (defaults to ``'us'``).
    """
    # Per the original notes: site_name selects the "selection" database and
    # db_type='mysql' is the server-side alias - TODO confirm against
    # utils.secure_db_client.
    return get_remote_engine(site_name=site, db_type='mysql')

def db_cursor_connect_update(sql, site):
    """Run a write statement on the engine of ``site``, best-effort.

    The statement is executed inside ``engine.begin()``; on failure it is
    retried, up to three attempts in total. Errors are printed and never
    propagated, so callers cannot tell from the return value (always ``None``)
    whether the statement was applied.

    Args:
        sql: statement passed unchanged to ``conn.execute``.
        site: site code passed to ``mysql_connect``.
    """
    for _attempt in range(3):
        try:
            engine_us_mysql = mysql_connect(site=site)
            print('更新sql：', sql)
            with engine_us_mysql.begin() as conn:
                conn.execute(sql)
            break
        except Exception as e:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # still stop the script, and show the actual cause of the failure.
            print(site, 'db_cursor_connect 报错：', sql, e)
    else:
        # All attempts failed: keep the best-effort contract but say so.
        print(site, 'db_cursor_connect 重试3次仍失败：', sql)
def db_cursor_connect_msyql_read(site,select_state1_sql):
    """Run a read query on the engine of ``site`` and return its result.

    Up to three attempts are made. Each failure prints the full traceback and
    a one-line summary.

    Args:
        site: site code passed to ``mysql_connect``.
        select_state1_sql: query text handed to the engine's ``read_sql``.

    Returns:
        Whatever ``read_sql`` returns, or ``None`` when all attempts failed.
    """
    import traceback

    remaining = 3
    while remaining > 0:
        remaining -= 1
        try:
            return mysql_connect(site=site).read_sql(select_state1_sql)
        except Exception as e:
            # Print the complete stack trace to the terminal.
            traceback.print_exc()
            print(e, 'db_cursor_connect_msyql_read 报错：', select_state1_sql)
    return None

def _sales_estimator_ranks():
    """Return the BSR ranks sampled for every category, in ascending order.

    The points are 1, 10, 30, 50, then every 100 up to 10,000, every 1,000 up
    to 20,000 and every 5,000 up to 50,000.
    """
    # Ranks above 50,000 (every 5,000 up to 100,000, then every 50,000 up to
    # 1,000,000) were commented out in the original list and stay disabled.
    return [
        1, 10, 30, 50,
        *range(100, 10001, 100),
        *range(11000, 20001, 1000),
        *range(25000, 50001, 5000),
    ]


def _sales_estimator_headers():
    """Build the HTTP headers sent with every bsr.json request."""
    # SECURITY NOTE(review): a logged-in session cookie (including a signed
    # token) is committed in source. It can be supplied through the
    # SELLERSPRITE_COOKIE environment variable instead; the literal below is
    # kept only as a fallback so existing deployments behave as before. It
    # should be rotated and removed from the repository.
    default_cookie = (
        '_ga=GA1.1.460823715.1761964155; '
        '_gcl_au=1.1.1179274784.1761964155; '
        'Hm_lvt_e0dfc78949a2d7c553713cb5c573a486=1761964155; '
        'HMACCOUNT=B4AF3F9F7A9D6EC1; '
        'ecookie=Dze8cmDaWQgl62by_CN; '
        'cefb279b040e878c5f29=3df3541e6a5558a1721e067eca0b7599; '
        '_fp=65dbbe41a37f8f9fbe702eba96328267; '
        'MEIQIA_TRACK_ID=34rGDtW8dkHrHYKAd1YqneyES16; '
        'MEIQIA_VISIT_ID=34rGDvmHZCXsyI9TcknmSs0VUgF; '
        'current_guest=Q5ta0ho0plze_251101-107638; '
        '_gaf_fp=71220b6380421e1c3114927822a0491d; '
        'rank-guest-user=4415412671XrN9Zk+EL9uIxING7/uXAkz1zoQytfQ4xehrp1wmpmp0tq0CKPMciyLt+xiapPpr; '
        'rank-login-user=4415412671XrN9Zk+EL9uIxING7/uXAtdtFXnuDWfcyj/blj6W2ZWpWUeF9+7WsIFXBV6TrXmy; '
        'rank-login-user-info="eyJuaWNrbmFtZSI6IuWViuWTiOWTiOWTiCIsImlzQWRtaW4iOmZhbHNlLCJhY2NvdW50IjoiMTUzKioqKjEyNzAiLCJ0b2tlbiI6IjQ0MTU0MTI2NzFYck45WmsrRUw5dUl4SU5HNy91WEF0ZHRGWG51RFdmY3lqL2JsajZXMlpXcFdVZUY5KzdXc0lGWEJWNlRyWG15In0="; '
        'Sprite-X-Token=eyJhbGciOiJSUzI1NiIsImtpZCI6IjE2Nzk5NjI2YmZlMDQzZTBiYzI5NTEwMTE4ODA3YWExIn0.eyJqdGkiOiJfUWNIdGFKc1I3Xy04czRXcUF4UFpnIiwiaWF0IjoxNzYyMDg3NTQ0LCJleHAiOjE3NjIxNzM5NDQsIm5iZiI6MTc2MjA4NzQ4NCwic3ViIjoieXVueWEiLCJpc3MiOiJyYW5rIiwiYXVkIjoic2VsbGVyU3BhY2UiLCJpZCI6MTQ2NjIwMSwicGkiOm51bGwsIm5uIjoi5ZWK5ZOI5ZOI5ZOIIiwic3lzIjoiU1NfQ04iLCJlZCI6Ik4iLCJwaG4iOiIxNTM2ODA1MTI3MCIsImVtIjoibWVpeW91bGFAbWVpeW91bGEuY29tIiwibWwiOiJHIn0.bA22TL7V1Ojva0xFsPl_1b---9IabSoJXdkWLxhspamEfSx8eLf-sv2VZz6fNqLbZI_ZXb9nBfdCbM0S2yzvElDeC9laJWi6Y_Cz5ywZvWPkkSl5Wmjal5Nso33UeoMffiBkjDkwIN6uIk-726zea76m7xrJmjQbN2wet_fzW04U4RbYPfCIam0eEvXQjhMAuYPoihIcF-LocsQ3Qr-m3xVaWD6CxxTC30rt4ZfD63kRGjrVa2RfgqVeBVS5nMwBF0PWEYgRUN2mB9jyDfnG472TNfxLhXIGPUTaoMtnaxQoRtbcENuapbpIZCpCruq1SuMNdqK3oxtdnUij6yiXEA; '
        'ao_lo_to_n="4415412671XrN9Zk+EL9uIxING7/uXAmjk9eVYRzsag6V6ttkMQIH2Lh3Ah2vwuQRDfzmyINXazLLen51hoAgbtysMQkarAmDtVJPvrGJg/tasB7+3bQc="; '
        'JSESSIONID=2FD41936F77140471FC8EC556826B071; '
        'Hm_lpvt_e0dfc78949a2d7c553713cb5c573a486=1762087559; '
        '_ga_CN0F80S6GL=GS2.1.s1762087538$o2$g1$t1762087559$j39$l0$h0; '
        '_ga_38NCVF2XST=GS2.1.s1762087538$o2$g1$t1762087565$j33$l0$h205427331'
    )
    return {
        "Referer": "https://www.sellersprite.com/v2/tools/sales-estimator",
        "Origin": "https://www.sellersprite.com",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh-TW;q=0.9,zh;q=0.8",
        "Cache-Control": "no-cache",
        'Cookie': os.environ.get("SELLERSPRITE_COOKIE") or default_cookie,
        "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    }


def _request_bsr_estimate(url, headers, data, attempts=3):
    """POST one estimator query and return the decoded JSON body.

    Returns ``None`` when every attempt failed (request error or a body that
    is not valid JSON), so the caller never works on a stale or undecoded
    response.
    """
    for _ in range(attempts):
        try:
            resp = curl_cffi.post(url, headers=headers, data=data, impersonate="chrome", timeout=300)
            print(resp.url)
            return json.loads(resp.text)
        except Exception as e:
            print('请求失败，稍后重试：', e)
            # Randomised back-off before the next attempt.
            time.sleep(random.uniform(15, 30.75))
    return None


def junglescout_spider(db_base):
    """Collect estimated monthly sales per BSR rank for every pending category.

    Categories are read from ``all_site_category`` (rows with ``state = 1``
    for ``db_base``) through the ``'us'`` engine. For each category the row is
    moved to ``state = 2``, the sales-estimator endpoint is queried for the
    ranks from ``_sales_estimator_ranks()`` until it stops returning a
    positive estimate, the collected rows are inserted into
    ``{db_base}_one_category`` and the category is moved to ``state = 3``.

    Args:
        db_base: site code used in the category filter, in the target table
            name and as the site of the engine that receives the inserts.

    Raises:
        RuntimeError: if the pending-category query could not be read.
    """
    now = time.localtime()
    year_month = f'{now.tm_year}_{now.tm_mon}'

    category_name_sql_select = f"select `name`,c_id  from all_site_category where site='{db_base}' and state =1"
    print(category_name_sql_select)
    category_name_list_df = db_cursor_connect_msyql_read('us', category_name_sql_select)
    print(category_name_list_df)
    if category_name_list_df is None:
        # The reader returns None after exhausting its retries; fail with a
        # clear message instead of an opaque TypeError on subscripting None.
        raise RuntimeError(f"could not read pending categories for site {db_base!r}")

    url = "https://www.sellersprite.com/v2/tools/sales-estimator/bsr.json"
    # Loop invariants: built once instead of once per request.
    headers = _sales_estimator_headers()
    rank_list = _sales_estimator_ranks()

    # Pair the columns directly; joining and re-splitting them with a
    # separator string broke on non-string ids and on names containing it.
    for c_name, c_id in zip(category_name_list_df['name'], category_name_list_df['c_id']):
        print(c_name, c_id)
        name_rnak_list = []
        # Mark the category as in progress before crawling it.
        up_sql = f"UPDATE all_site_category set state=2 WHERE site='{db_base}' and state=1 and c_id='{c_id}'"
        db_cursor_connect_update(up_sql, 'us')

        for rank in rank_list:
            data = {
                # NOTE(review): the station is hard-coded to "DE" although the
                # categories are selected by db_base; confirm the station code
                # mapping before calling this with another site.
                "station": "DE",
                "cid": c_id,  # category id
                "bsr": f"{rank}"  # rank
            }
            print(c_name, '请求参数 data::', data)
            response = _request_bsr_estimate(url, headers, data)
            if not isinstance(response, dict):
                # Every attempt failed; stop this category instead of reusing
                # the previous rank's payload.
                print(f"{c_name} 排名{rank}：请求失败，跳出循环。")
                break

            print('code::', response.get('code'))
            print('message::', response.get('message'))
            response_data = response.get('data')
            if not isinstance(response_data, dict):
                response_data = {}
            est = response_data.get('estMonSales')
            print('estMonSales::', est)
            if est is None:
                # No data returned: stop querying further ranks.
                break
            if est == 0.0:
                print(f"{c_name} 排名{rank}：销量 0，跳出循环。")
                break

            # Per the original author's note, fractional estimates below 1 do
            # not occur, so truncating with int() is assumed to stay >= 1.
            print(type(est))
            print('获取数据：', c_name, rank, est, year_month)
            sales = int(est)
            name_rnak_list.append((c_name, rank, sales, year_month))
            # Throttle between rank queries.
            time.sleep(random.uniform(20, 45.75))

        inset_sql = f"INSERT INTO {db_base}_one_category (name, rank,orders,`year_month`) values (%s, %s, %s, %s)"
        for _ in range(4):
            try:
                print(inset_sql)
                engine_db_msyql = mysql_connect(site=db_base)
                with engine_db_msyql.begin() as conn:
                    conn.execute(inset_sql, name_rnak_list)

                # Only reached when the insert succeeded: mark as done.
                up_sql = f"UPDATE all_site_category set state=3 WHERE site='{db_base}' and state=2 and c_id='{c_id}'"
                print('更新状态：', up_sql)
                db_cursor_connect_update(up_sql, 'us')
                break
            except Exception as e:
                print('存储失败：', e)

                time.sleep(20)
        print('当前完成。获取下一个分类销量')
        # Longer pause between categories.
        time.sleep(random.uniform(50, 120.5))


def run():
    """Start the crawl for the ``'de'`` site."""
    target_site = 'de'
    junglescout_spider(target_site)

# Script entry point: start the crawl only when this file is executed directly.
if __name__ == '__main__':
    run()