import re
import os
import time
import js2py
import logging
from flask import *
import pandas as pd
import requests as req
from curl_cffi import requests
from sqlalchemy import create_engine
from concurrent.futures import ThreadPoolExecutor
from db.redis_db import srandmembers, srem

# 2025-04-01: latest version adds scraping support for some additional link types.
# NOTE(review): NO_PROXY exempts stackoverflow.com from any environment-configured
# proxy; it is unclear why that host matters for a 1688 scraper - confirm.
os.environ['NO_PROXY'] = 'stackoverflow.com'

# Flask application on which the route handlers in this module are registered.
app = Flask(__name__)


def get_country_engine(site_name="us"):
    """Create a SQLAlchemy engine for the ``selection`` MySQL schema of a site.

    Args:
        site_name: Site code. ``"us"`` selects the ``selection`` schema; any
            other value selects ``selection_<site_name>``.

    Returns:
        A new SQLAlchemy engine for the chosen schema. Every call builds a
        separate engine, so callers should ``dispose()`` it when finished.
    """
    # Local import: the module does not import urllib at file level.
    from urllib.parse import quote_plus

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a secret store instead.
    user = 'XP_Yswg2025_PY'
    password = 'Gd1pGJog1ysLMLBdML8w81'
    host = 'rm-wz9yg9bsb2zf01ea4yo.mysql.rds.aliyuncs.com'
    port = 3306
    charset = 'utf8'
    database = 'selection' if site_name == 'us' else f"selection_{site_name}"

    # Percent-encode the credentials so characters such as "@", ":" or "/"
    # in a future password cannot be mistaken for URL delimiters.
    db_ = (
        f"mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}"
        f"@{host}:{port}/{database}?charset={charset}"
    )
    return create_engine(db_)  # , pool_recycle=3600


def _sign_1688_payload(payload, token, timestamp):
    """Evaluate the local ``z.js`` script and return ``get_cc(payload, token, timestamp)``.

    The first element of the returned value is what the caller sends as the
    ``sign`` query parameter.
    """
    with open("z.js", "r", encoding="utf-8") as f:
        js_context = js2py.EvalJs()
        js_context.execute(f.read())
    return js_context.get_cc(payload, token, timestamp)


def get_1688_data(memberid, proxies, seed_url=''):
    """Scrape the contact card of a 1688 member and store it in MySQL.

    Steps performed:
      1. Request ``dcms.1688.com`` and keep the cookies it sets.
      2. Call the ``ip.business.check`` endpoint and read the ``_m_h5_tk``
         cookie; the part before the first ``_`` is used as signing token.
      3. Call the ``ModuleAsyncService`` endpoint with a signed payload to
         obtain the contact data.
      4. Fetch the factory card page via ``get_1688_index`` (up to five
         attempts, switching to a new proxy from Redis after each failure).
      5. Replace the member's row in ``company_info_1688``.

    Args:
        memberid: 1688 member id, e.g. ``b2b-2212988406689fd305``.
        proxies: Proxy mapping passed to every HTTP request (may be empty).
        seed_url: Optional URL the request originated from; echoed back in
            the result under ``seed_url``.

    Returns:
        A dict with the contact fields, the card-page fields and ``seed_url``;
        an empty dict when the response contains the overload marker text.

    Raises:
        KeyError: If the ``_m_h5_tk`` cookie or the ``data`` field is missing.
        Exception: Network, JSON and database errors propagate to the caller.
    """
    # Local import: sqlalchemy is already a dependency of this file, but only
    # ``create_engine`` is imported at module level.
    from sqlalchemy import text

    contact_payload = '{"componentKey":"wp_pc_contactsmall","params":"{\\"memberId\\":\\"%s\\"}"}' % memberid

    # Step 1: collect the initial cookies.
    headers = {
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://re.1688.com/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
    }
    url = "https://dcms.1688.com/open/query.json"
    params = {
        "app": "DCMS",
        "dataId": "236",
        "resourceId": "1441408",
        "callback": "jsonp_0t5g1s0yd14v2t4"
    }
    response = requests.get(url, headers=headers, params=params, verify=False, proxies=proxies, timeout=3)
    cookies = dict(response.cookies)
    print(f"get cookie2 -->{cookies}")

    # Step 2: obtain the ``_m_h5_tk`` token cookie.
    # NOTE(review): the signature is computed over the contact payload while
    # the request sends a different ``data`` value; this call appears to be
    # needed only for the cookie it sets - confirm.
    headers = {
        "Host": "h5api.m.1688.com",
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://sale.1688.com/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
    }
    new_time = round(time.time() * 1000)
    a = _sign_1688_payload(contact_payload, "undefined", new_time)
    params = {
        "appKey": "12574478",
        "t": f'{new_time}',
        "sign": f'{a[0]}',
        "api": "mtop.cbu.overseas.site.ip.business.check",
        "data": "%7B%22type%22%3A%22canView%22%7D"
    }
    url = "https://h5api.m.1688.com/h5/mtop.cbu.overseas.site.ip.business.check/1.0/"
    response = requests.get(url, headers=headers, params=params, cookies=cookies, verify=False, proxies=proxies, timeout=3)
    print(response.text)
    cookies.update(dict(response.cookies))
    token = response.cookies['_m_h5_tk'].split("_")[0]

    # Step 3: signed request for the contact module.
    new_time = round(time.time() * 1000)
    a = _sign_1688_payload(contact_payload, token, new_time)
    params = (
        ('appKey', '12574478'),
        ('t', str(new_time)),
        ('sign', a[0]),
        ('api', 'mtop.alibaba.alisite.cbu.server.pc.ModuleAsyncService'),
        ('data', contact_payload),
    )
    url = 'https://h5api.m.1688.com/h5/mtop.alibaba.alisite.cbu.server.pc.moduleasyncservice/1.0/'
    headers = {
        "Host": "h5api.m.1688.com",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "accept": "*/*",
        "referer": "https://shop3413081512086.1688.com/",
        "accept-language": "zh-CN,zh;q=0.9"
    }
    response = requests.get(url, headers=headers, params=params, cookies=cookies, verify=False, proxies=proxies, timeout=3)
    data_dict = response.json()
    print(data_dict)
    if ":哎哟喂,被挤爆啦" in response.text:
        print("获取失败")
        # The proxy mapping is empty when no proxy IP was available, so only
        # evict an IP when one was actually used.
        proxy_url = (proxies or {}).get('http')
        if proxy_url:
            srem('ip_lists', proxy_url.split("//")[-1].split(":")[0])
            print("删除无效 ip")
        return {}
    print("获取成功")

    contact = data_dict["data"]
    company_name = contact.get("companyName")  # company name
    mobileNo = contact.get("mobileNo")  # mobile phone
    phoneNum = contact.get("phoneNum")  # landline phone
    fax = contact.get("faxNum")  # fax
    address = contact.get("address")  # address
    contact_name = contact.get("name")  # contact person
    position = contact.get("jobTitle")  # job title

    # Step 4: fetch the card page, rotating the proxy after each failure.
    for _ in range(5):
        cord_data = get_1688_index(memberid, proxies, cookies)
        if cord_data.get("home_url") or cord_data.get('address'):
            print("home_url --> 获取成功")
            break
        ip = srandmembers('ip_lists', 1)
        if ip:
            ip_text = str(ip[0], 'utf-8')
            print(f"ip --> {ip_text}")
            proxies = {
                'http': f'http://{ip_text}:3389',
                'https': f'http://{ip_text}:3389',
            }
        else:
            proxies = {}
        cookies = {}
        print("home_url --> 获取失败")

    cord_data['contact_address'] = address
    home_url = cord_data.get("home_url")
    items = {
        "company_name": company_name,
        "mobileNo": mobileNo,
        "phoneNum": phoneNum,
        "fax": fax or "",
        "contact_name": contact_name,
        "position": position,
        "memberId": memberid,
        "address": cord_data.get('address') or address,
        "home_url": home_url,
        "card_url": f"https://sale.1688.com/factory/card.html?memberId={memberid}",
        "state": 3
    }

    # Step 5: replace the stored row. The member id comes from the HTTP
    # request, so it is passed as a bound parameter rather than formatted
    # into the SQL text.
    engine = get_country_engine("us")
    try:
        with engine.begin() as conn:
            conn.execute(
                text("DELETE FROM company_info_1688 WHERE memberId = :member_id"),
                {"member_id": memberid},
            )
        df = pd.DataFrame([items])
        df.to_sql(name="company_info_1688", con=engine, if_exists='append', index=False)
    finally:
        # Each call creates its own engine; release its pooled connections.
        engine.dispose()

    # The card-page fields are merged only after the insert, so they are
    # returned to the caller but not written as extra columns.
    items.update(cord_data)
    items['seed_url'] = seed_url
    return items


def get_1688_index(memberid, proxies, cookies):
    """Fetch a member's factory card page and extract shop URL and address.

    The page embeds a JSON object assigned to ``window.$$pageData``; every
    value of that object that carries an ``initShopInfo`` dict contributes
    its ``shopPcWpIndexUrl`` and ``factoryDetailedAddress``.

    Args:
        memberid: 1688 member id used in the card URL.
        proxies: Proxy mapping for the HTTP request.
        cookies: Cookies to send with the request.

    Returns:
        A dict that may contain ``home_url`` (shop URL with a trailing
        slash) and ``address``. It is empty when the page data is absent.

    Raises:
        Exception: Network errors and JSON decoding errors propagate.
    """
    headers = {
        'authority': 'sale.1688.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
    }
    url = f"https://sale.1688.com/factory/card.html?memberId={memberid}"
    response = requests.get(url, headers=headers, cookies=cookies, verify=False, proxies=proxies, timeout=3)
    datas = re.findall(r"window\.\$\$pageData=(.*?});.*?window\.\$\$pageSeed", response.text, re.S)
    item = {}
    if not datas:
        return item

    page_data = json.loads(datas[0])
    if not isinstance(page_data, dict):
        return item

    for module in page_data.values():
        # Values that are not dicts (or lack a dict ``initShopInfo``) carry
        # no shop information; skip them instead of failing the whole scrape.
        shop_info = module.get("initShopInfo") if isinstance(module, dict) else None
        if not isinstance(shop_info, dict):
            continue
        detailed_address = shop_info.get("factoryDetailedAddress")
        shop_url = shop_info.get("shopPcWpIndexUrl")
        print("地址", detailed_address)
        if shop_url:
            item['home_url'] = shop_url + '/'
        item['address'] = detailed_address
    return item


def url_deal(url):
    """Return True when *url* is a 1688 page type this service can scrape.

    Accepted URL forms, for example:
        https://detail.1688.com/offer/526334337976.html
        https://www.1688.com/factory/b2b-2208079081039883bb.html
        https://sale.1688.com/factory/card.html?memberId=b2b-22146811028686c897

    The URL must start with one of the supported prefixes (surrounding
    whitespace is ignored). A URL on another host that merely contains one
    of the prefixes is rejected, because the accepted URL is later fetched
    by the server.

    Args:
        url: Candidate URL; empty, ``None`` or non-string values are rejected.

    Returns:
        ``True`` if the URL is supported, otherwise ``False``.
    """
    if not url or not isinstance(url, str):
        return False
    allowed_prefixes = (
        'https://detail.1688.com/offer/',
        'https://www.1688.com/factory/',
        'https://sale.1688.com/factory/card.html',
    )
    return url.strip().startswith(allowed_prefixes)


def get_1688_detail(seed_url, proxies):
    """Fetch a 1688 page and extract the company name embedded in it.

    A preliminary request to ``log.mmstat.com`` supplies the ``cna`` cookie
    that is then sent along with the page request.

    Args:
        seed_url: URL to fetch; must pass ``url_deal``.
        proxies: Proxy mapping for the HTTP requests.

    Returns:
        * ``{'error_msg': ..., 'seed_url': ...}`` when the URL is not supported;
        * ``"商品下架"`` when the page reports the offer as offline or 404;
        * ``False`` when a captcha page is returned;
        * ``"密码登录"`` when a login page is returned;
        * ``{'company_name': ..., 'seed_url': ...}`` on success.

    Raises:
        IndexError: If the page contains no ``companyName`` field.
        Exception: Network errors propagate to the caller.
    """
    if not url_deal(seed_url):
        return {'error_msg': "url 不符合爬取条件", 'seed_url': seed_url}

    headers = {
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "origin": "https://www.1688.com",
        "referer": "https://www.1688.com/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    url = "https://log.mmstat.com/eg.js"
    response = requests.get(url, headers=headers, verify=False, proxies=proxies, timeout=3)

    # Reuse the ``cna`` cookie from the tracking endpoint for the page request.
    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9,de;q=0.8",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    cookies = {
        "cna": dict(response.cookies).get("cna"),
        "_user_vitals_session_data_": "{\"user_line_track\":true,\"ul_session_id\":\"p6vlefe2nto\",\"last_page_id\":\"detail.1688.com%2Fo422iok1zka\"}",
    }

    response = requests.get(seed_url, headers=headers, verify=False, proxies=proxies, cookies=cookies, timeout=3)
    page = response.text
    # Offline offer (e.g. 651590058241) or 404 page (e.g. 727697705061).
    if '<h3 class="mod-detail-offline-title">商品已下架</h3>' in page or "<title>404-" in page:
        return "商品下架"
    if "页面-验证码" in page:
        return False
    if "密码登录" in page:
        return "密码登录"

    datas = re.findall(r'"companyName":"(.*?)"', page)
    if not datas:
        # Same exception type as the former bare ``datas[0]`` lookup, but
        # with a message that explains what went wrong.
        raise IndexError(f"companyName not found in page for {seed_url}")
    print(f"数据获取成功 {datas}")
    return {'company_name': datas[0], 'seed_url': seed_url}


@app.route('/get_detail', methods=['POST'])
def detail_msg():
    """Handle ``POST /get_detail``: scrape one member or one URL.

    Form fields:
        memberId: 1688 member id; takes precedence when present.
        url: A 1688 URL. Factory/card URLs are resolved to a member id and
            handled by ``get_1688_data``; other URLs go to ``get_1688_detail``.

    Returns:
        JSON ``{'state': '200', 'msg': ...}`` on success, or
        ``{'state': '400', 'msg': ...}`` when the input is unusable or all
        five proxy attempts failed.
    """
    # Resolve the target once, before touching the proxy pool, so that a bad
    # request cannot evict healthy proxies.
    member_id = request.form.get("memberId")
    seed_url = request.form.get("url")
    if not member_id:
        if not seed_url:
            return jsonify({'state': '400', 'msg': "缺少参数 memberId 或 url"})
        if "https://sale.1688.com/factory/card.html" in seed_url or "https://www.1688.com/factory/" in seed_url:
            # Prefer an explicit memberId query parameter (also when it is
            # the last parameter); otherwise take the id from the
            # /factory/<id>.html path.
            found = (re.search(r"[?&]memberId=([^&#]+)", seed_url)
                     or re.search(r"www\.1688\.com/factory/([^/?&#.]+)", seed_url))
            if not found:
                return jsonify({'state': '400', 'msg': "url 不符合爬取条件"})
            member_id = found.group(1)

    for _ in range(5):
        ip = srandmembers('ip_lists', 1)
        if ip:
            ip_text = str(ip[0], 'utf-8')
            print(f"ip --> {ip_text}")
            proxies = {
                'http': f'http://{ip_text}:3389',
                'https': f'http://{ip_text}:3389',
            }
        else:
            ip_text = None
            proxies = {}
        try:
            if member_id:
                msg = get_1688_data(member_id, proxies)
            else:
                msg = get_1688_detail(seed_url, proxies)
        except Exception as e:
            # Any failure (timeout, proxy error, unexpected response) is
            # treated as a bad proxy: evict it and retry with another one.
            if ip_text:
                srem('ip_lists', ip_text)
                print("删除无效 ip")
            print(f"重新执行程序 {e}")
            continue
        if msg:
            print(f"state: 200, cookies: {msg}")
            return jsonify({'state': '200', 'msg': msg})

    # All attempts were exhausted; a view must never return None.
    return jsonify({'state': '400', 'msg': "IP 被检测 请稍后重试"})


@app.route('/get_module', methods=['POST'])
def upload_file():
    """Handle ``POST /get_module``: scrape the contact card of one member.

    Form fields:
        memberId: 1688 member id (required).

    Returns:
        JSON ``{'state': '200', 'msg': ...}`` on success, or
        ``{'state': '400', 'msg': ...}`` when ``memberId`` is missing or all
        five proxy attempts failed.
    """
    member_id = request.form.get("memberId")
    if not member_id:
        # Reject early so a bad request does not consume or evict proxies.
        return jsonify({'state': '400', 'msg': "缺少参数 memberId"})

    for _ in range(5):
        ip = srandmembers('ip_lists', 1)
        if ip:
            ip_text = str(ip[0], 'utf-8')
            print(f"ip --> {ip_text}")
            proxies = {
                'http': f'http://{ip_text}:3389',
                'https': f'http://{ip_text}:3389',
            }
        else:
            ip_text = None
            proxies = {}
        try:
            msg = get_1688_data(member_id, proxies)
        except Exception as e:
            # Any failure is treated as a bad proxy: evict it and retry.
            if ip_text:
                srem('ip_lists', ip_text)
                print("删除无效 ip")
            print(f"重新执行程序 {e}")
            continue
        if msg:
            print(f"state: 200, cookies: {msg}")
            return jsonify({'state': '200', 'msg': msg})

    # All attempts were exhausted; a view must never return None.
    return jsonify({'state': '400', 'msg': "IP 被检测 请稍后重试"})


def get_detail_api(seed_url):
    """Scrape one URL with up to five proxy attempts (batch worker).

    Factory/card URLs are resolved to a member id and handled by
    ``get_1688_data``; every other URL is handled by ``get_1688_detail``.

    Args:
        seed_url: The 1688 URL to scrape.

    Returns:
        The truthy result of the scraper that handled the URL, or a dict
        ``{'error_msg': ..., 'seed_url': seed_url}`` when the URL is unusable
        or every attempt failed.
    """
    invalid_url = {'error_msg': "url 不符合爬取条件", 'seed_url': seed_url}
    # Validate before touching the proxy pool so that bad input cannot evict
    # healthy proxies.
    if not seed_url or not isinstance(seed_url, str):
        return invalid_url

    member_id = None
    if "https://sale.1688.com/factory/card.html" in seed_url or "https://www.1688.com/factory/" in seed_url:
        # Prefer an explicit memberId query parameter (also when it is the
        # last parameter); otherwise take the id from /factory/<id>.html.
        found = (re.search(r"[?&]memberId=([^&#]+)", seed_url)
                 or re.search(r"www\.1688\.com/factory/([^/?&#.]+)", seed_url))
        if not found:
            return invalid_url
        member_id = found.group(1)

    for _ in range(5):
        ip = srandmembers('ip_lists', 1)
        if ip:
            ip_text = str(ip[0], 'utf-8')
            print(f"ip --> {ip_text}")
            proxies = {
                'http': f'http://{ip_text}:3389',
                'https': f'http://{ip_text}:3389',
            }
        else:
            ip_text = None
            proxies = {}
        try:
            if member_id:
                msg = get_1688_data(member_id, proxies, seed_url)
            else:
                msg = get_1688_detail(seed_url, proxies)
        except Exception as e:
            # Any failure is treated as a bad proxy: evict it and retry.
            if ip_text:
                srem('ip_lists', ip_text)
                print("删除无效 ip")
            print(f"重新执行程序 {e}")
            continue
        if msg:
            print(f"state: 200, cookies: {msg}")
            return msg

    # All attempts were exhausted; report it instead of returning None.
    return {'error_msg': "IP 被检测 请稍后重试", "seed_url": seed_url}


@app.route('/batch_get_detail', methods=['POST'])
def detail_msg_list():
    """Handle ``POST /batch_get_detail``: scrape several URLs concurrently.

    Form fields:
        url: JSON-encoded list of 1688 URLs.

    Returns:
        JSON ``{'state': '200', 'data': {seed_url: company_name, ...}}``
        containing only the URLs for which a company name was found, or
        ``{'state': '400', 'msg': ...}`` when ``url`` is not a JSON list.
    """
    try:
        seed_url_list = json.loads(request.form.get("url"))
    except (TypeError, ValueError):
        # Missing field (None) or malformed JSON.
        return jsonify({'state': '400', 'msg': "url 参数必须是 JSON 数组"})
    if not isinstance(seed_url_list, list):
        return jsonify({'state': '400', 'msg': "url 参数必须是 JSON 数组"})

    results = {}
    print('-----', seed_url_list, len(seed_url_list))
    # Run the scrapes concurrently and collect their return values.
    with ThreadPoolExecutor(max_workers=10) as executor:  # adjust max_workers as needed
        future_list = [
            executor.submit(get_detail_api, item)
            for item in seed_url_list
        ]

        for future in future_list:
            try:
                result = future.result()  # return value of the worker
            except Exception as e:
                print(f"线程执行出错: {e}")
                continue
            # Workers may also return status strings; only dicts that carry
            # a company name are reported.
            if isinstance(result, dict) and result.get("company_name"):
                results[result.get('seed_url')] = result.get("company_name")

    print("所有线程处理完成")
    return jsonify({'state': '200', 'data': results})


if __name__ == '__main__':
    # Serve the scraping API on all network interfaces, port 22223.
    app.run(host='0.0.0.0', port=22223)
    # NOTE: a commented-out manual smoke test used to follow here. It drew a
    # proxy IP from the Redis set 'ip_lists', called
    # get_1688_data("b2b-2213949951411eadd7", proxies), and on any exception
    # removed that IP from the set and retried, up to five times.


# nohup python 1688_Api.py > data_1688_api.log 2>&1 &

#
