# coding:utf-8
import os
import sys
import time
import scrapy
import logging
import platform
from scrapy import cmdline, signals
sys.path.append(os.path.dirname(os.path.dirname(sys.path[0])))  # 上级目录
# Amazon detail-page data acquisition
from amazon_spider.spiders.yswg_spider import SourceSpider

# Reset the C library's time-conversion rules from the environment on
# platforms that offer time.tzset(); it is unavailable on Windows, so there
# the script only announces the platform.
if platform.system() != "Windows":
    time.tzset()
else:
    print("windows")


class RedisGmgnSpider(SourceSpider):
    """Poll gmgn.ai for Solana tokens and inspect the traders of the ones that pass a filter.

    Request chain (each step is scheduled by the previous callback):

    1. ``spider_idle``  -> gmgn.ai landing page            -> ``parse``
    2. ``parse``        -> 24h swap ranking (JSON)         -> ``parse_buy``
    3. ``parse_buy``    -> top traders of each kept token  -> ``parse_three``
    4. ``parse_three``  -> token statistics (JSON)         -> ``parse_five``

    No items are yielded; the callbacks only log what they find.
    """

    name = 'redis_gmgn_spider'
    SCHEDULER_DUPEFILTER_KEY = '%(spider)s:dupefilter'

    # Values replayed on every gmgn.ai call (presumably captured from a
    # browser session — refresh them if the API starts rejecting requests).
    _DEVICE_ID = "f3e0e1c1-54f5-45d3-93e4-b2f943197875"
    _USER_AGENT = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36")

    # Token filter applied to the ranking in ``parse_buy``.
    # NOTE(review): the ranking request asks for max_holder_count=800 while the
    # filter requires holder_count >= 800, so only tokens with exactly 800
    # holders can pass — confirm this is intended.
    MAX_LIQUIDITY = 10000
    MIN_HOLDER_COUNT = 800
    MIN_SWAPS = 1000
    MIN_VOLUME = 5000000
    # Thresholds used by ``parse_three`` / ``parse_five``.
    TRANSFER_IN_LIMIT = 10
    RAT_TRADER_LIMIT = 0.003

    # Fields copied from each ranking row ("address" is stored separately as "id").
    _TOKEN_FIELDS = (
        "symbol",
        "logo",  # logo image URL
        "price",
        "price_change_percent",
        "swaps",  # number of trades in 24h
        "volume",  # traded amount in 24h
        "liquidity",  # pool / market cap (per the original note)
        "market_cap",
        "hot_level",
        "pool_creation_timestamp",
        "holder_count",  # holders
        "twitter_username",
        "price_change_percent1m",
        "price_change_percent5m",
        "swaps_24h",  # number of trades in 24h
        "initial_liquidity",  # initial liquidity
        "is_show_alert",
        "top_10_holder_rate",  # top 10
        "renounced_mint",
        "renounced_freeze_account",
        "burn_ratio",
        "burn_status",
        "dexscr_ad",
        "dexscr_update_link",
        "cto_flag",
        "twitter_change_flag",
        "launchpad_status",
        "rat_trader_amount_rate",
        "bluechip_owner_percentage",
        "smart_degen_count",
        "renowned_count",
        "is_wash_trading",
    )

    # Fields copied from each row of the token-traders list.
    _TRADER_FIELDS = (
        "address",
        "account_address",
        "sell_volume_cur",  # total sold
        "sell_amount_cur",  # sell amount
        "buy_volume_cur",  # total bought
        "buy_amount_cur",  # buy amount
        "native_transfer",  # source of funds
        "profit",  # profit
        "profit_change",  # profit change
        "realized_profit",  # realized profit
        "avg_cost",  # average buy price
        "avg_sold",  # average sell price
        "transfer_in",
        "unrealized_pnl",  # unrealized profit
        "unrealized_profit",
    )

    custom_settings = {
        'CONCURRENT_REQUESTS': 25,
        'DOWNLOAD_TIMEOUT': 20,
        # NOTE(review): Scrapy reads allowed_domains from a spider attribute, so
        # this entry likely has no effect here; it also names amazon.com while
        # this spider requests gmgn.ai — confirm before relying on it.
        'allowed_domains': ['amazon.com'],
        # # Whether to flush the crawl queue when the spider restarts
        # 'SCHEDULER_FLUSH_ON_START': False,
        # # Store the request queue in the Redis-backed scheduler
        # 'SCHEDULER': "amazon_spider.scrapy_redis.scheduler.Scheduler",
        # # Make every spider de-duplicate through Redis
        # 'DUPEFILTER_CLASS': "amazon_spider.scrapy_redis.dupefilter.RFPDupeFilter",
        # 'SCHEDULER_QUEUE_CLASS': 'amazon_spider.scrapy_redis.queue.SpiderPriorityQueue',
        # # Seed-queue connection
        # 'REDIS_URL': None,
        # 'REDIS_HOST': '192.168.10.224',
        # 'REDIS_PORT': 6379,
        # 'REDIS_PARAMS': {
        #     'password': '<redacted - load from the environment, never commit it>',
        #     'db': 0
        # },
        # # # 6379
        # # 'FILTER_URL': None,
        # # 'FILTER_HOST': '127.0.0.1',
        # # 'FILTER_PORT': 6379,
        # # # 6379
        # # 'FILTER_DB': 0,
        # 'SCHEDULER_QUEUE_KEY': "detail_seed",
        # Retry settings.
        # NOTE(review): the stock RetryMiddleware is disabled below, so these
        # only matter if one of the project middlewares reads them — confirm.
        'RETRY_ENABLED': True,
        'RETRY_TIMES': 2,  # number of retries
        # 'COOKIES_ENABLED': True,
        # 'COOKIES_DEBUG': False,
        'RETRY_HTTP_CODES': [203, 301, 403, 408, 429, 500, 502, 503, 504, 522, 524, 404],
        # Downloader middlewares for this spider.
        'DOWNLOADER_MIDDLEWARES': {
            'scrapy.downloadermiddlewares.retry.RetryMiddleware': None,
            # called in increasing order
            'amazon_spider.middlewares.RandomUserAgentMiddleware': 460,
            # 'amazon_spider.middlewares.ProxyMiddleware': 450,
            'amazon_spider.middlewares.CookiesZip': 480,
            # 'amazon_spider.middlewares.GetCookieMiddleware': 460,
            'amazon_spider.middleware.http2.HttpxMiddleware': 490,
            'amazon_spider.middleware.aiohttp.AiohttpMiddleware': 490,
            'amazon_spider.middleware.temu.CurlCffiRequests': 490,
            # called in decreasing order
            # 'scrapy.downloadermiddlewares.retry.RetryMiddleware': 500
        },
        'ITEM_PIPELINES': {
            # 'amazon_spider.pipeline.real_keepa_pipe.AmazonRealKeepaSpiderPipeline': 230,
            'amazon_spider.pipeline.news_real_pipe.AmazonRealKeepaSpiderPipeline': 200,
            # 'amazon_spider.pipeline.cs_count.FidleMonitorPipeline': 200,
        },
        'DOWNLOAD_HANDLERS': {
            'http': 'amazon_spider.downloadhandlers.ja3.MyHTTPDownloadHandler',
            'https': 'amazon_spider.downloadhandlers.ja3.MyHTTPDownloadHandler',
        }
    }

    def __init__(self, site='us'):
        """Initialise per-run state.

        Args:
            site: Site code given on the command line (``-a site=us``). It is
                stored on the instance and echoed in the idle log line.
        """
        super().__init__()
        self.site = site
        self.sleep_count = 0
        # The attributes below are not read anywhere in this class; they are
        # kept because the base spider or the middlewares may rely on them
        # (TODO confirm against SourceSpider).
        self.seller_work = {
            "us": "Sold by",
            "uk": "Sold by",
            "fr": "Vendu par",
            "de": "Verkäufer",
            "es": "Vendedor",
            "it": "Venditore",
            "mx": "Vendedor",
            "ca": "Sold by"
        }

        self.seller_type = {
            "us": "Ships from",
            "uk": "Dispatches from",
            "fr": "Expéditeur",
            "de": "Versand",
            "es": "Envía por",
            "it": "Spedito da",
            "mx": "Envío por",
            "ca": "Ships from"
        }
        logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s %(message)s', level=logging.INFO)
        # self.utils_requests = [{"use_httpx": True}]
        self.asins = ['B0C68HFRCJ', 'B0993KNJ8Q', 'B0D73QPH2L', 'B09SPSVQG8']
        # self.asins = ['B0C68HFRCJ']

        self.seeds = [{"asin": f"{asin}", "site": "us", "asin_type": "3,7", "is_variation": 1, "date_info": "",
                       "account_id": None, "priority": "1", "updated_at": "2024-05-21 08:52:44", "other_sellers_id": "",
                       "other_seller_name": "", "other_seller_buy_boy_type": ""} for asin in self.asins]
        self.utils_requests = [{"curlcffi": True}]

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Build the spider and hook it to the ``spider_closed`` and ``spider_idle`` signals."""
        spider = super().from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.close, signals.spider_closed)
        crawler.signals.connect(spider.spider_idle, signals.spider_idle)
        return spider

    # ------------------------------------------------------------------ helpers

    @classmethod
    def _gmgn_url(cls, path, app_ver, extra=None):
        """Return ``https://gmgn.ai<path>`` with the common client query string.

        Args:
            path: URL path starting with ``/``; inserted verbatim.
            app_ver: Web-client version string; also used to derive ``client_id``.
            extra: Optional mapping of endpoint-specific parameters appended
                after the common ones, in insertion order.
        """
        # Local import keeps this block self-contained (module imports untouched).
        from urllib.parse import urlencode

        query = {
            "device_id": cls._DEVICE_ID,
            "client_id": f"gmgn_web_{app_ver}",
            "from_app": "gmgn",
            "app_ver": app_ver,
            "tz_name": "Asia/Shanghai",
            "tz_offset": "28800",
            "app_lang": "zh-CN",
        }
        if extra:
            query.update(extra)
        return f"https://gmgn.ai{path}?{urlencode(query)}"

    @classmethod
    def _token_api_headers(cls):
        """Return the headers sent with the per-token JSON endpoints."""
        return {
            "Host": "gmgn.ai",
            "accept": "application/json, text/plain, */*",
            "user-agent": cls._USER_AGENT,
            # Fixed referer replayed for every token, regardless of the token requested.
            "referer": "https://gmgn.ai/sol/token/3EcCmxrXwjHKeeBbXcX89h44WHAct2YbEVSqTHtmpump",
            "accept-language": "zh-CN,zh;q=0.9",
        }

    @staticmethod
    def _gmgn_data(response):
        """Return the ``data`` object of a JSON response, or ``None`` if it is unusable.

        A non-JSON body or a payload without a ``data`` mapping is logged and
        reported as ``None`` so callers can stop instead of raising.
        """
        try:
            payload = response.json()
        except ValueError:
            logging.warning("non-JSON response from %s", response.url)
            return None
        data = payload.get("data") if isinstance(payload, dict) else None
        if not isinstance(data, dict):
            logging.warning("unexpected payload from %s", response.url)
            return None
        return data

    @staticmethod
    def _to_float(value):
        """Return ``value`` as a float, or ``None`` when it is missing or not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def _passes_token_filter(self, token):
        """Tell whether a ranking row satisfies the liquidity/holder/swap/volume thresholds."""
        liquidity = self._to_float(token["liquidity"])
        holders = self._to_float(token["holder_count"])
        swaps = self._to_float(token["swaps"])
        volume = self._to_float(token["volume"])
        if any(metric is None for metric in (liquidity, holders, swaps, volume)):
            # A token with a missing metric cannot be judged; leave it out.
            return False
        return (liquidity <= self.MAX_LIQUIDITY
                and holders >= self.MIN_HOLDER_COUNT
                and swaps >= self.MIN_SWAPS
                and volume >= self.MIN_VOLUME)

    # ---------------------------------------------------------------- callbacks

    def spider_idle(self, spider):
        """Start a new polling round when the engine has no requests left.

        Clears the crawler stats and schedules the gmgn.ai landing page with
        ``parse`` as its callback.
        """
        logging.debug(f'IDLE------------------{self.site} {time.time()} {len(self.seeds)}')
        self.crawler.stats.clear_stats()

        headers = {
            "authority": "gmgn.ai",
            "Host": "gmgn.ai",
            "accept-encoding": "gzip, deflate, br, zstd",
            "accept-language": "zh-CN,zh;q=0.9",
            "user-agent": self._USER_AGENT,
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        }
        meta = {
            # "use_aiohttp": True,
            # "amazon_proxy": True,
            "choice_header": True,
            # "cookiejar": int(time.time()),
            # "priority": job["priority"],
            # Every request of the round goes through this local proxy.
            'proxy': "http://127.0.0.1:8888",
        }
        request = scrapy.Request(url="https://gmgn.ai/?chain=sol", headers=headers, callback=self.parse,
                                 errback=self.err_parse, dont_filter=True, meta=meta)
        # NOTE(review): recent Scrapy releases deprecate the second argument of
        # engine.crawl(); kept because the pinned Scrapy version is not visible here.
        self.crawler.engine.crawl(request, self)

    def parse(self, response, **kwargs):
        """Request the 24h swap ranking once the landing page has been fetched.

        The response meta is forwarded unchanged so the proxy and header
        options set in ``spider_idle`` keep applying.
        """
        headers = {
            "Host": "gmgn.ai",
            "user-agent": self._USER_AGENT,
            "accept": "application/json, text/plain, */*",
            "referer": "https://gmgn.ai/?chain=sol",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i"
        }
        url = self._gmgn_url(
            "/defi/quotation/v1/rank/sol/swaps/24h",
            "2025.0317.105343",
            {
                "orderby": "swaps",
                "direction": "desc",
                "filters[]": "frozen",
                "min_holder_count": "0",
                "max_holder_count": "800",
                "max_created": "24h",
            },
        )
        yield scrapy.Request(url=url, headers=headers, callback=self.parse_buy, errback=self.err_parse,
                             dont_filter=True, meta=response.meta)

    def parse_buy(self, response, **kwargs):
        """Filter the ranking and request the top traders of every token kept.

        Each follow-up request carries the token address in
        ``meta["token_address"]`` so ``parse_three`` knows which token it is
        looking at.
        """
        data = self._gmgn_data(response)
        if data is None:
            return

        tokens = []
        for row in data.get("rank") or []:
            if not isinstance(row, dict) or not row.get("address"):
                continue  # without an address no follow-up URL can be built
            token = {"id": row["address"]}
            token.update((field, row.get(field)) for field in self._TOKEN_FIELDS)
            tokens.append(token)

        # Stock Scrapy responses have no ``cookies`` attribute; only the custom
        # download handler may provide one, so read it defensively.
        cookies = getattr(response, "cookies", None) or {}
        logging.info("---------------------2------------------------- %s", dict(cookies))

        # Drop the tokens that do not meet the thresholds.
        kept = [token for token in tokens if self._passes_token_filter(token)]
        logging.info("%s", kept)

        headers = self._token_api_headers()
        for token in kept:
            url = self._gmgn_url(
                f"/vas/api/v1/token_traders/sol/{token['id']}",
                "2025.0321.194042",
                {"limit": "100", "orderby": "realized_profit", "direction": "desc"},
            )
            meta = dict(response.meta, token_address=token["id"])
            yield scrapy.Request(url=url, headers=headers, callback=self.parse_three, errback=self.err_parse,
                                 dont_filter=True, meta=meta)

    def parse_three(self, response, **kwargs):
        """Summarise the traders of one token and request that token's statistics.

        The token address is read from ``meta["token_address"]`` (set by
        ``parse_buy``); without it the statistics URL cannot be built and the
        callback stops.
        """
        data = self._gmgn_data(response)
        if data is None:
            return

        traders = [
            {field: row.get(field) for field in self._TRADER_FIELDS}
            for row in data.get("list") or []
            if isinstance(row, dict)
        ]
        logging.info("%s", traders)

        # Number of traders whose "transfer_in" value is truthy.
        transfer_in_count = sum(1 for trader in traders if trader["transfer_in"])
        # NOTE(review): the message says "more than 10" but the condition is
        # "<= limit"; behaviour kept as found — confirm which one is intended.
        if transfer_in_count <= self.TRANSFER_IN_LIMIT:
            logging.info("超过10个感叹号")

        token_address = response.meta.get("token_address")
        if not token_address:
            logging.warning("no token_address in meta for %s", response.url)
            return

        url = self._gmgn_url(f"/api/v1/token_stat/sol/{token_address}", "2025.0324.134322")
        yield scrapy.Request(url=url, headers=self._token_api_headers(), callback=self.parse_five,
                             errback=self.err_parse, dont_filter=True, meta=response.meta)

    def parse_five(self, response, **kwargs):
        """Read the token statistics and log when the rat-trader share reaches the limit."""
        data = self._gmgn_data(response)
        if data is None:
            return

        token_stat = {
            "holder_count": data.get("holder_count"),
            # blue-chip holders
            "bluechip_owner_count": data.get("bluechip_owner_count"),
            "top_rat_trader_percentage": data.get("top_rat_trader_percentage"),
        }
        logging.debug("token_stat %s", token_stat)

        rat_share = self._to_float(token_stat["top_rat_trader_percentage"])
        # NOTE(review): the message mentions 3% while the limit is 0.003;
        # confirm the unit of top_rat_trader_percentage.
        if rat_share is not None and rat_share >= self.RAT_TRADER_LIMIT:
            logging.info("老鼠仓大于3%过滤")

    def err_parse(self, failure, **kwargs):
        """Log the error message of a failed request."""
        logging.info("error______ %s", failure.getErrorMessage())

    def close(self, spider, reason):
        """Log that the spider has finished (connected to ``spider_closed``)."""
        logging.info("spider finish")
        logging.info("self.variat_list ")


if __name__ == '__main__':
    # Run this spider through Scrapy's command-line entry point, exactly as
    # `scrapy crawl redis_gmgn_spider -a site=us` would from a shell.
    cmdline.execute('scrapy crawl redis_gmgn_spider -a site=us'.split())

# us, uk, fr, de, es, it, mx
# nohup cd /mnt/hezhe/amazon_spider/amazon_spider scrapy crawl real_new_detail  > real_redis_amazon1.log 2>&1 &
# nohup scrapy crawl real_new_detail -a site=us  > real_redis_amazon1.log 2>&1 &
# source activate pyspark
# for i in `ps -ef|grep "scrapy crawl real_new_detail" |awk '{print $2}' `; do kill -9 $i ; done;
# C:\Users\Administrator\AppData\Local\Programs\Python\Python38\scrapy crawl real_redis_detail
# 0 21 * * *  cd /mnt/hezhe/amazon_spider/amazon_spider && /opt/module/anaconda3/envs/pyspark/bin/scrapy crawl real_redis_detail -a site=uk > real_redis_amazon1.log 2>&1 &
