import os, sys
import queue
import scrapy
import logging
import pandas as pd
from urllib.parse import urlencode
from scrapy import cmdline, signals
sys.path.append(os.path.dirname(os.path.dirname(sys.path[0])))  # 上级目录


class CantonfairSpider(scrapy.Spider):
    """Collect shop (exhibitor) records from the Canton Fair B2B shop search API.

    Every entry of the parameter list in ``start_requests`` starts one
    pagination chain against ``API_URL``.  ``parse`` flattens each shop of a
    page into a dict, accumulates the dicts in the request ``meta`` and
    requests the next page.  When the chain ends, the accumulated rows are
    de-duplicated by ``shop_id`` and written to an Excel file in the current
    working directory.
    """

    name = 'b2b_shop_api'
    # Scrapy reads ``allowed_domains`` from the spider itself, not from settings.
    allowed_domains = ['cantonfair.org.cn']
    # Category label embedded in the output file name.  It is kept separate
    # from ``name`` so the spider keeps its registered name while running.
    # Can be overridden from the command line: ``-a category_name=...``.
    category_name = '文具'

    API_URL = "https://www.cantonfair.org.cn/b2bshop/api/themeRos/public/productShops/searchByVariables"
    # NOTE(review): the ``^`` characters in the referer look like cmd.exe
    # escaping left over from a copied cURL command; the value is kept
    # byte-identical because it is what has been sent so far - confirm.
    DEFAULT_HEADERS = {
        "accept": "application/json, text/plain, */*",
        "accept-language": "zh-CN",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://www.cantonfair.org.cn/zh-CN/detailed?seller=import&category=&scategory=&type=1&keyword=&page=1&size=20&tab=exhibitor&sort=relate^%^20desc&filter=18f0ed42f27-a7f1^",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
        "x-user-lan": "zh-CN"
    }

    custom_settings = {
        'SPIDER_MODULES': ['amazon_spider.spiders_text'],
        'NEWSPIDER_MODULE': 'amazon_spider.spiders_text',
        # Proxy configuration for curl_cffi
        # 'PROXY_HOST': 'http-dynamic-S02.xiaoxiangdaili.com',
        # 'PROXY_PORT': 10030,
        # 'PROXY_USER': '******',
        # 'PROXY_PASS': '******',
        'CONCURRENT_REQUESTS': 30,
        'DOWNLOAD_TIMEOUT': 10,
        # 'DOWNLOAD_DELAY': 2,
        # # When enabled, Scrapy waits a random interval between 0.5 and 1.5
        # # times DOWNLOAD_DELAY between requests to the same site.
        # 'RANDOMIZE_DOWNLOAD_DELAY': True,
        # NOTE(review): Scrapy does not read this key from settings (the class
        # attribute above is what takes effect); kept in case project code
        # looks it up - confirm before removing.
        'allowed_domains': ['cantonfair.org.cn'],
        # Retry settings
        'RETRY_ENABLED': True,
        'RETRY_TIMES': 3,  # number of retries per request
        'RETRY_HTTP_CODES': [203, 301, 302, 403, 408, 429, 500, 502, 503, 504, 522, 524, 404, 401],
        # downloader middlewares for spider.
        # NOTE(review): three middlewares share priority 490; their relative
        # order should not be relied upon - confirm this is intended.
        'DOWNLOADER_MIDDLEWARES': {
            # called in increasing order
            'amazon_spider.middlewares.RandomUserAgentMiddleware': 480,
            # 'amazon_spider.middlewares.ProxyMiddleware': 450,
            # 'amazon_spider.middlewares.GetCookieMiddleware': 460,
            'amazon_spider.middleware.aiohttp.AiohttpMiddleware': 490,
            'amazon_spider.middleware.http2.HttpxMiddleware': 490,
            'amazon_spider.middleware.temu.CurlCffiRequests': 490,
            # called in decreasing order
            # 'amazon_spider.middlewares.UpdateCookiesUrl': 530,
            'scrapy.downloadermiddlewares.retry.RetryMiddleware': 500
        },
        'ITEM_PIPELINES': {
            # 'amazon_spider.pipeline.amazon_comment_pipe_text.AmazonCommentSpiderPipeline': 230,
        },
        'DOWNLOAD_HANDLERS': {
            'http': 'amazon_spider.downloadhandlers.ja3.MyHTTPDownloadHandler',
            'https': 'amazon_spider.downloadhandlers.ja3.MyHTTPDownloadHandler',
        },
        # 'DOWNLOAD_HANDLERS': {
        #     'http': ('amazon_spider.downloadhandlers.curl.FingerprintDownloadHandler'),
        #     'https': ('amazon_spider.downloadhandlers.curl.FingerprintDownloadHandler'),
        # }
    }

    def __init__(self, *args, **kwargs):
        """Create the spider, forwarding spider arguments to ``scrapy.Spider``.

        ``from_crawler`` passes ``*args``/``**kwargs`` (e.g. ``-a key=value``
        from the command line) to the constructor, so they must be accepted
        here; the base class stores keyword arguments as instance attributes.
        """
        super().__init__(*args, **kwargs)
        # Not referenced elsewhere in this class; presumably read by the
        # project's download middlewares - TODO confirm.
        self.utils_requests = [{"use_aiohttp": True}, {"curlcffi": True}, {"use_httpx": True}]

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Build the spider for ``crawler`` and hook ``close`` to ``spider_closed``."""
        spider = super().from_crawler(crawler, *args, **kwargs)
        # NOTE(review): scrapy.Spider appears to connect ``close`` to
        # ``spider_closed`` itself when the crawler is attached, so this looks
        # redundant - confirm before removing.
        crawler.signals.connect(spider.close, signals.spider_closed)
        # crawler.signals.connect(spider.spider_idle, signals.spider_idle)
        return spider

    def start_requests(self):
        """Return one first-page request per active query template.

        The templates below are all commented out, so the list is currently
        empty and no request is produced.  Uncomment a template (and set
        ``category_name`` accordingly) to crawl that category.
        """
        p = [
            # {
            #     # Gifts and decorations
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147061383880704",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Import exhibition
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "",
            #     "page": "0",
            #     "size": "20",
            #     "sort": "namePinyin asc",
            #     # salesInfo.seller.sellerType eq 'IMPORT' and salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.seller.sellerType eq 'IMPORT' and salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Electronics and home appliances
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147245295706112",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.seller.sellerType eq 'IMPORT' and salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Industrial manufacturing
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147369757478912",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Vehicles and two-wheelers
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147262152626176",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Lighting and electrical
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461148003609088000",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Hardware and tools
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147703003324416",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Household goods
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147108741746688",
            #     "page": "0",
            #     "size": "20",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Building materials and furniture
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147101884076032",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Intermediate goods
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147101884076032",
            #     "page": "0",
            #     "size": "20",
            #     "scoreStrategy": "shop",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.udfs.category.udfs.intermediategoods eq 'Y' and salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Toys and maternity / baby / children products
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147079700406272",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Fashion
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147105440849920",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Home textiles
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147103742152704",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Stationery
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147104627134464",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },
            # {
            #     # Health and leisure
            #     "productSearchable": "true",
            #     "industrySiteId": "461110967833538560",
            #     "unbox": "true",
            #     "lang": "zh-CN,en-US",
            #     "categoryId": "461147098763513856",
            #     "page": "0",
            #     "size": "60",
            #     "sort": "namePinyin asc",
            #     # salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'
            #     "productFilter": "salesInfo.status eq 'LISTED' and salesInfo.shop.id ne '665972138192240640'"
            # },

        ]
        # Each chain gets its own accumulator so categories never mix rows.
        return [self._build_request(params, []) for params in p]

    def _build_request(self, params, collected):
        """Build the API request for ``params``, carrying ``collected`` along.

        A fresh ``meta`` dict holding only this spider's own keys is used
        instead of forwarding ``response.meta``: the latter also contains
        per-request bookkeeping such as ``retry_times``, which would otherwise
        leak into the next page's request.
        """
        return scrapy.Request(
            self.API_URL + '?' + urlencode(params),
            headers=self.DEFAULT_HEADERS,
            callback=self.parse,
            errback=self.err_parse,
            meta={'param': params, 'datas': collected},
        )

    @staticmethod
    def _shop_to_item(shop):
        """Flatten one shop record of the API payload into an output row.

        Missing optional fields become empty strings instead of raising, so a
        single incomplete record cannot abort the whole pagination chain.
        """
        address = shop.get('address') or {}
        udfs = shop.get('udfs') or {}
        country = (address.get('country') or {}).get('name') or ""
        province = (address.get('province') or {}).get('name') or ""
        return {
            'country': country + province,
            'name': shop.get('name', ""),
            'address': address.get('detailAddress', ""),
            'url': udfs.get('website', ""),
            'products': udfs.get('mainProducts', ""),
            'shop_id': shop.get('code'),
            'state': 1,
        }

    @staticmethod
    def _is_last_page(page, params):
        """Return True when the pagination chain should stop.

        NOTE(review): the comparison ``number >= totalPages * size`` is kept
        exactly as it was; if ``number`` is a zero-based page index this
        requests far more pages than ``totalPages`` - confirm against the
        API's paging semantics.  Missing paging info is treated as the end.
        """
        number = page.get('number')
        total_pages = page.get('totalPages')
        if number is None or total_pages is None:
            return True
        return number >= total_pages * int(params['size'])

    def _export_shops(self, items, page):
        """De-duplicate ``items`` by ``shop_id`` and write them to an Excel file."""
        if not items:
            # An empty frame has no ``shop_id`` column to de-duplicate on.
            self.logger.warning("no shops collected for %s, nothing to export", self.category_name)
            return
        df = pd.DataFrame(items)
        self.logger.info("去重前 %s", df.shape)
        # Keep the first row per shop_id; rows without an id are all kept
        # because they cannot be identified as duplicates of each other.
        df = df[~df.duplicated(['shop_id']) | df['shop_id'].isna()]
        self.logger.info("去重后 %s", df.shape)
        df.to_excel(f"./cantonfair_{self.category_name}_{page.get('totalElements')}_1.xlsx")
        # df.to_csv(f"./cantonfair_{datas['page']['number']}.csv")
        self.logger.info(f"爬取完成 {len(items)}")

    def parse(self, response, **kwargs):
        """Collect the shops of one result page and follow the next page.

        Rows are appended to the list stored under ``meta['datas']``.  The
        chain ends - and the rows are exported - when the stop condition is
        met or when the payload carries no ``_embedded`` section (previously
        a ``KeyError`` that discarded everything collected so far).
        """
        payload = response.json()
        params = response.meta['param']
        collected = response.meta['datas']

        embedded = payload.get('_embedded')
        shops = embedded.get('b2b:shops', []) if embedded else []
        for shop in shops:
            item = self._shop_to_item(shop)
            collected.append(item)
            self.logger.debug("%s", item)

        page = payload.get('page') or {}
        self.logger.info("number---- %s", page)

        if embedded is None or self._is_last_page(page, params):
            self._export_shops(collected, page)
            return

        # The same dict is advanced in place and travels with the next request.
        params['page'] = str(int(params['page']) + 1)
        yield self._build_request(params, collected)

    def err_parse(self, failure, **kwargs):
        """Log a request that failed for good (after retries were exhausted)."""
        request = failure.request
        self.logger.error(
            "error______ %s, %s, %s",
            failure.getErrorMessage(), request.url, request.meta.get('param'),
        )


if __name__ == '__main__':
    # Run this spider through Scrapy's command-line entry point, exactly as
    # ``scrapy crawl b2b_shop_api`` would from a shell.
    cmdline.execute(['scrapy', 'crawl', 'b2b_shop_api'])

# Operational notes: shell commands used to run the project's spiders in the background.
# source activate pyspark
# cd /mnt/hezhe/amazon_spider/amazon_spider && nohup scrapy crawl amazon_comment_history -a site=us > amazon.log 2>&1 &
# nohup scrapy crawl amazon_comment_all -a site=us > amazon_history_comment_us1.log 2>&1 &
# for i in `ps -ef|grep "scrapy crawl amazon_comment_all" |awk '{print $2}' `; do kill -9 $i ; done;
# cd /mnt/hezhe/amazon_spider/amazon_spider && nohup /opt/module/anaconda3/envs/pyspark/bin/scrapy crawl amazon_comment -a site=us > amazon1.log 2>&1 &
# nohup scrapy crawl amazon_comment -a site=de > amazon_de1.log 2>&1 &



