


# NOTE(review): this binds the name `datetime` to the class, but the plain
# `import datetime` further down rebinds the same name to the module. After
# the whole import block has run, `datetime` is the module and only
# `timedelta` still refers to what was imported here.
from datetime import datetime, timedelta
from all_connect import ConnectSpider
# Module-level ConnectSpider instance, created as a side effect of importing
# this file (before the remaining imports execute).
# NOTE(review): presumably a shared DB/connection helper -- confirm in all_connect.
Con = ConnectSpider()
from lxml import etree
import requests
import json
# Rebinds `datetime` to the module (see the note on the first import above).
import datetime
from urllib.parse import quote
import threading
import queue
from queue import Queue
import re
import time
from multiprocessing import Pool, Manager, Lock
class HomeDepot:
    def __init__(self):
        self.headers = {
            'Referer': 'https://www.homedepot.com/s/02%20sensor%20downstream?NCNI-5',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'x-experience-name': 'hd-home',
        }
        self.cookies = {
            'AMCV_F6421253512D2C100A490D45%40AdobeOrg': 'MCMID|16129746417315936432497630021679731371',
        }
        self.order = 'DESC' #降序
        self.get_sort_by = 'bestmatch'
        self.post_sort_by = 'BEST_MATCH'

        self.item_queue = Queue()
        self.success_queue = Queue()
        self.failed_queue = Queue()
        self.invalid_queue = Queue()
        self.lock = threading.Lock()  # 创建线程锁

    def post_req_24(self, keyword, page, search_id):
        """POST the ``searchModel`` GraphQL keyword search and return its JSON.

        The request goes to the Home Depot federation gateway with the
        instance's headers and cookies, sorted by ``self.post_sort_by`` /
        ``self.order``, asking for 48 products per call for store ``620`` and
        delivery zip ``90250``.

        Args:
            keyword: Search phrase. It is percent-encoded with
                ``urllib.parse.quote`` before being placed in the payload.
            page: Value sent as the GraphQL ``startIndex`` variable.
                NOTE(review): presumably an item offset rather than a page
                number -- confirm with the caller.
            search_id: Identifier of the search task; pushed onto
                ``self.failed_queue`` (wrapped in a list) when every attempt
                failed with a retryable error.

        Returns:
            The decoded JSON body on HTTP 200. ``None`` when the status is not
            200, when a non-retryable exception occurs, or when all attempts
            are exhausted.

        Retry policy: up to three attempts. An attempt is retried when the
        exception text contains ``"Connection aborted."`` or when the request
        timed out. A non-200 status is reported and returned immediately; it
        is neither retried nor recorded in ``failed_queue``.
        """
        max_retries = 3
        # Seconds allowed for connecting and for each read. Without a timeout
        # requests waits indefinitely, which would hang the worker on a
        # stalled connection.
        request_timeout = 30
        # NOTE(review): the keyword is URL-encoded even though it travels in a
        # JSON body; kept as-is because the server's expectation is not
        # visible from this file.
        encoded_keyword = quote(keyword)

        # GraphQL document, kept verbatim. It is written as adjacent string
        # literals purely so the source stays parseable; the concatenated
        # value is the single original query string.
        query = (
            'query searchModel($storeId: String, $zipCode: String, $skipInstallServices: Boolean = true, $startIndex: Int, $pageSize: Int, $orderBy: ProductSort, $filter: ProductFilter, $isBrandPricingPolicyCompliant: Boolean, $skipFavoriteCount: Boolean = false, $skipKPF: Boolean = false, $skipSpecificationGroup: Boolean = false, $keyword: String, $navParam: String, $storefilter: StoreFilter = ALL, $channel: Channel = DESKTOP, $additionalSearchParams: AdditionalParams, $loyaltyMembershipInput: LoyaltyMembershipInput, $dataSource: String, $skipDiscoveryZones: Boolean = true, $skipBuyitagain: Boolean = true) {\n  searchModel(\n    keyword: $keyword\n    navParam: $navParam\n    storefilter: $storefilter\n    storeId: $storeId\n    isBrandPricingPolicyCompliant: $isBrandPricingPolicyCompliant\n    channel: $channel\n    additionalSearchParams: $additionalSearchParams\n    loyaltyMembershipInput: $loyaltyMembershipInput\n  ) {\n    metadata {\n      hasPLPBanner\n      categoryID\n      analytics {\n        semanticTokens\n        dynamicLCA\n        __typename\n      }\n      canonicalUrl\n      searchRedirect\n      clearAllRefinementsURL\n      contentType\n      h1Tag\n      isStoreDisplay\n      productCount {\n        inStore\n        __typename\n      }\n      stores {\n        storeId\n        storeName\n        address {\n          postalCode\n          __typename\n        }\n        nearByStores {\n          storeId\n          storeName\n          distance\n          address {\n            postalCode\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    products(\n      startIndex: $startIndex\n      pageSize: $pageSize\n      orderBy: $orderBy\n      filter: $filter\n    ) {\n      identifiers {\n        storeSkuNumber\n        canonicalUrl\n        brandName\n        itemId\n        productLabel\n        productType\n        parentId\n        specialOrderSku\n       '
            ' modelNumber\n        __typename\n      }\n      installServices(storeId: $storeId, zipCode: $zipCode) @skip(if: $skipInstallServices) {\n        scheduleAMeasure\n        gccCarpetDesignAndOrderEligible\n        __typename\n      }\n      itemId\n      dataSources\n      media {\n        images {\n          url\n          type\n          subType\n          sizes\n          __typename\n        }\n        __typename\n      }\n      pricing(\n        storeId: $storeId\n        isBrandPricingPolicyCompliant: $isBrandPricingPolicyCompliant\n      ) {\n        value\n        original\n        preferredPriceFlag\n        promotion {\n          dates {\n            start\n            end\n            __typename\n          }\n          description {\n            shortDesc\n            longDesc\n            __typename\n          }\n          experienceTag\n          subExperienceTag\n          type\n          dollarOff\n          percentageOff\n          promotionTag\n          savingsCenter\n          savingsCenterPromos\n          specialBuySavings\n          specialBuyDollarOff\n          specialBuyPercentageOff\n          __typename\n        }\n        conditionalPromotions {\n          promotionId\n          skuItemGroup\n          promotionTags\n          eligibilityCriteria {\n            itemGroup\n            minThresholdVal\n            thresholdType\n            __typename\n          }\n          reward {\n            tiers {\n              minThresholdVal\n              thresholdType\n              rewardVal\n              rewardType\n              rewardLevel\n              maxAllowedRewardAmount\n              __typename\n            }\n            __typename\n          }\n          __typename\n        }\n        alternatePriceDisplay\n        alternate {\n          bulk {\n            pricePerUnit\n            thresholdQuantity\n            value\n            __typename\n          }\n          unit {\n            caseUnitOfMeasure\n            '
            'unitsOriginalPrice\n            unitsPerCase\n            value\n            __typename\n          }\n          __typename\n        }\n        mapAboveOriginalPrice\n        mapDetail {\n          percentageOff\n          dollarOff\n          mapPolicy\n          mapOriginalPriceViolation\n          mapSpecialPriceViolation\n          __typename\n        }\n        message\n        specialBuy\n        unitOfMeasure\n        clearance {\n          value\n          dollarOff\n          percentageOff\n          unitsClearancePrice\n          __typename\n        }\n        __typename\n      }\n      reviews {\n        ratingsReviews {\n          averageRating\n          totalReviews\n          __typename\n        }\n        __typename\n      }\n      badges(storeId: $storeId) {\n        name\n        label\n        __typename\n      }\n      info {\n        isSponsored\n        sponsoredMetadata {\n          campaignId\n          placementId\n          slotId\n          sponsoredId\n          trackSource\n          __typename\n        }\n        sponsoredBeacon {\n          onClickBeacon\n          onViewBeacon\n          onClickBeacons\n          onViewBeacons\n          __typename\n        }\n        productSubType {\n          name\n          link\n          __typename\n        }\n        augmentedReality\n        globalCustomConfigurator {\n          customExperience\n          __typename\n        }\n        hidePrice\n        ecoRebate\n        quantityLimit\n        categoryHierarchy\n        sskMin\n        sskMax\n        unitOfMeasureCoverage\n        wasMaxPriceRange\n        wasMinPriceRange\n        swatches {\n          isSelected\n          itemId\n          label\n          swatchImgUrl\n          url\n          value\n          __typename\n        }\n        totalNumberOfOptions\n        customerSignal {\n          previouslyPurchased\n          __typename\n        }\n        isBuryProduct\n        isGenericProduct\n        returnable\n        '
            'samplesAvailable\n        isLiveGoodsProduct\n        classNumber\n        hasSubscription\n        productDepartment\n        __typename\n      }\n      dataSource\n      favoriteDetail @skip(if: $skipFavoriteCount) {\n        count\n        __typename\n      }\n      taxonomy {\n        breadCrumbs {\n          label\n          __typename\n        }\n        __typename\n      }\n      details {\n        installation {\n          serviceType\n          __typename\n        }\n        collection {\n          name\n          url\n          __typename\n        }\n        __typename\n      }\n      fulfillment(storeId: $storeId, zipCode: $zipCode) {\n        anchorStoreStatus\n        anchorStoreStatusType\n        backordered\n        backorderedShipDate\n        bossExcludedShipStates\n        excludedShipStates\n        seasonStatusEligible\n        fulfillmentOptions {\n          type\n          fulfillable\n          services {\n            deliveryTimeline\n            deliveryDates {\n              startDate\n              endDate\n              __typename\n            }\n            deliveryCharge\n            dynamicEta {\n              hours\n              minutes\n              __typename\n            }\n            hasFreeShipping\n            freeDeliveryThreshold\n            locations {\n              curbsidePickupFlag\n              isBuyInStoreCheckNearBy\n              distance\n              inventory {\n                isOutOfStock\n                isInStock\n                isLimitedQuantity\n                isUnavailable\n                quantity\n                maxAllowedBopisQty\n                minAllowedBopisQty\n                __typename\n              }\n              isAnchor\n              locationId\n              state\n              storeName\n              storePhone\n              type\n              __typename\n            }\n            type\n            totalCharge\n            earliestDeliveryDate\n            deliveryMessage\n '
            '           __typename\n          }\n          __typename\n        }\n        onlineStoreStatus\n        onlineStoreStatusType\n        fulfillmentBundleMessage\n        __typename\n      }\n      availabilityType {\n        type\n        buyable\n        discontinued\n        status\n        __typename\n      }\n      bundleFlag\n      keyProductFeatures @skip(if: $skipKPF) {\n        keyProductFeaturesItems {\n          features {\n            name\n            refinementId\n            refinementUrl\n            value\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      specificationGroup @skip(if: $skipSpecificationGroup) {\n        specifications {\n          specName\n          specValue\n          __typename\n        }\n        specTitle\n        __typename\n      }\n      bundleItems {\n        id\n        quantity\n        __typename\n      }\n      __typename\n    }\n    id\n    searchReport {\n      totalProducts\n      didYouMean\n      correctedKeyword\n      keyword\n      pageSize\n      searchUrl\n      sortBy\n      sortOrder\n      startIndex\n      __typename\n    }\n    relatedResults {\n      universalSearch {\n        title\n        __typename\n      }\n      relatedServices {\n        label\n        __typename\n      }\n      visualNavs {\n        label\n        imageId\n        webUrl\n        categoryId\n        imageURL\n        __typename\n      }\n      visualNavContainsEvents\n      relatedKeywords {\n        keyword\n        __typename\n      }\n      __typename\n    }\n    taxonomy {\n      brandLinkUrl\n      breadCrumbs {\n        browseUrl\n        creativeIconUrl\n        deselectUrl\n        dimensionId\n        dimensionName\n        label\n        refinementKey\n        url\n        __typename\n      }\n      __typename\n    }\n    templates\n    partialTemplates\n    dimensions {\n      label\n      refinements {\n        refinementKey\n        url\n        label\n        '
            'recordCount\n        selected\n        imgUrl\n        nestedRefinements {\n          label\n          url\n          recordCount\n          refinementKey\n          __typename\n        }\n        __typename\n      }\n      collapse\n      dimensionId\n      isVisualNav\n      isVisualDimension\n      isNumericFilter\n      isColorSwatch\n      nestedRefinementsLimit\n      visualNavSequence\n      __typename\n    }\n    orangeGraph {\n      universalSearchArray {\n        pods {\n          title\n          description\n          imageUrl\n          link\n          isProContent\n          recordType\n          __typename\n        }\n        info {\n          title\n          __typename\n        }\n        __typename\n      }\n      productTypes\n      __typename\n    }\n    discoveryZones @skip(if: $skipDiscoveryZones) {\n      products(dataSource: $dataSource) {\n        itemId\n        dataSources\n        badges(storeId: $storeId) {\n          name\n          __typename\n        }\n        info {\n          isSponsored\n          sponsoredMetadata {\n            campaignId\n            placementId\n            slotId\n            sponsoredId\n            trackSource\n            __typename\n          }\n          sponsoredBeacon {\n            onClickBeacon\n            onViewBeacon\n            onClickBeacons\n            onViewBeacons\n            __typename\n          }\n          productSubType {\n            name\n            __typename\n          }\n          augmentedReality\n          globalCustomConfigurator {\n            customExperience\n            __typename\n          }\n          swatches {\n            isSelected\n            itemId\n            label\n            swatchImgUrl\n            url\n            value\n            __typename\n          }\n          totalNumberOfOptions\n          hidePrice\n          ecoRebate\n          quantityLimit\n          categoryHierarchy\n          sskMin\n          sskMax\n          unitOfMeasureCoverage\n   '
            '       wasMaxPriceRange\n          wasMinPriceRange\n          __typename\n        }\n        identifiers {\n          canonicalUrl\n          productType\n          productLabel\n          modelNumber\n          storeSkuNumber\n          itemId\n          brandName\n          parentId\n          __typename\n        }\n        media {\n          images {\n            url\n            type\n            subType\n            sizes\n            __typename\n          }\n          __typename\n        }\n        dataSource\n        details {\n          collection {\n            name\n            url\n            __typename\n          }\n          __typename\n        }\n        pricing(\n          storeId: $storeId\n          isBrandPricingPolicyCompliant: $isBrandPricingPolicyCompliant\n        ) {\n          alternatePriceDisplay\n          alternate {\n            bulk {\n              pricePerUnit\n              thresholdQuantity\n              value\n              __typename\n            }\n            unit {\n              caseUnitOfMeasure\n              unitsOriginalPrice\n              unitsPerCase\n              value\n              __typename\n            }\n            __typename\n          }\n          original\n          mapAboveOriginalPrice\n          mapDetail {\n            percentageOff\n            dollarOff\n            mapPolicy\n            mapOriginalPriceViolation\n            mapSpecialPriceViolation\n            __typename\n          }\n          message\n          preferredPriceFlag\n          promotion {\n            type\n            description {\n              shortDesc\n              longDesc\n              __typename\n            }\n            dollarOff\n            percentageOff\n            promotionTag\n            savingsCenter\n            savingsCenterPromos\n            specialBuySavings\n            specialBuyDollarOff\n            specialBuyPercentageOff\n            __typename\n          }\n          specialBuy\n          '
            'unitOfMeasure\n          value\n          __typename\n        }\n        taxonomy {\n          breadCrumbs {\n            label\n            __typename\n          }\n          __typename\n        }\n        reviews {\n          ratingsReviews {\n            averageRating\n            totalReviews\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      metadata {\n        zone\n        zoneTitle\n        __typename\n      }\n      __typename\n    }\n    appliedDimensions {\n      label\n      refinements {\n        label\n        refinementKey\n        url\n        __typename\n      }\n      isNumericFilter\n      __typename\n    }\n    primaryFilters {\n      collapse\n      dimensionId\n      isVisualNav\n      isVisualDimension\n      isNumericFilter\n      isColorSwatch\n      label\n      nestedRefinementsLimit\n      refinements {\n        label\n        refinementKey\n        recordCount\n        selected\n        imgUrl\n        url\n        nestedRefinements {\n          label\n          url\n          recordCount\n          refinementKey\n          __typename\n        }\n        __typename\n      }\n      visualNavSequence\n      __typename\n    }\n    buyitagain(dataSource: $dataSource) @skip(if: $skipBuyitagain) {\n      itemId\n      dataSources\n      badges(storeId: $storeId) {\n        name\n        __typename\n      }\n      info {\n        isSponsored\n        sponsoredMetadata {\n          campaignId\n          placementId\n          slotId\n          sponsoredId\n          trackSource\n          __typename\n        }\n        sponsoredBeacon {\n          onClickBeacon\n          onViewBeacon\n          onClickBeacons\n          onViewBeacons\n          __typename\n        }\n        productSubType {\n          name\n          link\n          __typename\n        }\n        augmentedReality\n        globalCustomConfigurator {\n          customExperience\n          __typename\n        }\n        '
            'customerSignal {\n          previouslyPurchased\n          __typename\n        }\n        isBuryProduct\n        isGenericProduct\n        returnable\n        hidePrice\n        ecoRebate\n        quantityLimit\n        categoryHierarchy\n        sskMin\n        sskMax\n        unitOfMeasureCoverage\n        wasMaxPriceRange\n        wasMinPriceRange\n        __typename\n      }\n      identifiers {\n        canonicalUrl\n        productType\n        productLabel\n        modelNumber\n        storeSkuNumber\n        itemId\n        brandName\n        specialOrderSku\n        __typename\n      }\n      media {\n        images {\n          url\n          type\n          subType\n          sizes\n          __typename\n        }\n        __typename\n      }\n      details {\n        installation {\n          serviceType\n          __typename\n        }\n        collection {\n          name\n          url\n          __typename\n        }\n        __typename\n      }\n      fulfillment(storeId: $storeId, zipCode: $zipCode) {\n        anchorStoreStatus\n        anchorStoreStatusType\n        backordered\n        backorderedShipDate\n        bossExcludedShipStates\n        excludedShipStates\n        seasonStatusEligible\n        fulfillmentOptions {\n          type\n          fulfillable\n          services {\n            deliveryTimeline\n            deliveryDates {\n              startDate\n              endDate\n              __typename\n            }\n            deliveryCharge\n            dynamicEta {\n              hours\n              minutes\n              __typename\n            }\n            hasFreeShipping\n            freeDeliveryThreshold\n            locations {\n              curbsidePickupFlag\n              isBuyInStoreCheckNearBy\n              distance\n              inventory {\n                isOutOfStock\n                isInStock\n                isLimitedQuantity\n                isUnavailable\n                quantity\n                '
            'maxAllowedBopisQty\n                minAllowedBopisQty\n                __typename\n              }\n              isAnchor\n              locationId\n              state\n              storeName\n              storePhone\n              type\n              __typename\n            }\n            type\n            totalCharge\n            __typename\n          }\n          __typename\n        }\n        onlineStoreStatus\n        onlineStoreStatusType\n        __typename\n      }\n      installServices(storeId: $storeId, zipCode: $zipCode) @skip(if: $skipInstallServices) {\n        scheduleAMeasure\n        gccCarpetDesignAndOrderEligible\n        __typename\n      }\n      taxonomy {\n        breadCrumbs {\n          label\n          __typename\n        }\n        __typename\n      }\n      pricing(\n        storeId: $storeId\n        isBrandPricingPolicyCompliant: $isBrandPricingPolicyCompliant\n      ) {\n        alternatePriceDisplay\n        alternate {\n          bulk {\n            pricePerUnit\n            thresholdQuantity\n            value\n            __typename\n          }\n          unit {\n            caseUnitOfMeasure\n            unitsOriginalPrice\n            unitsPerCase\n            value\n            __typename\n          }\n          __typename\n        }\n        original\n        mapAboveOriginalPrice\n        mapDetail {\n          percentageOff\n          dollarOff\n          mapPolicy\n          mapOriginalPriceViolation\n          mapSpecialPriceViolation\n          __typename\n        }\n        message\n        preferredPriceFlag\n        promotion {\n          type\n          description {\n            shortDesc\n            longDesc\n            __typename\n          }\n          dollarOff\n          percentageOff\n          promotionTag\n          savingsCenter\n          savingsCenterPromos\n          specialBuySavings\n          specialBuyDollarOff\n          specialBuyPercentageOff\n          __typename\n        }\n        '
            'specialBuy\n        unitOfMeasure\n        value\n        __typename\n      }\n      dataSource\n      __typename\n    }\n    __typename\n  }\n}'
        )

        # The payload does not change between attempts, so build it once.
        params = {
            'opname': 'searchModel',
        }
        json_data = {
            'operationName': 'searchModel',
            'variables': {
                'skipInstallServices': False,
                'skipFavoriteCount': False,
                'skipKPF': False,
                'skipSpecificationGroup': False,
                'storefilter': 'ALL',
                'channel': 'DESKTOP',
                'skipDiscoveryZones': False,
                'skipBuyitagain': True,
                'additionalSearchParams': {
                    'sponsored': True,
                    'mcvisId': '16129746417315936432497630021679731371',
                    'deliveryZip': '90250',
                    'multiStoreIds': [],
                },
                'filter': {},
                'isBrandPricingPolicyCompliant': False,
                'keyword': encoded_keyword,
                'navParam': None,
                'orderBy': {
                    'field': str(self.post_sort_by),
                    'order': str(self.order),
                },
                'pageSize': 48,
                'startIndex': page,
                'storeId': '620',
            },
            'query': query,
        }

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.post(
                    'https://apionline.homedepot.com/federation-gateway/graphql',
                    params=params,
                    cookies=self.cookies,
                    headers=self.headers,
                    json=json_data,
                    timeout=request_timeout,
                )
                if response.status_code == 200:
                    return response.json()
                print(f"{keyword} post_req_24 Status code {response.status_code}")
                return None
            except Exception as e:
                # Broad on purpose: callers rely on this method returning None
                # instead of raising. Only transient transport failures are
                # retried; anything else is reported and abandoned.
                retryable = (
                    "Connection aborted." in str(e)
                    or isinstance(e, requests.exceptions.Timeout)
                )
                if not retryable:
                    print(f'{keyword} post_req_24 error:{e}')
                    return None
                print(f'{keyword}请求连接错误：{e},重试次数: {attempt}')

        # Every attempt hit a retryable error: record the task for later handling.
        print(f'{keyword}超过重试次数')
        try:
            self.failed_queue.put([search_id])
            print(f'将{keyword}失败id:{search_id} 放入失败队列')
        except Exception:
            print('failed_queue队列已满，写入失败')
        return None

    def post2_req_48(self, keyword,page,search_id):
        retries = 0
        max_retries = 3
        encoded_keyword = quote(keyword)
        while retries < max_retries:
            try:
                res = requests.get(f'https://www.homedepot.com/s/{encoded_keyword}', headers=self.headers,cookies=self.cookies)
                if res.status_code == 200:
                    EXPERIENCE_PROPS = re.findall('__EXPERIENCE_PROPS__ = (.*?);</script>', res.text)[0]
                    navparam = json.loads(EXPERIENCE_PROPS)['redirect']['navparam']
                    params = {
                        'opname': 'searchModel',
                    }
                    json_data = {
                        'operationName': 'searchModel',
                        'variables': {
                            'storefilter': 'ALL',
                            'channel': 'DESKTOP',
                            'skipInstallServices': False,
                            'skipFavoriteCount': False,
                            'skipDiscoveryZones': False,
                            'skipBuyitagain': True,
                            'additionalSearchParams': {
                                'sponsored': True,
                                'mcvisId': '16129746417315936432497630021679731371',
                                'deliveryZip': '90250',
                                'multiStoreIds': [],
                            },
                            'filter': {},
                            'isBrandPricingPolicyCompliant': False,
                            'navParam': str(navparam),
                            'orderBy': {
                                'field': str(self.post_sort_by),
                                'order': str(self.order),
                            },
                            'pageSize': 48,
                            'startIndex': page,
                            'storeId': '620',
                        },
                        'query': 'query searchModel($keyword: String, $navParam: String, $storefilter: StoreFilter = ALL, $isBrandPricingPolicyCompliant: Boolean, $storeId: String, $channel: Channel = DESKTOP, $additionalSearchParams: AdditionalParams, $loyaltyMembershipInput: LoyaltyMembershipInput, $startIndex: Int, $pageSize: Int, $orderBy: ProductSort, $filter: ProductFilter, $zipCode: String, $skipInstallServices: Boolean = true, $skipFavoriteCount: Boolean = false, $dataSource: String, $skipDiscoveryZones: Boolean = true, $skipBuyitagain: Boolean = true) {\n  searchModel(\n    keyword: $keyword\n    navParam: $navParam\n    storefilter: $storefilter\n    isBrandPricingPolicyCompliant: $isBrandPricingPolicyCompliant\n    storeId: $storeId\n    channel: $channel\n    additionalSearchParams: $additionalSearchParams\n    loyaltyMembershipInput: $loyaltyMembershipInput\n  ) {\n    metadata {\n      hasPLPBanner\n      categoryID\n      analytics {\n        semanticTokens\n        dynamicLCA\n        __typename\n      }\n      canonicalUrl\n      searchRedirect\n      clearAllRefinementsURL\n      contentType\n      h1Tag\n      isStoreDisplay\n      productCount {\n        inStore\n        __typename\n      }\n      stores {\n        storeId\n        storeName\n        address {\n          postalCode\n          __typename\n        }\n        nearByStores {\n          storeId\n          storeName\n          distance\n          address {\n            postalCode\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    id\n    searchReport {\n      totalProducts\n      didYouMean\n      correctedKeyword\n      keyword\n      pageSize\n      searchUrl\n      sortBy\n      sortOrder\n      startIndex\n      __typename\n    }\n    relatedResults {\n      universalSearch {\n        title\n        __typename\n      }\n      relatedServices {\n        label\n        __typename\n      }\n      visualNavs 
{\n        label\n        imageId\n        webUrl\n        categoryId\n        imageURL\n        __typename\n      }\n      visualNavContainsEvents\n      relatedKeywords {\n        keyword\n        __typename\n      }\n      __typename\n    }\n    products(\n      startIndex: $startIndex\n      pageSize: $pageSize\n      orderBy: $orderBy\n      filter: $filter\n    ) {\n      itemId\n      dataSources\n      identifiers {\n        canonicalUrl\n        brandName\n        itemId\n        productLabel\n        productType\n        parentId\n        modelNumber\n        storeSkuNumber\n        specialOrderSku\n        isSuperSku\n        sampleId\n        __typename\n      }\n      media {\n        images {\n          url\n          type\n          subType\n          sizes\n          __typename\n        }\n        __typename\n      }\n      pricing(\n        storeId: $storeId\n        isBrandPricingPolicyCompliant: $isBrandPricingPolicyCompliant\n      ) {\n        value\n        original\n        promotion {\n          dates {\n            start\n            end\n            __typename\n          }\n          description {\n            shortDesc\n            longDesc\n            __typename\n          }\n          experienceTag\n          subExperienceTag\n          type\n          dollarOff\n          percentageOff\n          promotionTag\n          savingsCenter\n          savingsCenterPromos\n          specialBuySavings\n          specialBuyDollarOff\n          specialBuyPercentageOff\n          __typename\n        }\n        preferredPriceFlag\n        conditionalPromotions {\n          promotionId\n          skuItemGroup\n          promotionTags\n          eligibilityCriteria {\n            itemGroup\n            minThresholdVal\n            thresholdType\n            __typename\n          }\n          reward {\n            tiers {\n              minThresholdVal\n              thresholdType\n              rewardVal\n              rewardType\n              
rewardLevel\n              maxAllowedRewardAmount\n              __typename\n            }\n            __typename\n          }\n          __typename\n        }\n        alternatePriceDisplay\n        alternate {\n          bulk {\n            pricePerUnit\n            thresholdQuantity\n            value\n            __typename\n          }\n          unit {\n            caseUnitOfMeasure\n            unitsOriginalPrice\n            unitsPerCase\n            value\n            __typename\n          }\n          __typename\n        }\n        mapAboveOriginalPrice\n        mapDetail {\n          percentageOff\n          dollarOff\n          mapPolicy\n          mapOriginalPriceViolation\n          mapSpecialPriceViolation\n          __typename\n        }\n        message\n        specialBuy\n        unitOfMeasure\n        clearance {\n          value\n          dollarOff\n          percentageOff\n          unitsClearancePrice\n          __typename\n        }\n        __typename\n      }\n      reviews {\n        ratingsReviews {\n          averageRating\n          totalReviews\n          __typename\n        }\n        __typename\n      }\n      badges(storeId: $storeId) {\n        name\n        label\n        __typename\n      }\n      info {\n        isSponsored\n        sponsoredMetadata {\n          campaignId\n          placementId\n          slotId\n          sponsoredId\n          trackSource\n          __typename\n        }\n        sponsoredBeacon {\n          onClickBeacon\n          onViewBeacon\n          onClickBeacons\n          onViewBeacons\n          __typename\n        }\n        productSubType {\n          name\n          link\n          __typename\n        }\n        productDepartmentId\n        paintBrand\n        dotComColorEligible\n        isLiveGoodsProduct\n        augmentedReality\n        globalCustomConfigurator {\n          customExperience\n          __typename\n        }\n        samplesAvailable\n        swatches {\n          
isSelected\n          itemId\n          label\n          swatchImgUrl\n          url\n          value\n          __typename\n        }\n        totalNumberOfOptions\n        hidePrice\n        ecoRebate\n        quantityLimit\n        categoryHierarchy\n        sskMin\n        sskMax\n        unitOfMeasureCoverage\n        wasMaxPriceRange\n        wasMinPriceRange\n        isGenericProduct\n        classNumber\n        hasSubscription\n        productDepartment\n        customerSignal {\n          previouslyPurchased\n          __typename\n        }\n        isBuryProduct\n        returnable\n        __typename\n      }\n      details {\n        collection {\n          collectionId\n          name\n          url\n          __typename\n        }\n        highlights\n        installation {\n          serviceType\n          __typename\n        }\n        __typename\n      }\n      installServices(storeId: $storeId, zipCode: $zipCode) @skip(if: $skipInstallServices) {\n        scheduleAMeasure\n        gccCarpetDesignAndOrderEligible\n        __typename\n      }\n      availabilityType {\n        type\n        buyable\n        discontinued\n        status\n        __typename\n      }\n      fulfillment(storeId: $storeId, zipCode: $zipCode) {\n        fulfillmentOptions {\n          services {\n            type\n            locations {\n              isAnchor\n              inventory {\n                quantity\n                isOutOfStock\n                isInStock\n                isLimitedQuantity\n                isUnavailable\n                maxAllowedBopisQty\n                minAllowedBopisQty\n                __typename\n              }\n              curbsidePickupFlag\n              isBuyInStoreCheckNearBy\n              distance\n              locationId\n              state\n              storeName\n              storePhone\n              type\n              __typename\n            }\n            hasFreeShipping\n            freeDeliveryThreshold\n        
    earliestDeliveryDate\n            totalCharge\n            deliveryTimeline\n            deliveryDates {\n              startDate\n              endDate\n              __typename\n            }\n            deliveryCharge\n            dynamicEta {\n              hours\n              minutes\n              __typename\n            }\n            deliveryMessage\n            __typename\n          }\n          type\n          fulfillable\n          __typename\n        }\n        anchorStoreStatus\n        anchorStoreStatusType\n        backordered\n        backorderedShipDate\n        bossExcludedShipStates\n        excludedShipStates\n        seasonStatusEligible\n        fulfillmentBundleMessage\n        onlineStoreStatus\n        onlineStoreStatusType\n        __typename\n      }\n      dataSource\n      favoriteDetail @skip(if: $skipFavoriteCount) {\n        count\n        __typename\n      }\n      taxonomy {\n        breadCrumbs {\n          label\n          __typename\n        }\n        __typename\n      }\n      bundleFlag\n      specificationGroup {\n        specifications {\n          specName\n          specValue\n          __typename\n        }\n        specTitle\n        __typename\n      }\n      bundleItems {\n        id\n        quantity\n        __typename\n      }\n      __typename\n    }\n    taxonomy {\n      brandLinkUrl\n      breadCrumbs {\n        browseUrl\n        creativeIconUrl\n        deselectUrl\n        dimensionId\n        dimensionName\n        label\n        refinementKey\n        url\n        __typename\n      }\n      __typename\n    }\n    templates\n    discoveryZones @skip(if: $skipDiscoveryZones) {\n      products(dataSource: $dataSource) {\n        itemId\n        dataSources\n        badges(storeId: $storeId) {\n          name\n          __typename\n        }\n        info {\n          isSponsored\n          sponsoredMetadata {\n            campaignId\n            placementId\n            slotId\n            sponsoredId\n 
           trackSource\n            __typename\n          }\n          sponsoredBeacon {\n            onClickBeacon\n            onViewBeacon\n            onClickBeacons\n            onViewBeacons\n            __typename\n          }\n          productSubType {\n            name\n            __typename\n          }\n          augmentedReality\n          globalCustomConfigurator {\n            customExperience\n            __typename\n          }\n          swatches {\n            isSelected\n            itemId\n            label\n            swatchImgUrl\n            url\n            value\n            __typename\n          }\n          totalNumberOfOptions\n          hidePrice\n          ecoRebate\n          quantityLimit\n          categoryHierarchy\n          sskMin\n          sskMax\n          unitOfMeasureCoverage\n          wasMaxPriceRange\n          wasMinPriceRange\n          __typename\n        }\n        identifiers {\n          canonicalUrl\n          productType\n          productLabel\n          modelNumber\n          storeSkuNumber\n          itemId\n          brandName\n          parentId\n          __typename\n        }\n        media {\n          images {\n            url\n            type\n            subType\n            sizes\n            __typename\n          }\n          __typename\n        }\n        dataSource\n        details {\n          collection {\n            name\n            url\n            __typename\n          }\n          __typename\n        }\n        pricing(\n          storeId: $storeId\n          isBrandPricingPolicyCompliant: $isBrandPricingPolicyCompliant\n        ) {\n          alternatePriceDisplay\n          alternate {\n            bulk {\n              pricePerUnit\n              thresholdQuantity\n              value\n              __typename\n            }\n            unit {\n              caseUnitOfMeasure\n              unitsOriginalPrice\n              unitsPerCase\n              value\n              
__typename\n            }\n            __typename\n          }\n          original\n          mapAboveOriginalPrice\n          mapDetail {\n            percentageOff\n            dollarOff\n            mapPolicy\n            mapOriginalPriceViolation\n            mapSpecialPriceViolation\n            __typename\n          }\n          message\n          preferredPriceFlag\n          promotion {\n            type\n            description {\n              shortDesc\n              longDesc\n              __typename\n            }\n            dollarOff\n            percentageOff\n            promotionTag\n            savingsCenter\n            savingsCenterPromos\n            specialBuySavings\n            specialBuyDollarOff\n            specialBuyPercentageOff\n            __typename\n          }\n          specialBuy\n          unitOfMeasure\n          value\n          __typename\n        }\n        taxonomy {\n          breadCrumbs {\n            label\n            __typename\n          }\n          __typename\n        }\n        reviews {\n          ratingsReviews {\n            averageRating\n            totalReviews\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      metadata {\n        zone\n        zoneTitle\n        __typename\n      }\n      __typename\n    }\n    partialTemplates\n    dimensions {\n      label\n      refinements {\n        refinementKey\n        url\n        label\n        recordCount\n        selected\n        imgUrl\n        nestedRefinements {\n          label\n          url\n          recordCount\n          refinementKey\n          __typename\n        }\n        __typename\n      }\n      collapse\n      dimensionId\n      isVisualNav\n      isVisualDimension\n      isNumericFilter\n      isColorSwatch\n      nestedRefinementsLimit\n      visualNavSequence\n      __typename\n    }\n    orangeGraph {\n      universalSearchArray {\n        pods {\n          title\n          
description\n          imageUrl\n          link\n          isProContent\n          recordType\n          __typename\n        }\n        info {\n          title\n          __typename\n        }\n        __typename\n      }\n      productTypes\n      __typename\n    }\n    appliedDimensions {\n      label\n      refinements {\n        label\n        refinementKey\n        url\n        __typename\n      }\n      isNumericFilter\n      __typename\n    }\n    primaryFilters {\n      collapse\n      dimensionId\n      isVisualNav\n      isVisualDimension\n      isNumericFilter\n      isColorSwatch\n      label\n      nestedRefinementsLimit\n      refinements {\n        label\n        refinementKey\n        recordCount\n        selected\n        imgUrl\n        url\n        nestedRefinements {\n          label\n          url\n          recordCount\n          refinementKey\n          __typename\n        }\n        __typename\n      }\n      visualNavSequence\n      __typename\n    }\n    buyitagain(dataSource: $dataSource) @skip(if: $skipBuyitagain) {\n      itemId\n      dataSources\n      badges(storeId: $storeId) {\n        name\n        __typename\n      }\n      info {\n        isSponsored\n        sponsoredMetadata {\n          campaignId\n          placementId\n          slotId\n          sponsoredId\n          trackSource\n          __typename\n        }\n        sponsoredBeacon {\n          onClickBeacon\n          onViewBeacon\n          onClickBeacons\n          onViewBeacons\n          __typename\n        }\n        productSubType {\n          name\n          link\n          __typename\n        }\n        augmentedReality\n        globalCustomConfigurator {\n          customExperience\n          __typename\n        }\n        customerSignal {\n          previouslyPurchased\n          __typename\n        }\n        isBuryProduct\n        isGenericProduct\n        returnable\n        hidePrice\n        ecoRebate\n        quantityLimit\n        categoryHierarchy\n 
       sskMin\n        sskMax\n        unitOfMeasureCoverage\n        wasMaxPriceRange\n        wasMinPriceRange\n        __typename\n      }\n      identifiers {\n        canonicalUrl\n        productType\n        productLabel\n        modelNumber\n        storeSkuNumber\n        itemId\n        brandName\n        specialOrderSku\n        __typename\n      }\n      media {\n        images {\n          url\n          type\n          subType\n          sizes\n          __typename\n        }\n        __typename\n      }\n      details {\n        installation {\n          serviceType\n          __typename\n        }\n        collection {\n          name\n          url\n          __typename\n        }\n        __typename\n      }\n      fulfillment(storeId: $storeId, zipCode: $zipCode) {\n        anchorStoreStatus\n        anchorStoreStatusType\n        backordered\n        backorderedShipDate\n        bossExcludedShipStates\n        excludedShipStates\n        seasonStatusEligible\n        fulfillmentOptions {\n          type\n          fulfillable\n          services {\n            deliveryTimeline\n            deliveryDates {\n              startDate\n              endDate\n              __typename\n            }\n            deliveryCharge\n            dynamicEta {\n              hours\n              minutes\n              __typename\n            }\n            hasFreeShipping\n            freeDeliveryThreshold\n            locations {\n              curbsidePickupFlag\n              isBuyInStoreCheckNearBy\n              distance\n              inventory {\n                isOutOfStock\n                isInStock\n                isLimitedQuantity\n                isUnavailable\n                quantity\n                maxAllowedBopisQty\n                minAllowedBopisQty\n                __typename\n              }\n              isAnchor\n              locationId\n              state\n              storeName\n              storePhone\n              type\n       
       __typename\n            }\n            type\n            totalCharge\n            __typename\n          }\n          __typename\n        }\n        onlineStoreStatus\n        onlineStoreStatusType\n        __typename\n      }\n      installServices(storeId: $storeId, zipCode: $zipCode) @skip(if: $skipInstallServices) {\n        scheduleAMeasure\n        gccCarpetDesignAndOrderEligible\n        __typename\n      }\n      taxonomy {\n        breadCrumbs {\n          label\n          __typename\n        }\n        __typename\n      }\n      pricing(\n        storeId: $storeId\n        isBrandPricingPolicyCompliant: $isBrandPricingPolicyCompliant\n      ) {\n        alternatePriceDisplay\n        alternate {\n          bulk {\n            pricePerUnit\n            thresholdQuantity\n            value\n            __typename\n          }\n          unit {\n            caseUnitOfMeasure\n            unitsOriginalPrice\n            unitsPerCase\n            value\n            __typename\n          }\n          __typename\n        }\n        original\n        mapAboveOriginalPrice\n        mapDetail {\n          percentageOff\n          dollarOff\n          mapPolicy\n          mapOriginalPriceViolation\n          mapSpecialPriceViolation\n          __typename\n        }\n        message\n        preferredPriceFlag\n        promotion {\n          type\n          description {\n            shortDesc\n            longDesc\n            __typename\n          }\n          dollarOff\n          percentageOff\n          promotionTag\n          savingsCenter\n          savingsCenterPromos\n          specialBuySavings\n          specialBuyDollarOff\n          specialBuyPercentageOff\n          __typename\n        }\n        specialBuy\n        unitOfMeasure\n        value\n        __typename\n      }\n      dataSource\n      __typename\n    }\n    __typename\n  }\n}',
                    }
                    response = requests.post('https://apionline.homedepot.com/federation-gateway/graphql',params=params, cookies=self.cookies, headers=self.headers, json=json_data)
                    if response.status_code == 200:
                        return response.json()

                    else:
                        print(f"{keyword} post_req_48 Status code {response.status_code}")
                        return None
                else:
                    print(f'{keyword} 获取navparam失败，{res.status_code}')
                    return None

            except Exception as e:
                if "Connection aborted." in str(e):
                    retries += 1
                    print(f'{keyword}请求连接错误：{e},重试次数: {retries}')
                else:
                    print(f'{keyword} post2_req_48 error:{e}')
                    return None

        print(f'{keyword}超过重试次数')
        try:
            self.failed_queue.put([search_id])
            print(f'将{keyword}失败id:{search_id} 放入失败队列')
        except Exception as e:
            print('failed_queue队列已满，写入失败')
        return None

    def get_req(self, keyword, page, search_id):
        """Fetch one search-results HTML page with a GET request.

        Args:
            keyword: Search term; it is URL-quoted and placed in the URL path.
            page: Result offset, sent as the ``Nao`` query parameter.
            search_id: Not used by this method.

        Returns:
            The response body text when the server answers 200. ``None`` on
            any other status code, on a non-retryable error, or once the
            retry budget is used up. A message is printed in each of those
            cases.
        """
        max_retries = 3
        # Without a timeout requests waits indefinitely on a stalled
        # connection, which would block the calling worker thread forever.
        request_timeout = 30  # seconds
        encoded_keyword = quote(keyword)
        url = f'https://www.homedepot.com/s/{encoded_keyword}?NCNI-5&sortorder={self.order}&sortby={self.get_sort_by}&Nao={page}'

        retries = 0
        while retries < max_retries:
            try:
                response = requests.get(
                    url,
                    headers=self.headers,
                    cookies=self.cookies,
                    timeout=request_timeout,
                )
                if response.status_code == 200:
                    return response.text
                print(f"{keyword} fetch_page Status code {response.status_code}")
                return None
            except Exception as e:
                # Dropped connections and timeouts are transient, so they are
                # retried; anything else is reported and abandoned.
                transient = (
                    isinstance(e, requests.exceptions.Timeout)
                    or "Connection aborted." in str(e)
                )
                if not transient:
                    print(f'{keyword} fetch_page error:{e}')
                    return None
                retries += 1
                print(f'{keyword} 请求连接错误：{e},重试次数: {retries}')

        print(f'{keyword} 超过重试次数')
        return None

    def parse_json(self, html_content, search_id, keyword, real_page):
        """Convert a ``searchModel`` JSON response into a list of item dicts.

        Args:
            html_content: Decoded JSON response; the products are read from
                ``html_content['data']['searchModel']['products']``.
            search_id: Not used by this method.
            keyword: Search term, stored as ``search_term`` on every item and
                used in the error message.
            real_page: 1-based page number, stored as ``page`` on every item.

        Returns:
            A list with one dict per product (empty when the response has no
            products), or ``None`` when the response does not have the
            expected structure; the error is printed in that case.
        """
        try:
            products = html_content['data']['searchModel']['products']
            item_list = []
            for project in products:
                # A key may be present with a null value, in which case a
                # .get() default is not applied. ``or`` keeps one sparse
                # product from discarding the whole page.
                identifiers = project.get('identifiers') or {}
                pricing = project.get('pricing') or {}
                media = project.get('media') or {}
                images = media.get('images') or []

                canonical_url = identifiers.get('canonicalUrl')
                if images:
                    raw_image_url = (images[0] or {}).get('url') or ''
                    image_url = raw_image_url.replace('<SIZE>', '600')
                else:
                    image_url = None

                item_list.append({
                    'title': identifiers.get('productLabel'),
                    'price': pricing.get('value'),
                    'url': 'https://www.homedepot.com' + canonical_url if canonical_url else None,
                    'image': image_url,
                    'sku': identifiers.get('itemId'),
                    'search_term': keyword,
                    'page': real_page,
                    'state': 1,
                    'project_data': '',
                    'created_at': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                })
            return item_list
        except Exception as e:
            print(f"{keyword} parse_json error: {e}")
            return None

    def parse_html(self, html_content, search_id, keyword):
        """Extract the structured-data product list embedded in a results page.

        The page is parsed with lxml, the text of the
        ``thd-helmet__script--browseSearchStructuredData`` script tag is
        decoded as JSON, and the ``itemOffered`` value of its first entry is
        returned.

        Args:
            html_content: HTML text of the results page.
            search_id: Not used by this method.
            keyword: Search term, used only in the printed error message.

        Returns:
            The ``itemOffered`` value; ``[]`` when extraction fails with a
            "list index out of range" error (for example when the script tag
            is absent); ``None`` for any other extraction error.
        """
        tree = etree.HTML(html_content)
        script_xpath = '//script[@id="thd-helmet__script--browseSearchStructuredData"]/text()'
        try:
            script_text = tree.xpath(script_xpath)[0]
            structured = json.loads(script_text)
            first_entry = structured[0]
            return first_entry['mainEntity']['offers']['itemOffered']
        except Exception as e:
            print(f"{keyword} Parsing error: {e}")
            # An index error means "nothing found"; anything else is a failure.
            return [] if 'list index out of range' in str(e) else None

    def get_item(self, project_list, keyword, search_id, real_page):
        """Build item dicts from structured-data product entries.

        Args:
            project_list: Iterable of dicts read with the keys ``name``,
                ``offers`` (itself read for ``price`` and ``url``), ``image``
                and ``sku``.
            keyword: Search term, stored as ``search_term`` on every item.
            search_id: Not used by this method.
            real_page: Page number, stored as ``page`` on every item.

        Returns:
            A list of item dicts, or ``None`` if any entry raises while being
            processed; the error is printed in that case.
        """
        try:
            collected = []
            for entry in project_list:
                offer_info = entry.get('offers', {})
                raw_price = offer_info.get('price') if offer_info else None

                if isinstance(raw_price, str):
                    # Accept only digit strings with at most one decimal point.
                    looks_numeric = raw_price.replace('.', '', 1).isdigit()
                    try:
                        parsed_price = float(raw_price) if looks_numeric else None
                    except ValueError:
                        parsed_price = None
                elif isinstance(raw_price, (int, float)):
                    parsed_price = raw_price
                else:
                    # Neither a string nor a number: no usable price.
                    parsed_price = None

                collected.append({
                    'title': entry.get('name', ''),
                    'price': parsed_price,
                    'url': offer_info.get('url', '') if offer_info else None,
                    'image': entry.get('image', None),
                    'sku': entry.get('sku', None),
                    'search_term': keyword,
                    'page': real_page,
                    'state': 1,
                    'project_data': '',
                    'created_at': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                })

            return collected
        except Exception as e:
            print(f"{keyword} get_item error: {e}")
            return None


    def update(self, success_id, failed_id, invalid_id):
        """Write the final status of each processed search id through ``Con``.

        Args:
            success_id: List of id batches; forwarded to ``Con.hd_test_to_3``
                when its first batch is non-empty.
            failed_id: List of id batches; forwarded to ``Con.hd_test_to_4``
                when non-empty.
            invalid_id: List of id batches; flattened into a single list and
                forwarded to ``Con.hd_test_to_5`` when non-empty.

        Any exception raised while updating is printed and not propagated.
        """
        # NOTE(review): only invalid_id is flattened; failed_id and success_id
        # are forwarded as nested lists. Confirm what the Con helpers expect.
        try:
            if failed_id:
                print(f'失败的id:{failed_id}')
                Con.hd_test_to_4(failed_id)
                print("所有数据处理完成，状态已更新为4。")

            if invalid_id:
                invalid_id = [item for sublist in invalid_id for item in sublist]
                print(f'无效的id:{invalid_id}')
                Con.hd_test_to_5(invalid_id)
                print("所有数据处理完成，状态已更新为5。")

            # success_id is empty when nothing was saved; indexing it
            # unguarded raised IndexError and printed a spurious error below.
            if success_id and success_id[0]:
                print(f'成功的id:{success_id}')
                Con.hd_test_to_3(success_id)
                print("所有数据处理完成，状态已更新为3。")

        except Exception as e:
            print(f"更新状态时发生错误: {e}")

    def process_keyword(self, keyword, search_id):
        """Crawl up to three result pages for one search term and queue its items.

        The first page's HTML is fetched with ``get_req`` and checked for a
        'Results' label; without it the id goes to ``invalid_queue`` and the
        crawl stops. For every page both POST helpers are queried, their
        parsed items are merged and de-duplicated by ``sku`` within the page,
        and each unique item is put on ``item_queue`` as ``(item, search_id)``.
        The id goes to ``failed_queue`` when the first page cannot be fetched
        or yields no items.

        Args:
            keyword: Search term to crawl.
            search_id: Identifier recorded on the result queues for this term.

        No explicit lock is taken here: the queues are ``queue.Queue``
        objects, which synchronise themselves, and ``self.lock`` is a
        non-reentrant ``threading.Lock``, so acquiring it here would block
        forever if the caller already holds it.
        """
        page_size = 48
        max_pages = 3  # only the first three result pages are crawled

        for page in range(0, max_pages * page_size, page_size):
            real_page = page // page_size + 1

            if real_page == 1:
                # Only the first page's HTML is inspected, so the GET is
                # issued only here instead of being fetched and discarded
                # for the later pages.
                print(f"爬取{keyword}第{real_page}页get...")
                html_content = self.get_req(keyword, page, search_id)
                if not html_content:
                    # get_req returns None on any failure; parsing None
                    # would raise and kill the worker thread.
                    self.failed_queue.put([search_id])
                    print(f'将{keyword}失败id:{search_id} 放入失败队列')
                    break

                Html = etree.HTML(html_content)
                # Guard in case the parser yields no tree for this input.
                result = Html.xpath(
                    '//span[@class="sui-self-end results-applied__primary-filter-label results-applied__primary-filter-label-GIF-plp"]/text()'
                ) if Html is not None else []
                if not result or 'Results' not in str(result[0]):
                    print(f"{keyword} 没有 'Results'，不再尝试解析")
                    self.invalid_queue.put([search_id])
                    print(f'将{keyword}无效id:{search_id} 放入失效队列')
                    break

            print(f"爬取{keyword}第{real_page}页post1...")
            html_content2 = self.post_req_24(keyword, page, search_id)
            item_list2 = (
                self.parse_json(html_content2, search_id, keyword, real_page)
                if html_content2 else None
            )

            print(f"爬取{keyword}第{real_page}页post2...")
            html_content3 = self.post2_req_48(keyword, page, search_id)
            item_list3 = (
                self.parse_json(html_content3, search_id, keyword, real_page)
                if html_content3 else None
            )

            # Merge both result sets; a failed request contributes nothing.
            combined_items = (item_list2 or []) + (item_list3 or [])

            if not combined_items:
                if real_page == 1:
                    # No items at all on the first page counts as a failure.
                    self.failed_queue.put([search_id])
                    print(f'将{keyword}失败id:{search_id} 放入失败队列')
                else:
                    print(f'{keyword}最后一页')
                break

            print(f'{keyword} combined_items 第{real_page}页:{len(combined_items)}')

            # De-duplicate by sku within this page, keeping the first hit.
            seen_skus = set()
            unique_projects = []
            for project in combined_items:
                sku = project.get('sku')
                if sku in seen_skus:
                    continue
                seen_skus.add(sku)
                unique_projects.append(project)

            print(f'{keyword} seen_skus 第{real_page}页:{len(seen_skus)}')

            for item in unique_projects:
                self.item_queue.put((item, search_id))

    def workers(self, task_queue):
        """Thread body: process ``(search_term, search_id)`` tasks until none remain.

        Args:
            task_queue: ``queue.Queue`` of ``(search_term, search_id)`` tuples.

        ``process_keyword`` is deliberately not called under ``self.lock``.
        Holding the lock would run the worker threads one at a time, and
        because it is a non-reentrant ``threading.Lock`` any attempt to take
        it again inside ``process_keyword`` would block forever.

        If ``process_keyword`` raises, the error is printed, the id is put on
        ``failed_queue`` and the thread moves on to the next task.
        """
        while True:
            try:
                # Non-blocking get avoids the race between an empty() check
                # and get() when another thread takes the last task.
                search_term, search_id = task_queue.get_nowait()
            except queue.Empty:
                break

            try:
                self.process_keyword(search_term, search_id)
            except Exception as e:
                print(f'{search_term} process_keyword error:{e}')
                self.failed_queue.put([search_id])
            finally:
                # Always balance the get(), even when processing failed.
                task_queue.task_done()

    def run(self, search_term_id_pairs):
        """Crawl a batch of search terms with 10 threads, save the items, update statuses.

        Args:
            search_term_id_pairs: Iterable of strings of the form
                ``'<search term>|-|-|-|-|-|<search id>'``. Entries without
                the separator are reported and skipped rather than aborting
                the whole batch.

        After the worker threads finish, every queued item is handed to
        ``Con.save_hd_hd_test`` in one call. The ids that produced items go
        to ``success_queue`` when the save succeeds and to ``failed_queue``
        when it raises. Finally the three id queues are drained and passed
        to ``update``.
        """
        separator = '|-|-|-|-|-|'
        thread_count = 10

        # queue.Queue synchronises itself and an unbounded put() never
        # raises Full, so no extra lock or try/except is needed around
        # the queue operations below.
        task_queue = queue.Queue()
        for search_term_id in search_term_id_pairs:
            parts = search_term_id.split(separator)
            if len(parts) < 2:
                print(f'搜索词格式错误，已跳过:{search_term_id}')
                continue
            search_term, search_id = parts[0], parts[1]
            print(f'搜索词:{search_term},id:{search_id}')
            task_queue.put((search_term, search_id))

        threads = []
        for _ in range(thread_count):
            t = threading.Thread(target=self.workers, args=(task_queue,))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        def drain(source_queue):
            """Return everything currently in ``source_queue`` as a list."""
            drained = []
            while True:
                try:
                    drained.append(source_queue.get_nowait())
                except queue.Empty:
                    return drained

        all_items = []
        all_idds = []
        for item, idd in drain(self.item_queue):
            all_items.append(item)
            all_idds.append(idd)

        # De-duplicate the ids while keeping first-seen order.
        unique_idds = list(dict.fromkeys(all_idds))

        try:
            Con.save_hd_hd_test(all_items)
        except Exception as e:
            print(f"保存时发生错误: {e}")
            self.failed_queue.put(unique_idds)
        else:
            self.success_queue.put(unique_idds)

        all_success_ids = drain(self.success_queue)
        all_failed_ids = drain(self.failed_queue)
        all_invalid_ids = drain(self.invalid_queue)

        self.update(all_success_ids, all_failed_ids, all_invalid_ids)

def worker(start_id, limit):
    """Load one batch of search terms via ``Con`` and crawl it.

    Args:
        start_id: First id of the batch, passed to ``Con.get_hd_search_test``.
        limit: Number of rows requested for the batch.

    Nothing is crawled when the lookup returns an empty result.
    """
    pairs = Con.get_hd_search_test(start_id, limit)
    print(f'{start_id}"-[][][]-"{start_id + limit},{pairs}')
    if not pairs:
        return
    HomeDepot().run(pairs)

def main(minid=1000, maxid=2001, num_processes=5, batch_size=100):
    """Split an id range into batches and crawl them in a process pool.

    Args:
        minid: First id to process (inclusive).
        maxid: Last id to process (inclusive).
        num_processes: Number of pool processes.
        batch_size: Maximum number of ids handed to one ``worker`` call.

    Each batch is submitted to ``worker`` as ``(start_id, row_count)``.
    The start time and total elapsed time are printed.
    """
    start_time = datetime.datetime.now()
    print(f'开始爬取---{start_time}')

    minid = int(minid)
    maxid = int(maxid)

    # One (start_id, row_count) pair per batch; the last batch is clipped so
    # it never runs past maxid.
    data_range = [
        (start_id, min(start_id + batch_size - 1, maxid) - start_id + 1)
        for start_id in range(minid, maxid + 1, batch_size)
    ]
    print(data_range)

    def report_failure(exc):
        # apply_async results are never fetched, so without this callback an
        # exception raised inside a worker would disappear silently.
        print(f'worker error: {exc!r}')

    # The with-block guarantees the pool is torn down if submission fails.
    with Pool(num_processes) as p:
        for start_id, limit in data_range:
            p.apply_async(worker, args=(start_id, limit), error_callback=report_failure)

        p.close()
        p.join()

    current_time = datetime.datetime.now()
    elapsed_time = current_time - start_time
    print(f'全部爬取完毕，耗时：{elapsed_time}')

# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()







