from all_connect import ConnectSpider
import ast
import gzip
import requests
from lxml import etree
from  amazon_params import py_ja3
from datetime import datetime
import re
from datetime import datetime
from collections import defaultdict
import json
# Module-level ConnectSpider instance, constructed once when this module is
# imported.
# NOTE(review): ConnectSpider comes from all_connect and `Con` is not used in
# the portion of the file visible here - presumably later code relies on it;
# confirm before changing.
Con = ConnectSpider()



class GetHtmlDetails(object):
    def __init__(self, cookies=None, headers=None):
        """Set up the cookies and headers sent with each product-page request.

        Args:
            cookies: Optional mapping of cookie name to value. When given, a
                copy of it is used instead of the built-in cookie set, so a
                fresh session can be supplied without editing this file.
            headers: Optional mapping of header name to value. When given, a
                copy of it is used instead of the built-in header set.

        With no arguments the instance gets exactly the same ``cookies`` and
        ``headers`` dictionaries as before.
        """
        if cookies is not None:
            # Copy so later mutation of self.cookies never leaks to the caller.
            self.cookies = dict(cookies)
        else:
            # NOTE(review): these look like values captured from one browser
            # session (session id/token, captcha markers); they are presumably
            # short-lived - prefer passing `cookies` explicitly. Confirm.
            self.cookies = {
                'session-id': '138-7972032-1402868',
                'i18n-prefs': 'USD',
                'ubid-main': '134-9153986-6135400',
                'lc-main': 'en_US',
                'session-id-time': '2082787201l',
                'x-amz-captcha-1': '1720064461331751',
                'x-amz-captcha-2': '9FJ9aWmeMgYXgPtd1k0BZA==',
                'session-token': 'f64VywAY2c6ZHVCOG21LVT/c3KP1qlgpC2AxNQlDmuh/CFYUBWjL+g1yk9Pp8o9QFJTzsGoK8rxsN+Bh8QhPZ8/YWziaAb7UMDmgohc08qZOINXortgh++N0i9v9y3Uswrh0+TRgbMvGvxswQB+x1bVR2zDVyxFK3jWGtZzmNOuHYIMJks9koNEe7KFmFAsIg3tyo49hpu/ayzx/zPDRHy4sbortQxTeAgUZMusWI6Cuh817sVzra97/rk4zpuqNz9IS5EetHRxSnr6yIpJe/NQlPMU9htTun+NsnI8ZovDVWRDLHpZ7NGAyfrEqQTM7yt99hIklwbCXUDaA9kc/hkGw4XcMPGsO',
                'csm-hit': 'tb:6G2FNM91K9DN8GNR7WJ6+s-GMD3X5FQRFPQF1K92NT9|1720057322140&t:1720057322140&adb:adblk_no',
            }

        if headers is not None:
            self.headers = dict(headers)
        else:
            # Header set describing Chrome 123 on Windows. No 'cookie' header
            # is set here: cookies travel via self.cookies instead.
            self.headers = {
                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
                'accept-language': 'zh-CN,zh;q=0.9',
                'cache-control': 'max-age=0',
                'device-memory': '8',
                'downlink': '10',
                'dpr': '1',
                'ect': '4g',
                'rtt': '50',
                'sec-ch-device-memory': '8',
                'sec-ch-dpr': '1',
                'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
                'sec-ch-ua-mobile': '?0',
                'sec-ch-ua-platform': '"Windows"',
                'sec-ch-ua-platform-version': '"10.0.0"',
                'sec-ch-viewport-width': '1919',
                'sec-fetch-dest': 'document',
                'sec-fetch-mode': 'navigate',
                'sec-fetch-site': 'none',
                'sec-fetch-user': '?1',
                'upgrade-insecure-requests': '1',
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
                'viewport-width': '1919',
            }

    def parse_html(self, html_string):
        """Decode a gzip-compressed HTML payload into a UTF-8 string.

        Args:
            html_string: Either the ``repr`` of a bytes object as text (for
                example ``"b'\\x1f\\x8b...'"``), which is first turned back
                into bytes with ``ast.literal_eval``, or the gzip-compressed
                ``bytes``/``bytearray`` itself.

        Returns:
            The decompressed text, or ``None`` when the input is of any other
            type or when evaluating, decompressing or decoding fails (the
            error is printed, not raised).
        """
        try:
            # A string input is expected to be the textual form of a bytes
            # literal; literal_eval only evaluates literals, never code.
            if isinstance(html_string, str):
                html_string = ast.literal_eval(html_string)
            elif not isinstance(html_string, (bytes, bytearray)):
                # Nothing usable to decompress.
                return None

            # Previously a payload that was already bytes fell through and
            # produced None; it is now decompressed like the evaluated form.
            return gzip.decompress(html_string).decode('utf-8')

        except Exception as e:
            # Best-effort by design: report the failure and return None.
            print("查询失败，错误信息：", e)
            return None

    def request_data(self, asin, timeout=30):
        """Fetch the Amazon product page for ``asin`` and pass it to ``parse_data``.

        Args:
            asin: Product identifier interpolated into
                ``https://www.amazon.com/dp/{asin}`` (e.g. ``'B00F7VP5NY'``).
            timeout: Value forwarded to ``requests`` as the request timeout,
                in seconds (default 30). Without one, a stalled connection
                would block this call indefinitely.

        Raises:
            requests.exceptions.RequestException: Network errors, including
                timeouts, are not caught here and propagate to the caller.
        """
        # Using the session as a context manager guarantees its pooled
        # connections are released even if the request or parsing raises.
        with requests.Session() as sess:
            # Mount the project's DESAdapter for amazon.com URLs.
            # NOTE(review): presumably this adjusts the TLS fingerprint,
            # judging by the py_ja3 module name - confirm.
            sess.mount('https://www.amazon.com/', py_ja3.DESAdapter())
            response = sess.get(
                f'https://www.amazon.com/dp/{asin}',
                cookies=self.cookies,
                headers=self.headers,
                timeout=timeout,
            )
            self.parse_data(response, asin)

    def parse_data(self,response,asin):
        # with open('get_pic.py', 'w',encoding='utf-8') as f:
        #     f.write(response.text)
        print(response)
        html = etree.HTML(response.text)
        # with open('get_pic.py', 'r',encoding='utf-8') as f:
        #     response_text = f.read()
        # print(response)
        # html = etree.HTML(response_text)
        img_url = html.xpath('//div[@id="imgTagWrapperId"]/img/@src')[0] if html.xpath('//div[@id="imgTagWrapperId"]/img/@src') else None
        title = html.xpath('//span[@id="productTitle"]/text()')[0].replace('  ','')
        title_len = len(title)
        price = html.xpath('//div[@class="a-section a-spacing-none aok-align-center"]/span/span[@class="a-offscreen"]/text() | //input[@id="twister-plus-price-data-price"]/@value | //span[@class="a-price a-text-price a-size-medium apexPriceToPay"]/span[@class="a-offscreen"]/text()')[0] if html.xpath('//div[@class="a-section a-spacing-none aok-align-center"]/span/span[@class="a-offscreen"]/text() | //input[@id="twister-plus-price-data-price"]/@value | //span[@class="a-price a-text-price a-size-medium apexPriceToPay"]/span[@class="a-offscreen"]/text()') else None
        if price and '$' in price:
            price = price.split('$')[1]

        rat_title = html.xpath('//span[@id="acrPopover" ]/@title')[0] if html.xpath('//span[@id="acrPopover" ]/@title') else None
        if rat_title:
            rating = rat_title.split(' ')[0]
        else:
            rating = None
        comment = html.xpath('//span[@id="acrCustomerReviewText" ]/text()')[0] if html.xpath('//span[@id="acrCustomerReviewText" ]/text()') else None
        if comment:
            total_comments = comment.split(' ')[0]
        else:
            total_comments = None



        ships = [aa.strip() for aa in html.xpath('//div[@id="fulfillerInfoFeature_feature_div"]//text()') if aa.strip()][1] if html.xpath('//div[@id="fulfillerInfoFeature_feature_div"]//text()') else None
        if not ships:
            ships = html.xpath('//a[@id="SSOFpopoverLink_ubb"]/text()')[0].split('Fulfilled by')[-1] if html.xpath('//a[@id="SSOFpopoverLink_ubb"]/text()') else None
        sold = html.xpath('//div[@id="merchantInfoFeature_feature_div"]//a/text()')[0] if html.xpath('//div[@id="merchantInfoFeature_feature_div"]//a/text()') else None
        if not sold:
            sold = html.xpath('//div[@id="merchantInfoFeature_feature_div"]//span/text()')[1] if html.xpath('//div[@id="merchantInfoFeature_feature_div"]//span/text()') else None
            if not sold:
                sold = html.xpath('//a[@id="sellerProfileTriggerId"]/text()')[0] if html.xpath('//a[@id="sellerProfileTriggerId"]/text()') else None
        if ships and sold:
            if 'amazon' in ships.lower() and 'amazon' in sold.lower() :
                buy_box_seller_type = 1
            elif 'amazon' in ships.lower() and 'amazon' not in sold.lower() :
                buy_box_seller_type = 2
            elif 'amazon' not in ships.lower()  and 'amazon' not in sold.lower() :
                buy_box_seller_type = 3

        elif sold and not ships:
            buy_box_seller_type = 3
        else:
            buy_box_seller_type = 4


        in_stock = len(html.xpath('//div[@id="quantityRelocate_feature_div"]//select[@id="quantity" ]/option/text()')) if html.xpath('//div[@id="quantityRelocate_feature_div"]//select[@id="quantity" ]/option/text()') else None
        if not in_stock:
            in_stock = int(html.xpath('//span[@class="a-size-base a-color-price a-text-bold"]/text()')[0].split('Only')[1].split('left')[0]) if html.xpath('//span[@class="a-size-base a-color-price a-text-bold"]/text()') else None
        if in_stock and in_stock >20 :
            page_inventory = 1
        elif (in_stock and 1 <= in_stock <= 20) or html.xpath('//span[@class="a-size-medium a-color-success" and contains(text(),"In stock")]/text()'):
            page_inventory = 2
        else:
            page_inventory = 3

        category = ''.join([item.strip() for item in html.xpath('//ul[@class="a-unordered-list a-horizontal a-size-small"]//text()') if item.strip()])


        weight_strs = [item.strip() for item in html.xpath('//span[contains(text(),"Item Weight")]/../span[2]/text() | //th[contains(text(),"Item Weight")]/../td//text()') if item.strip()][0] if html.xpath('//span[contains(text(),"Item Weight")]/../span[2]/text() | //th[contains(text(),"Item Weight")]/../td//text()') else None
        if not weight_strs :
            weight_strs = [item.strip() for item in html.xpath(' //span[contains(text(),"Item Weight")]/../../td[2]//text() ') if item.strip()][0] if html.xpath(' //span[contains(text(),"Item Weight")]/../../td[2]//text() ') else None

        volume_str = [item.strip().replace('\u200e','') for item in html.xpath('//span[contains(text(),"Dimensions")]/../../div[3]/span/text() | //span[contains(text(),"Product Dimensions")]/../span[2]/text() | //span[contains(text(),"Package Dimensions")]/../span[2]/text() | //th[contains(text(),"Product Dimensions")]/../td//text()')][0] if html.xpath('//span[contains(text(),"Dimensions")]/../../div[3]/span/text() | //span[contains(text(),"Product Dimensions")]/../span[2]/text() | //span[contains(text(),"Package Dimensions")]/../span[2]/text() | //th[contains(text(),"Product Dimensions")]/../td//text()') else None
        if volume_str and ';' in volume_str:
            volume = volume_str.split(';')[0]
            weight_strs = volume_str.split(';')[1]
        else:
            volume = volume_str

        if weight_strs :
            weight_strs = weight_strs.lower()
            if weight_strs and 'pounds' in weight_strs:
                weight = weight_strs.split(' pounds')[0]
            elif weight_strs and 'ounces' in weight_strs:
                weight =float(weight_strs.split(' ounces')[0].replace('\u200e', '')) / 16
            elif weight_strs and 'kilograms' in weight_strs:
                weight = round(float(weight_strs.split(' kilograms')[0]) * 2.20462,3)
            elif weight_strs and 'grams' in weight_strs:
                weight = round(float(weight_strs.split(' grams')[0]) / 453.592,3)
        else:
            weight = None

        rank = [item.strip().replace(',','') for item in html.xpath('//span[contains(text(),"Best Sellers Rank")]/../text() | //th[contains(text(),"Best Sellers Rank")]/../td//text()') if item.strip()][0].split('#')[1].split('in')[0] if html.xpath('//span[contains(text(),"Best Sellers Rank")]/../text() | //th[contains(text(),"Best Sellers Rank")]/../td//text()') else None



        launch_time = html.xpath('//span[contains(text(),"Date First Available")]/../span[2]/text() | //th[contains(text(),"Date First Available")]/../td/text()')[0].strip() if html.xpath('//span[contains(text(),"Date First Available")]/../span[2]/text() | //th[contains(text(),"Date First Available")]/../td/text()') else None
        if launch_time:
            date_obj = datetime.strptime(launch_time, "%B %d, %Y")
            launch_time = date_obj.strftime("%Y-%m-%d")

        img_num = len(html.xpath('//ul//li[@class="a-spacing-small item imageThumbnail a-declarative"] | //ul[@class="a-unordered-list a-nostyle a-button-list a-vertical a-spacing-top-micro gridAltImageViewLayoutIn1x7"]//li[@class="a-spacing-small item"] | //ul//li[@class="a-spacing-small item"]')) if html.xpath('//ul//li[@class="a-spacing-small item imageThumbnail a-declarative"] | //ul[@class="a-unordered-list a-nostyle a-button-list a-vertical a-spacing-top-micro gridAltImageViewLayoutIn1x7"]//li[@class="a-spacing-small item"] | //ul//li[@class="a-spacing-small item"]') else None

        if img_num:
            img_num=img_num
        elif not img_num and img_url:
            img_num = 1
        else:
            img_num = 0

        image_type = []
        if img_url:
            image_type.append(1)
        video = [item.strip() for item in html.xpath('//ul//li[@class="a-spacing-small item videoThumbnail videoBlockIngress videoBlockDarkIngress a-declarative multiple-videos"] | //ul//li[@class="a-spacing-small videoCountTemplate aok-hidden"]/span/text() | //ul//li[@class="a-align-top a-spacing-small item videoBlockIngress videoBlockDarkIngress"]//img/@src') if item.strip()] if html.xpath('//ul//li[@class="a-spacing-small item videoThumbnail videoBlockIngress videoBlockDarkIngress a-declarative multiple-videos"] | //ul//li[@class="a-spacing-small videoCountTemplate aok-hidden"]/span/text() | //ul//li[@class="a-align-top a-spacing-small item videoBlockIngress videoBlockDarkIngress"]//img/@src') else None
        if video:
            image_type.append(2)
        pics = html.xpath('//div[@class="celwidget aplus-module 3p-module-b aplus-standard"]//img/@src') if html.xpath('//div[@class="celwidget aplus-module 3p-module-b aplus-standard"]//img/@src') else None
        if pics:
            image_type.append(3)
        image_type = ','.join(map(str, image_type))


        # activity_type
        coupon_type = html.xpath('//i[contains(text(),"Coupon:")]/..//span[@class="a-color-success"]/label/text() | //span[@data-csa-c-type="item"]/label/text()')[0] if html.xpath('//i[contains(text(),"Coupon:")]/..//span[@class="a-color-success"]/label/text() | //span[@data-csa-c-type="item"]/label/text()') else None
        if coupon_type and '%' in coupon_type:
            activity_type = 1
        elif coupon_type and '$' in coupon_type:
            activity_type = 2
        else:
            activity_type = None
        if activity_type == 1 and 'Apply' in coupon_type:
            one_two_val = coupon_type.split('Apply ')[1].split('% ')[0]
        elif activity_type == 1 and 'Save' in coupon_type:
            one_two_val = coupon_type.split('Save ')[1].split('%')[0]
        else:
            one_two_val = None

        join_prime = html.xpath('//span[contains(text(),"Join Prime")]/../span[2]/text()')[0] if html.xpath('//span[contains(text(),"Join Prime")]/../span[2]/text()') else None
        if join_prime and '%' in join_prime:
            activity_type = 3
            three_four_val = join_prime.split('%')[1]
        elif join_prime and '$' in join_prime:
            activity_type = 4
            three_four_val = join_prime.split('$')[1]
        else:
            activity_type = None
            three_four_val = None

        eight_save = [item.strip() for item in html.xpath('//td[contains(text(),"You Save")]/../td[2]/span/text()') if item.strip()][0] if html.xpath('//td[contains(text(),"You Save")]/../td[2]/span/text()') else None
        if eight_save:
            activity_type = 8

        eight_val = None
        five_six_val = None

        one_star = html.xpath('//span[contains(text(),"1 star")]/../../td[3]//text() | //a[contains(text(),"1 star")]/../../td[3]//text()')[0].replace('%','') if html.xpath('//span[contains(text(),"1 star")]/../../td[3]//text() | //a[contains(text(),"1 star")]/../../td[3]//text()') else None
        two_star = html.xpath('//a[contains(text(),"2 star")]/../../td[3]//text() | //span[contains(text(),"2 star")]/../../td[3]//text()')[0].replace('%','') if html.xpath('//a[contains(text(),"2 star")]/../../td[3]//text() | //span[contains(text(),"2 star")]/../../td[3]//text()') else None
        three_star = html.xpath('//span[contains(text(),"3 star")]/../../td[3]//text() | //a[contains(text(),"3 star")]/../../td[3]//text()')[0].replace('%','') if html.xpath('//span[contains(text(),"3 star")]/../../td[3]//text() | //a[contains(text(),"3 star")]/../../td[3]//text()') else None
        four_star = html.xpath('//a[contains(text(),"4 star")]/../../td[3]//text()')[0].replace('%','') if html.xpath('//a[contains(text(),"4 star")]/../../td[3]//text()') else None
        five_star = html.xpath('//a[contains(text(),"5 star")]/../../td[3]//text()')[0].replace('%','') if html.xpath('//a[contains(text(),"5 star")]/../../td[3]//text()') else None
        low_star = int(one_star) + int(two_star) + int(three_star)

        together_asin_list = [re.findall('pd_rd_i=(.*?)&psc',item) for item in html.xpath('//div[@class="a-cardui _p13n-desktop-sims-fbt_fbt-desktop_new-thumbnail-box__36bD3"]//div//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href')] if html.xpath('//div[@class="a-cardui _p13n-desktop-sims-fbt_fbt-desktop_new-thumbnail-box__36bD3"]//div//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href') else None
        if together_asin_list:
            flat_list = [item for sublist in together_asin_list for item in sublist]
            together_asin = ','.join(flat_list)
        else:
            together_asin = None

        if not together_asin_list:
            together_asin_list = [re.findall('dp/(.*?)/ref',item) for item in html.xpath('//div[@class="a-section a-spacing-none _p13n-desktop-sims-fbt_fbt-desktop_link-area__1VLAZ"]/a/@href')] if html.xpath('//div[@class="a-section a-spacing-none _p13n-desktop-sims-fbt_fbt-desktop_link-area__1VLAZ"]/a/@href') else None
            if together_asin_list:
                flat_list = [item for sublist in together_asin_list for item in sublist]
                together_asin = ','.join(flat_list)
            else:
                together_asin = None
        else:
            together_asin = None


        brand_str = html.xpath('//a[@id="bylineInfo"]/text()')[0] if html.xpath('//a[@id="bylineInfo"]/text()') else None
        if brand_str and 'Brand: ' in brand_str:
            brand = brand_str.split('Brand: ')[1]
        elif brand_str and 'Visit the ' in brand_str and ' Store' in brand_str:
            brand = brand_str.split('Visit the ')[1].split(' Store')[0]
        else:
            brand = None

        ac_name = [item.strip() for item in html.xpath('//span[@class="ac-for-text"]//text()') if item.strip()][0].split('in ')[1] if html.xpath('//span[@class="ac-for-text"]//text()') else None
        material = html.xpath('//table[@class="a-normal a-spacing-micro"]//span[text() = "Material"]/../../td[2]/span/text()')[0] if html.xpath('//table[@class="a-normal a-spacing-micro"]//span[text() = "Material"]/../../td[2]/span/text()') else None




        node_id_str = html.xpath('//ul[@class="a-unordered-list a-horizontal a-size-small"]//li[last()]//a/@href')[0] if html.xpath('//ul[@class="a-unordered-list a-horizontal a-size-small"]//li[last()]//a/@href') else None
        if node_id_str and 'node=' in node_id_str:
            node_id = node_id_str.split('node=')[1]
        elif node_id_str and 'fashion' in node_id_str and 'ref' in node_id_str:
            node_id = node_id_str.split('fashion')[1].split('ref')[0]
        else:
            node_id = node_id_str

        describe = '|-|'.join(html.xpath('//ul[@class="a-unordered-list a-vertical a-spacing-mini"]/li//text() | //ul[@class="a-unordered-list a-vertical a-spacing-small"]//li//text()')) if html.xpath('//ul[@class="a-unordered-list a-vertical a-spacing-mini"]/li//text() | //ul[@class="a-unordered-list a-vertical a-spacing-small"]//li//text()') else None
        now = datetime.now()
        # created_time = now.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        # updated_time = now.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        date_info = now.strftime('%Y-%m')
        weight_str = [item.strip().replace('\u200e','') for item in html.xpath('//th[contains(text(),"Item Weight")]/../td//text()') if item.strip()][0] if html.xpath('//th[contains(text(),"Item Weight")]/../td//text()') else None


        pattern_name = html.xpath('//div[@id="customer_review-RYQ2ES848T5PV"]//span[@data-hook="format-strip-linkless"]/text()')[0].split('Pattern Name: ')[1] if html.xpath('//div[@id="customer_review-RYQ2ES848T5PV"]//span[@data-hook="format-strip-linkless"]/text()') else None
        follow_sellers = html.xpath('//span[@class="a-declarative"]/span[@class="a-color-base"]/text()')[0].split('(')[1].split(')')[0] if html.xpath('//span[@class="a-declarative"]/span[@class="a-color-base"]/text()') else None
        sp_related_to_term = html.xpath('//div[@id="sp_detail2"]/@data-a-carousel-options')[0] if html.xpath('//div[@id="sp_detail2"]/@data-a-carousel-options') else None
        if sp_related_to_term:
            sp_1 = re.findall('"set_size":(.*?),"filteredItems"',sp_related_to_term)[0]
        else:
            sp_1 = 0
        sp_4_stars_and_above = html.xpath('//h2[contains(text(),"4 stars and above")]/../../../@data-a-carousel-options')[0] if html.xpath('//h2[contains(text(),"4 stars and above")]/../../../@data-a-carousel-options') else None
        if sp_4_stars_and_above:
            sp_2 = re.findall('"set_size":(.*?),"filteredItems"',sp_4_stars_and_above)[0]
        else:
            sp_2 = 0
        free_delivery = html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../@data-a-carousel-options')[0] if html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../@data-a-carousel-options') else None
        if free_delivery:
            sp_3 = re.findall('"set_size":(.*?),"filteredItems"',free_delivery)[0]
        else:
            sp_3 = 0
        sp_num = str(sp_1) + ',' + str(sp_2) + ',' + str(sp_3)

        product_description = '|-|'.join([item.strip() for item in html.xpath('//table[@class="apm-fixed-width apm-eventhirdcol-table"]//td//text()  | //div[@id="productDescription"]//span/text()') if item.strip()]) if html.xpath('//table[@class="apm-fixed-width apm-eventhirdcol-table"]//td//text()  | //div[@id="productDescription"]//span/text()') else None
        buy_sales = html.xpath('//span[@id="social-proofing-faceout-title-tk_bought"]/span/text()')[0] if html.xpath('//span[@id="social-proofing-faceout-title-tk_bought"]/span/text()') else None
        package_quantity = html.xpath('//label[contains(text(),"Item Package Quantity:")]/../span/text()')[0].strip() if html.xpath('//label[contains(text(),"Item Package Quantity:")]/../span/text()') else None




        product_info_dict = html.xpath('//table[@class="a-normal a-spacing-micro"]//span/text()') if html.xpath('//table[@class="a-normal a-spacing-micro"]//span/text()') else None
        if product_info_dict:
            product_json = json.dumps({product_info_dict[i]: product_info_dict[i + 1] for i in range(0, len(product_info_dict), 2)})
        else:
            product_json = product_info_dict

        detail_json = [item.strip().replace('\u200e','') for item in html.xpath('//h1[contains(text(),"Technical Details")]/../../..//tr//text() | //h1[contains(text(),"Additional Information")]/..//td[@class="a-size-base prodDetAttrValue"]/..//text()') if item.strip()] if html.xpath('//h1[contains(text(),"Technical Details")]/../../..//tr//text() | //h1[contains(text(),"Additional Information")]/..//td[@class="a-size-base prodDetAttrValue"]/..//text()') else None
        if detail_json:
            product_detail_json = json.dumps({detail_json[i]: detail_json[i + 1] for i in range(0, len(detail_json), 2)})
        else:
            product_detail_json = detail_json

        review_ai_text = html.xpath('//h3[contains(text(),"Customers say")]/../../p/span/text()')[0] if html.xpath('//h3[contains(text(),"Customers say")]/../../p/span/text()') else None

        # sp_initial_seen_asins_json
        seen_asins = [item.split('_')[-1] for item in html.xpath('//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li/div/@id')] if html.xpath('//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li/div/@id') else None
        seen_asins_title = html.xpath('//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li/div/a/@title') if html.xpath('//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li/div/a/@title') else None
        seen_asins_src = html.xpath('//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li/div//img/@src') if html.xpath('//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li/div//img/@src') else None
        seen_asins_total_comments = [item.xpath('./div//span[@class="a-color-link"]/text()')[0] if item.xpath('./div//span[@class="a-color-link"]/text()') else None for item in html.xpath('//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li')]
        seen_asins_price = [item.xpath('./div//span[@class="a-offscreen"]/text()')[0] if item.xpath('./div//span[@class="a-offscreen"]/text()') else None for item in html.xpath('//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li')]
        if seen_asins and seen_asins_title and seen_asins_src and seen_asins_total_comments and seen_asins_price:
            initial_combined = zip(seen_asins, seen_asins_title, seen_asins_src, seen_asins_total_comments,seen_asins_price)
            sp_initial_seen_asins_json = json.dumps([{"seen_asins": seen_asins, "seen_asins_title": seen_asins_title, "seen_asins_src": seen_asins_src,"seen_asins_total_comments": seen_asins_total_comments, "seen_asins_price": seen_asins_price} for seen_asins, seen_asins_title, seen_asins_src, seen_asins_total_comments, seen_asins_price in initial_combined])
        else:
            sp_initial_seen_asins_json = None


        # sp_4stars_initial_seen_asins_json
        seen_asins = [item.split('_')[-1] for item in html.xpath('//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li/div/@id')] if html.xpath('//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li/div/@id') else None
        seen_asins_title = html.xpath('//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li/div/a/@title') if html.xpath('//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li/div/a/@title') else None
        seen_asins_src = html.xpath('//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li/div//img/@src') if html.xpath('//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li/div//img/@src') else None
        seen_asins_total_comments = [item.xpath('./div//span[@class="a-color-link"]/text()')[0] if item.xpath('./div//span[@class="a-color-link"]/text()') else None for item in html.xpath('//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li')]
        seen_asins_price = [item.xpath('./div//span[@class="a-offscreen"]/text()')[0] if item.xpath('./div//span[@class="a-offscreen"]/text()') else None for item in html.xpath('//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li')]

        if seen_asins and seen_asins_title and seen_asins_src and seen_asins_total_comments and seen_asins_price:
            delivery_initial_combined = zip(seen_asins, seen_asins_title, seen_asins_src,seen_asins_total_comments,seen_asins_price)
            sp_4stars_initial_seen_asins_json = json.dumps([{"seen_asins": seen_asins, "seen_asins_title": seen_asins_title, "seen_asins_src": seen_asins_src,"seen_asins_total_comments":seen_asins_total_comments,"seen_asins_price":seen_asins_price} for seen_asins, seen_asins_title, seen_asins_src,seen_asins_total_comments,seen_asins_price in delivery_initial_combined])
        else:
            sp_4stars_initial_seen_asins_json = None


        # sp_delivery_initial_seen_asins_json
        seen_asins = [item.split('_')[-1] for item in html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li/div/@id')] if html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li/div/@id') else None
        seen_asins_title = html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li/div/a/@title') if html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li/div/a/@title') else None
        seen_asins_src = html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li/div/a/img/@src') if html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li/div/a/img/@src') else None
        seen_asins_total_comments = [item.xpath('.//span[@class="a-color-link"]/text()')[0] if item.xpath('.//span[@class="a-color-link"]/text()') else None for item in html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li')]
        seen_asins_price = [item.xpath('.//span[@class="a-offscreen"]/text()')[0] if item.xpath('.//span[@class="a-offscreen"]/text()') else None for item in html.xpath('//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li')]

        if seen_asins and seen_asins_title and seen_asins_src and seen_asins_total_comments and seen_asins_price:
            delivery_initial_combined = zip(seen_asins, seen_asins_title, seen_asins_src,seen_asins_total_comments,seen_asins_price)
            sp_delivery_initial_seen_asins_json = json.dumps([{"seen_asins": seen_asins, "seen_asins_title": seen_asins_title, "seen_asins_src": seen_asins_src,"seen_asins_total_comments":seen_asins_total_comments,"seen_asins_price":seen_asins_price} for seen_asins, seen_asins_title, seen_asins_src,seen_asins_total_comments,seen_asins_price in delivery_initial_combined])
        else:
            sp_delivery_initial_seen_asins_json = None

        # compare_similar_asin_json
        compare_asin = [item.split('-')[-1] for item in html.xpath('//h2[contains(text(),"Compare with similar items")]/../div//tr[1]//div[@tabindex="0"]/div/@id')] if html.xpath('//h2[contains(text(),"Compare with similar items")]/../div//tr[1]//div[@tabindex="0"]/div/@id') else None
        compare_asin_src = html.xpath('//h2[contains(text(),"Compare with similar items")]/../div//tr[1]//div[@tabindex="0"]//img/@src') if html.xpath('//h2[contains(text(),"Compare with similar items")]/../div//tr[1]//div[@tabindex="0"]//img/@src') else None
        compare_asin_title = html.xpath('//h2[contains(text(),"Compare with similar items")]/../div//tr[1]//div[@tabindex="0"]//img/@alt') if html.xpath('//h2[contains(text(),"Compare with similar items")]/../div//tr[1]//div[@tabindex="0"]//img/@alt') else None
        if compare_asin and compare_asin_src and compare_asin_title :
            compare_combined = zip(compare_asin, compare_asin_src, compare_asin_title)
            compare_similar_asin_json = json.dumps([{"compare_asin": compare_asin, "compare_asin_src": compare_asin_src, "compare_asin_title": compare_asin_title} for compare_asin, compare_asin_src, compare_asin_title in compare_combined])
        else:
            compare_similar_asin_json = None

        toge_asin = [item for sublist in [re.findall('/dp/(.*?)/ref',item) for item in html.xpath('//h2[contains(text(),"Frequently bought together")]/../../..//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href | //h2[contains(text(),"Buy it with")]/../../..//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href')] for item in sublist] if html.xpath('//h2[contains(text(),"Frequently bought together")]/../../..//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href | //h2[contains(text(),"Buy it with")]/../../..//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href') else None
        toge_title = html.xpath('//div[@class="a-cardui _p13n-desktop-sims-fbt_fbt-desktop_new-detail-faceout-box___WyNy"][position() > 1]//span[@class="a-size-base"]/text()')
        toge_price = html.xpath('//div[@class="a-cardui _p13n-desktop-sims-fbt_fbt-desktop_new-detail-faceout-box___WyNy"][position() > 1]//span[@class="a-offscreen"]/text()')
        if toge_asin and toge_title and toge_price:
            together_asin_combined = zip(toge_asin, toge_title, toge_price)
            together_asin_json = json.dumps([{"together_asin": asin, "together_asin_title": title, "together_asin_price": price} for asin, title, price in together_asin_combined])
        else:
            together_asin_json = None
        reviews_json = html.xpath('//div[@id="customerReviewsAttribute_feature_div"]//div[@class="a-section a-spacing-none"]//span[@class="a-size-base a-color-base"]/text() |//div[@id="customerReviewsAttribute_feature_div"]//div[@class="a-section a-spacing-none"]//span[@class="a-icon-alt"]/text()')
        customer_reviews_json = json.dumps([{reviews_json[i]: reviews_json[i + 1]} for i in range(0, len(reviews_json), 2)])

        # lob_asin_json
        lob_asin = html.xpath('//div[@id="pba-lob-carousel-row"]//ol/li//a/@href') if html.xpath('//div[@id="pba-lob-carousel-row"]//ol/li//a/@href') else None
        lob_asin_img = html.xpath('//div[@id="pba-lob-carousel-row"]//ol/li//img/@src') if html.xpath('//div[@id="pba-lob-carousel-row"]//ol/li//img/@src') else None
        lob_asin_title = html.xpath('//div[@id="pba-lob-carousel-row"]//ol/li//div[@class="a-section a-spacing-none"]/span/span/text()')
        lob_asin_price = [item.xpath('.//span[@class="a-price pba-lob-bundle-buy-price"]/span[@class="a-offscreen"]/text()')[0] if item.xpath('.//span[@class="a-price pba-lob-bundle-buy-price"]/span[@class="a-offscreen"]/text()') else None for item in html.xpath('//div[@id="pba-lob-carousel-row"]//ol/li')]

        if lob_asin and lob_asin_img and lob_asin_title and lob_asin_price :
            lob_combined = zip(lob_asin, lob_asin_img, lob_asin_title,lob_asin_price)
            lob_asin_json = json.dumps([{"lob_asin": lob_asin, "lob_asin_img": lob_asin_img, "lob_asin_title": lob_asin_title,"lob_asin_price":lob_asin_price,"lob_asin_total_comment":None} for lob_asin, lob_asin_img, lob_asin_title,lob_asin_price in lob_combined])
        else:
            lob_asin_json = None

        # review_lable_json
        review_lable_title = html.xpath('//div[@class="a-section a-spacing-small a-spacing-top-small _cr-product-insights_style_aspect-symbol-list__24amT"]/a/text()') if html.xpath('//div[@class="a-section a-spacing-small a-spacing-top-small _cr-product-insights_style_aspect-symbol-list__24amT"]/a/text()') else None
        if review_lable_title:
            review_lable_json = {}
            for lable_title in review_lable_title:
                reviews = '|-|'.join([item.replace('Read more','') for item in html.xpath(f'//span[contains(text(),"{lable_title}")]/../../../..//text()')])
                review_lable_json[lable_title] = reviews
            json.dumps(review_lable_json)
        else:
            review_lable_json = None

        views = html.xpath('//div[@id="ivImagesTab"]') if html.xpath('//div[@id="ivImagesTab"]') else None
        if views:
            image_view = 1
        else:
            image_view = 0



        fulfilled_by = html.xpath('//a[@id="SSOFpopoverLink_ubb"]/text()')[0].split('Fulfilled by')[-1] if html.xpath('//a[@id="SSOFpopoverLink_ubb"]/text()') else None
        seller_id = html.xpath('//input[@id="merchantID"]/@value')[0] if html.xpath('//input[@id="merchantID"]/@value') else None
        seller_json = json.dumps([{"seller_id": seller_id, "ship_from": ships, "sold_by": sold, "fulfilled_by": fulfilled_by}])


        variat_num = len(html.xpath('//ul[@class="a-unordered-list a-nostyle a-button-list a-declarative a-button-toggle-group a-horizontal a-spacing-top-micro swatches swatchesSquare imageSwatches"]//li')) if html.xpath('//ul[@class="a-unordered-list a-nostyle a-button-list a-declarative a-button-toggle-group a-horizontal a-spacing-top-micro swatches swatchesSquare imageSwatches"]//li') else 0
        curr_asin = html.xpath('//link[@rel="canonical"]/@href')[0].split('dp/')[1]
        if curr_asin == asin:
            current_asin = None
        else:
            current_asin = curr_asin

        img_src = html.xpath('//ul//li[@class="a-spacing-small item imageThumbnail a-declarative"]//img/@src | //ul//li[@class="a-spacing-small item"]//img/@src') if html.xpath('//ul//li[@class="a-spacing-small item imageThumbnail a-declarative"]//img/@src | //ul//li[@class="a-spacing-small item"]//img/@src') else None
        video_src = html.xpath('//ul/li[@class="a-align-top a-spacing-small item videoBlockIngress videoBlockDarkIngress"]//img/@src') if html.xpath('//ul/li[@class="a-align-top a-spacing-small item videoBlockIngress videoBlockDarkIngress"]//img/@src') else None
        pics_src = html.xpath('//h2[contains(text(),"Product Description")]/..//div[@class="aplus-module-wrapper apm-spacing apm-floatnone apm-fixed-width"]//img/@data-src') if html.xpath('//h2[contains(text(),"Product Description")]/..//div[@class="aplus-module-wrapper apm-spacing apm-floatnone apm-fixed-width"]//img/@data-src') else None

        img_list = json.dumps([
            [asin, src, idx, type]
            for type, srcs in enumerate([img_src, video_src, pics_src], start=1)
            if srcs is not None
            for idx, src in enumerate(srcs, start=1)])
        parent_asin = re.findall('"parent_asin":"(.*?)",', response.text)[0] if re.findall('"parent_asin":"(.*?)",', response.text) else None
        min_match_asin_json = None

        asin_detail = re.findall('"dimensionValuesDisplayData" : \{(.*?)},', response.text) if re.findall('"dimensionValuesDisplayData" : \{(.*?)},', response.text) else None
        if asin_detail:
            # 使用正则表达式提取 asins 和 details
            asins = re.findall(r'"(\w+)":\[', str(asin_detail))
            details = [json.loads(item) for item in re.findall('":(\[".*?"])', str(asin_detail))]
            # 创建 variat_list，包含每个 asin 和对应的 detail
            variat_list = [[asin] + detail for asin, detail in zip(asins, details)]
            # 向每个子列表添加额外的值
            for detail in variat_list:
                detail.extend([parent_asin, 1, None, None])
            variat_list = json.dumps(variat_list)
        else:
            variat_list = None



        item = {}
        item['asin'] = asin
        item['img_url'] = img_url
        item['title'] = title
        item['title_len'] =title_len
        item['price'] = price
        item['rating'] = rating
        item['total_comments'] = total_comments
        item['buy_box_seller_type'] = buy_box_seller_type
        item['page_inventory'] = page_inventory
        item['category'] =category
        item['volume'] = volume
        item['weight'] = weight
        item['rank'] = rank
        item['launch_time'] = launch_time
        item['category_state'] = None
        item['img_num'] = img_num
        item['image_type'] = image_type
        item['activity_type'] = activity_type
        item['one_two_val'] = one_two_val
        item['three_four_val'] = three_four_val
        item['five_six_val'] = five_six_val
        item['eight_val'] = eight_val
        item['qa_num'] = None
        item['one_star'] = one_star
        item['two_star'] = two_star
        item['three_star'] =three_star
        item['four_star'] = four_star
        item['five_star'] = five_star
        item['low_star'] = low_star
        item['together_asin']= together_asin
        item['brand']=  brand
        item['ac_name']= ac_name
        item['material']=  material
        item['node_id']= node_id
        item['data_type'] = None
        item['sp_num']= sp_num
        item['describe']= describe
        item['date_info']=  date_info
        item['weight_str']=  weight_str
        item['package_quantity']=  package_quantity
        item['pattern_name']=  pattern_name
        item['spider_int'] = None
        item['follow_sellers']=  follow_sellers
        item['product_description']=  product_description
        item['buy_sales']=  buy_sales
        item['image_view']= image_view
        item['product_json']= product_json
        item['product_detail_json']= product_detail_json
        item['review_ai_text:']= review_ai_text
        item['review_lable_json'] = review_lable_json
        item['lob_asin_json:'] = lob_asin_json
        item['sp_initial_seen_asins_json'] = sp_initial_seen_asins_json
        item['sp_4stars_initial_seen_asins_json'] = sp_4stars_initial_seen_asins_json
        item['sp_delivery_initial_seen_asins_json'] = sp_delivery_initial_seen_asins_json
        item['compare_similar_asin_json']=  compare_similar_asin_json
        item['customer_reviews_json']=  customer_reviews_json
        item['together_asin_json']=  together_asin_json
        item['min_match_asin_json']= min_match_asin_json
        item['seller_json']= seller_json
        item['variat_num']= variat_num
        item['current_asin']=  current_asin
        item['img_list']= img_list
        item['variat_list']= variat_list
        item['parent_asin']= parent_asin

        print(item)



        Con.save_us_asin_detail_2024_wj(item)

    def run(self, asin='B00DU25BBS'):
        """Process a single product by handing its ASIN to ``request_data``.

        Args:
            asin: ASIN string forwarded unchanged to ``self.request_data``.
                Defaults to the sample ASIN this method previously
                hard-coded, so calling ``run()`` with no arguments behaves
                exactly as before.

        Returns:
            None. Whatever ``self.request_data`` returns is not propagated.
        """
        # TODO(review): an earlier, disabled draft of this method pulled a
        # work list from ``Con.connect()`` and pushed each entry onto
        # ``self.analyse_html_queue``. That path referenced an undefined
        # name and was never active; re-introduce it deliberately if batch
        # processing is needed.
        self.request_data(asin)



if __name__ == '__main__':
    # Script entry point: build the spider and run it with its default
    # arguments. Nothing here executes when the module is imported.
    spider = GetHtmlDetails()
    spider.run()




