import json
from lxml import etree
import re
from datetime import datetime
import ast


class HtmlDetailsParser():
    def __init__(self):
        pass

    def parse_head_img_url_and_img_type_and_img_num(self,asin,html):
        """Extract the main image URL, media-type flags and thumbnail count.

        Args:
            asin: Product identifier; only used in the error message.
            html: Parsed page exposing an ``xpath(query)`` method.

        Returns:
            A tuple ``(img_url, img_type, img_num)``:

            * ``img_url`` - first main-image ``src`` cut to 400 characters,
              or ``None`` when no main image matched.
            * ``img_type`` - comma-joined flags cut to 10 characters:
              ``1`` main image found, ``2`` video markers found, ``3`` images
              found in the A+ / "From the manufacturer" area.
            * ``img_num`` - number of matched thumbnail ``<li>`` nodes, plus
              one when a video was detected; ``0`` when none matched.

            ``None`` is returned (after printing the error) if anything raises.
        """
        try:
            main_src = html.xpath('//div[@id="imgTagWrapperId"]/img/@src')
            img_url = main_src[0][:400] if main_src else None

            video_hits = html.xpath(
                '//ul//li[@class="a-spacing-small item videoThumbnail videoBlockIngress videoBlockDarkIngress a-declarative multiple-videos"] | //ul//li[@class="a-spacing-small videoCountTemplate aok-hidden"]/span/text() | //ul//li[@class="a-align-top a-spacing-small item videoBlockIngress videoBlockDarkIngress"]//img/@src')
            # The first branch of the union selects <li> nodes, not text, so a
            # hit is not always a string.  Calling .strip() on such a node used
            # to raise and abort the whole method; a matched node is treated as
            # a video marker, while string hits must be non-blank.
            has_video = any(
                hit.strip() if isinstance(hit, str) else True
                for hit in video_hits
            )

            has_detail_images = bool(html.xpath(
                '//div[@class="celwidget aplus-module 3p-module-b aplus-standard"]//img/@src | //h2[contains(text(),"From the manufacturer")]/..//div[@class="apm-hero-image"]//@src | //div[@id="aplus"]//img/@src'))

            flags = []
            if img_url:
                flags.append(1)
            if has_video:
                flags.append(2)
            if has_detail_images:
                flags.append(3)
            img_type = ','.join(map(str, flags))[:10]

            img_num = len(html.xpath(
                '//ul//li[@class="a-spacing-small item imageThumbnail a-declarative"] | //ul[@class="a-unordered-list a-nostyle a-button-list a-vertical a-spacing-top-micro gridAltImageViewLayoutIn1x7"]//li[@class="a-spacing-small item"] | //ul//li[@class="a-spacing-small item"]'))
            # A detected video adds one to the count, but only when at least
            # one thumbnail matched.
            if img_num and has_video:
                img_num += 1

            return (img_url,img_type,img_num)
        except Exception as e:
            print(f'{asin}的img_url or img_type or img_num出错，{e}')
    def parse_head_title(self,asin,html):
        """Return ``(title, title_len)`` for the product title.

        The first matching text has every double space removed, trailing
        whitespace stripped, and is cut to 400 characters.  ``title_len`` is
        ``None`` when the title is missing or empty.  On any exception the
        error is printed and ``None`` is returned.
        """
        try:
            matches = html.xpath('//span[@id="productTitle"]/text() | //h1[@id="title"]/span/text()')
            if not matches:
                return (None, None)
            title = matches[0].replace('  ', '').rstrip()[:400]
            title_len = len(title) if title else None
            return (title, title_len)
        except Exception as e:
            print(f'{asin}的title出错，{e}')

    def parse_head_price(self,asin,html):
        """Return the displayed price as a string, or ``None``.

        Two XPath queries are tried in order; the second is used only when the
        first yields no usable value.  Thousands separators are removed and,
        when a ``$`` is present, the text after the first ``$`` is returned.
        On any exception the error is printed and ``None`` is returned.
        """
        queries = (
            '//div[@class="a-section a-spacing-none aok-align-center"]/span/span[@class="a-offscreen"]/text() | //input[@id="twister-plus-price-data-price"]/@value ',
            '//span[@class="a-price a-text-price a-size-medium apexPriceToPay"]/span[@class="a-offscreen"]/text() | //span[@class="a-price aok-align-center reinventPricePriceToPayMargin priceToPay"]/span/text()',
        )
        try:
            price = None
            for query in queries:
                candidates = [
                    text.strip().replace(',', '')
                    for text in html.xpath(query)
                    if text.strip()
                ]
                price = candidates[0] if candidates else None
                if price:
                    break

            if price and '$' in price:
                return price.split('$')[1]
            return price
        except Exception as e:
            print(f'{asin}的price出错，{e}')

    def parse_head_rating(self,asin,html):
        """Return the first space-separated token of the rating popover title.

        ``None`` is returned when the title attribute is missing or empty, and
        also (after printing the error) when an exception occurs.
        """
        try:
            titles = html.xpath('//span[@id="acrPopover" ]/@title')
            popover_title = titles[0] if titles else None
            if not popover_title:
                return None
            return popover_title.split(' ')[0]
        except Exception as e:
            print(f'{asin}的rating出错，{e}')

    def parse_head_total_comments(self,asin,html):
        """Return the review count text with thousands separators removed.

        The value is the first space-separated token of the review-count
        label.  ``None`` is returned when the label is missing or empty, and
        also (after printing the error) when an exception occurs.
        """
        try:
            labels = html.xpath('//span[@id="acrCustomerReviewText" ]/text()')
            label = labels[0] if labels else None
            if not label:
                return None
            count_token = label.split(' ')[0]
            return count_token.replace(',', '')
        except Exception as e:
            print(f'{asin}的total_comments出错，{e}')

    def parse_head_buy_box_seller_type_and_seller_json(self,asin,html):
        """Classify the buy-box seller and serialise the seller details.

        Args:
            asin: Product identifier; only used in the error message.
            html: Parsed page exposing an ``xpath(query)`` method.

        Returns:
            ``(buy_box_seller_type, seller_json)``.  The type is ``1`` when
            both the ships-from and sold-by texts contain "amazon", ``2`` when
            only ships-from does, ``3`` when neither does, and ``4`` in every
            other case (a value is missing, or only sold-by contains
            "amazon").  ``seller_json`` is a JSON list holding one object with
            the keys ``seller_id``, ``ship_from``, ``sold_by`` and
            ``fulfilled_by``.  ``None`` is returned (after printing the error)
            if anything raises.
        """
        try:
            fulfilled_texts = html.xpath('//a[@id="SSOFpopoverLink_ubb"]/text()')
            fulfilled_by = fulfilled_texts[0].split('Fulfilled by')[-1] if fulfilled_texts else None

            ship_texts = [text.strip() for text in
                          html.xpath('//div[@id="fulfillerInfoFeature_feature_div"]//text()') if text.strip()]
            # The second text node is used as the value.  A block holding a
            # single node used to raise IndexError and lose the whole result;
            # now it falls through to the "Fulfilled by" link instead.
            ships = ship_texts[1] if len(ship_texts) > 1 else None
            if not ships:
                ships = fulfilled_by

            sold_texts = html.xpath('//div[@id="merchantInfoFeature_feature_div"]//span/text()')
            sold = sold_texts[1] if len(sold_texts) > 1 else None
            if not sold:
                profile_texts = html.xpath('//a[@id="sellerProfileTriggerId"]/text()')
                sold = profile_texts[0] if profile_texts else None

            # Default covers both "a value is missing" and the combination the
            # original chain left unassigned (sold-by mentions amazon while
            # ships-from does not), which previously raised UnboundLocalError.
            # NOTE(review): 4 is assumed to mean "other/unknown" - confirm.
            buy_box_seller_type = 4
            if ships and sold:
                ships_by_amazon = 'amazon' in ships.lower()
                sold_by_amazon = 'amazon' in sold.lower()
                if ships_by_amazon and sold_by_amazon:
                    buy_box_seller_type = 1
                elif ships_by_amazon:
                    buy_box_seller_type = 2
                elif not sold_by_amazon:
                    buy_box_seller_type = 3

            merchant_ids = html.xpath('//input[@id="merchantID"]/@value')
            seller_id = merchant_ids[0] if merchant_ids else None
            seller_json = json.dumps(
                [{"seller_id": seller_id, "ship_from": ships, "sold_by": sold, "fulfilled_by": fulfilled_by}])
            return (buy_box_seller_type,seller_json)
        except Exception as e:
            print(f'{asin}的buy_box_seller_type出错，{e}')

    def parse_head_page_inventory(self,asin,html):
        """Return an inventory level code: ``1``, ``2`` or ``3``.

        The quantity is the number of options in the quantity selector, or,
        when that is absent, the number parsed from an "Only N left" notice
        (notices containing "usually ships" are ignored).  More than 20 gives
        ``1``; 1 to 20, or an "In stock" success label, gives ``2``; anything
        else gives ``3``.  On any exception the error is printed and ``None``
        is returned.
        """
        try:
            option_texts = html.xpath(
                '//div[@id="quantityRelocate_feature_div"]//select[@id="quantity" ]/option/text()')
            quantity = len(option_texts) if option_texts else None

            if not quantity:
                notices = html.xpath('//span[@class="a-size-base a-color-price a-text-bold"]/text()')
                notice = notices[0] if notices else None
                quantity = None
                if notice and 'usually ships' not in notice.lower() and 'Only' in notice:
                    quantity = int(notice.split('Only')[1].split('left')[0])

            if quantity and quantity > 20:
                return 1
            if quantity and 1 <= quantity <= 20:
                return 2
            if html.xpath('//span[@class="a-size-medium a-color-success" and contains(text(),"In stock")]/text()'):
                return 2
            return 3
        except Exception as e:
            print(f'{asin}的page_inventory出错，{e}')

    def parse_head_category(self,asin,html):
        """Return the breadcrumb text joined without separators.

        Every non-blank text node of the breadcrumb list is stripped and
        concatenated; the result is cut to 400 characters (an empty string
        when nothing matched).  On any exception the error is printed and
        ``None`` is returned.
        """
        try:
            pieces = []
            for text in html.xpath('//ul[@class="a-unordered-list a-horizontal a-size-small"]//text()'):
                trimmed = text.strip()
                if trimmed:
                    pieces.append(trimmed)
            return ''.join(pieces)[:400]
        except Exception as e:
            print(f'{asin}的category出错，{e}')

    def parse_head_volume_and_weight(self,asin,html):
        """Extract the dimensions text and the weight converted to pounds.

        Args:
            asin: Product identifier; only used in the error message.
            html: Parsed page exposing an ``xpath(query)`` method.

        Returns:
            ``(volume, weight)``.  ``volume`` is the dimensions text cut to
            100 characters (the part before ``;`` when the text also carries
            a weight), or ``None``.  ``weight`` is a float rounded to three
            decimals for hundredths-pounds, ounces, kilograms, grams and
            milligrams; for plain pounds it is the numeric text itself as a
            string.  ``weight`` is ``None`` when no weight text was found or
            its unit is not recognised.  ``None`` is returned (after printing
            the error) if anything raises, e.g. a non-numeric amount.
        """
        try:
            weight_hits = [text.strip() for text in html.xpath(
                '//div[@class="a-row a-expander-container a-expander-inline-container"]//span[contains(text(),"Item Weight")]/../span[2]/text() | //div[@class="a-row a-expander-container a-expander-inline-container"]//th[contains(text(),"Item Weight")]/../td//text()')
                           if text.strip()]
            weight_strs = weight_hits[0] if weight_hits else None

            dimension_hits = [text.strip().replace('\u200e', '') for text in html.xpath(
                '//span[contains(text(),"Dimensions")]/../../div[3]/span/text() | //span[contains(text(),"Product Dimensions")]/../span[2]/text() | //span[contains(text(),"Package Dimensions")]/../span[2]/text() | //th[contains(text(),"Item Package Dimensions L x W x H")]/../td//text()')]
            volume_str = dimension_hits[0] if dimension_hits else None

            volume = None
            if volume_str:
                if ';' in volume_str:
                    # "<dimensions>; <weight>": the weight part overrides any
                    # separately found "Item Weight" text.
                    parts = volume_str.split(';')
                    volume = parts[0][:100]
                    weight_strs = parts[1]
                else:
                    volume = volume_str[:100]

            # Order matters: the more specific unit names must be tested
            # before the shorter ones they contain.
            converters = (
                (' hundredths pounds', lambda amount: round(float(amount) / 100, 3)),
                # Plain pounds are passed through as text; only surrounding
                # whitespace is removed.
                (' pounds', lambda amount: amount.strip()),
                (' ounces', lambda amount: round(float(amount) / 16, 3)),
                (' kilograms', lambda amount: round(float(amount) * 2.20462, 3)),
                (' grams', lambda amount: round(float(amount) / 453.592, 3)),
                (' g', lambda amount: round(float(amount) * 0.00220462, 3)),
                # 1 lb = 453592.37 mg (the previous divisor was the gram factor).
                (' milligrams', lambda amount: round(float(amount) / 453592.37, 3)),
            )

            # Stays None when the unit is unknown instead of leaving the name
            # unbound and failing the whole call.
            weight = None
            if weight_strs:
                normalized = weight_strs.lower().replace('\u200e', '')
                for unit, to_pounds in converters:
                    if unit in normalized:
                        weight = to_pounds(normalized.split(unit)[0])
                        break
            return (volume,weight)
        except Exception as e:
            print(f'{asin}的 volume or weight出错，{e}')

    def parse_tail_rank(self,asin,html):
        """Return the Best Sellers Rank numbers as a string, or ``None``.

        The first non-blank text after the "Best Sellers Rank" label has its
        commas removed.  When that text contains a space, only its digit runs
        are kept, joined by single spaces; otherwise it is returned as is.
        On any exception the error is printed and ``None`` is returned.
        """
        try:
            texts = [
                raw.strip().replace(',', '')
                for raw in html.xpath(
                    '//span[contains(text(),"Best Sellers Rank")]/../text() | //th[contains(text(),"Best Sellers Rank")]/../td//text()')
                if raw.strip()
            ]
            if not texts:
                return None
            rank = texts[0]
            if rank and ' ' in rank:
                return ' '.join(re.findall(r'\d+', rank))
            return rank
        except Exception as e:
            print(f'{asin}的rank出错，{e}')

    def parse_tail_launch_time(self,asin,html):
        """Return the first-available / publication date as ``YYYY-MM-DD``.

        The first non-blank matching text is parsed with the format
        ``"%B %d, %Y"`` after removing left-to-right marks.  ``None`` is
        returned when nothing matched; if parsing fails the error is printed
        and ``None`` is returned.
        """
        try:
            texts = [
                raw.strip()
                for raw in html.xpath(
                    '//span[contains(text(),"Date First Available")]/../span[2]/text() | //th[contains(text(),"Date First Available")]/../td/text()  | //span[text()="Publication date"]/../../div[3]//text() | //span[contains(text(), "Release date:")]/following-sibling::span[1]/text()')
                if raw.strip()
            ]
            raw_date = texts[0] if texts else None
            if not raw_date:
                return raw_date
            parsed = datetime.strptime(raw_date.replace('\u200e', ''), "%B %d, %Y")
            return parsed.strftime("%Y-%m-%d")
        except Exception as e:
            print(f'{asin}的launch_time出错，{e}')

    def parse_head_activity_type_and_val(self, asin, html):
        """Detect promotions and return their type codes and values.

        Returns:
            ``(activity_type, one_two_val, three_four_val, five_six_val,
            eight_val)``.  ``activity_type`` is a comma-joined string cut to
            10 characters built from: ``1`` coupon label containing ``%``,
            ``2`` coupon label containing ``$``, ``3`` / ``4`` "Join Prime"
            text containing ``%`` / ``$``, ``8`` a "You Save" amount.
            ``one_two_val`` is only filled for percent coupons;
            ``five_six_val`` is always ``None``.  On any exception the error
            is printed and ``None`` is returned.
        """
        try:
            codes = []

            # Coupon label: percent -> '1', dollar -> '2'.
            coupon_hits = html.xpath(
                '//i[contains(text(),"Coupon:")]/..//span[@class="a-color-success"]/label/text()')
            coupon_type = coupon_hits[0] if coupon_hits else None
            percent_coupon = bool(coupon_type and '%' in coupon_type)
            if percent_coupon:
                codes.append('1')
            elif coupon_type and '$' in coupon_type:
                codes.append('2')

            one_two_val = None
            if percent_coupon:
                if 'Apply' in coupon_type:
                    one_two_val = coupon_type.split('Apply ')[1].split('% ')[0].replace(',','')
                elif 'Save' in coupon_type:
                    one_two_val = coupon_type.split('Save ')[1].split('%')[0].replace(',','')
            if one_two_val and ' ' in one_two_val:
                one_two_val = ' '.join(re.findall(r'\d+', one_two_val)).replace(',','')

            # "Join Prime" offer: the value is the text after the symbol.
            prime_hits = html.xpath('//span[contains(text(),"Join Prime")]/../span[2]/text()')
            join_prime = prime_hits[0] if prime_hits else None
            three_four_val = None
            if join_prime and '%' in join_prime:
                codes.append('3')
                three_four_val = join_prime.split('%')[1].replace(',','')
            elif join_prime and '$' in join_prime:
                codes.append('4')
                three_four_val = join_prime.split('$')[1].replace(',','')

            # "You Save" row: keep only the digit runs.
            saving_hits = [
                text.strip().replace(',','')
                for text in html.xpath('//td[contains(text(),"You Save")]/../td[2]/span/text()')
                if text.strip()
            ]
            eight_val = saving_hits[0] if saving_hits else None
            if eight_val:
                codes.append('8')
                eight_val = ' '.join(re.findall(r'\d+', eight_val)).replace(',','')

            five_six_val = None
            activity_type = ','.join(codes)[:10]
            return (activity_type,one_two_val,three_four_val,five_six_val,eight_val)
        except Exception as e:
            print(f'{asin}的activity_type or val出错，{e}')

    def parse_tail_stars(self, asin, html):
        """Return the review share per star level plus the 1-3 star total.

        Returns:
            ``(one_star, two_star, three_star, four_star, five_star,
            low_star)``.  Each star value is the first matching text with
            ``%`` removed, or ``None`` when absent.  ``low_star`` is the
            integer sum of the one-, two- and three-star values when all three
            are non-empty, otherwise ``None``.  On any exception the error is
            printed and ``None`` is returned.
        """
        try:
            shares = []
            for level in range(1, 6):
                label = f'percent of reviews have {level} stars'
                hits = html.xpath(
                    f'//a[contains(@aria-label,"{label}")]//text() | //span[contains(@aria-label,"{label}")]//text()')
                shares.append(hits[0].replace('%', '') if hits else None)

            lowest_three = shares[:3]
            if all(lowest_three):
                low_star = sum(int(value) for value in lowest_three)
            else:
                low_star = None
            return (*shares, low_star)
        except Exception as e:
            print(f'{asin}的stars出错，{e}')

    def parse_head_img_type(self,asin,html):
        """Return the comma-joined product ids found in the bundle links.

        Three link layouts are tried in order.  The first layout whose XPath
        matches any link decides the result: the ids captured by that
        layout's regex are joined with commas and cut to 255 characters (this
        may be an empty string when no link matches the regex).  ``None`` is
        returned when no layout matched; on any exception the error is
        printed and ``None`` is returned.
        """
        layouts = (
            ('pd_rd_i=(.*?)&psc',
             '//div[@class="a-cardui _p13n-desktop-sims-fbt_fbt-desktop_new-thumbnail-box__36bD3"]//div//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href'),
            ('dp/(.*?)/ref',
             '//div[@class="a-section a-spacing-none _p13n-desktop-sims-fbt_fbt-desktop_link-area__1VLAZ"]/a/@href'),
            ('product-reviews/(.*?)/ref',
             '//div[@class="a-cardui _sp-desktop-thematic-bundle_thematicBundle-desktop_new-thumbnail-box__1W9Ku _sp-desktop-thematic-bundle_thematicBundle-desktop_two-item-thumbnail-box__7kF95"]/div[position() > 1]//a[@class="a-link-normal"]/@href'),
        )
        try:
            for pattern, query in layouts:
                hrefs = html.xpath(query)
                if not hrefs:
                    continue
                found = []
                for href in hrefs:
                    found.extend(re.findall(pattern, href))
                return ','.join(found)[:255]
            return None
        except Exception as e:
            print(f'{asin}的together_asin出错，{e}')

    def parse_head_brand(self, asin, html):
        """Return the brand name from the byline link, cut to 100 characters.

        Handles the "Brand: X" and "Visit the X Store" wordings; any other
        wording, or a missing byline, gives ``None``.  On any exception the
        error is printed and ``None`` is returned.
        """
        try:
            bylines = html.xpath('//a[@id="bylineInfo"]/text()')
            byline = bylines[0] if bylines else None
            if not byline:
                return None
            if 'Brand: ' in byline:
                return byline.split('Brand: ')[1][:100]
            if 'Visit the ' in byline and ' Store' in byline:
                store_name = byline.split('Visit the ')[1].split(' Store')[0]
                return store_name[:100]
            return None
        except Exception as e:
            print(f'{asin}的brand出错，{e}')

    def parse_head_ac_name(self, html):
        """Return the text after ``'in '`` in the ``ac-for-text`` label.

        The first non-blank text node is used and the result is cut to 100
        characters.  ``None`` is returned when the label is missing or does
        not contain ``'in '``.  Only ``IndexError`` is handled here.
        """
        try:
            labels = [chunk.strip() for chunk in html.xpath('//span[@class="ac-for-text"]//text()') if chunk.strip()]
            after_in = labels[0].split('in ')[1]
            return after_in[:100]
        except IndexError:
            return None


    def parse_head_material(self, asin, html):
        """Return the "Material" cell of the overview table, or ``None``.

        The first matching text is cut to 150 characters.  On any exception
        the error is printed and ``None`` is returned.
        """
        try:
            cells = html.xpath(
                '//table[@class="a-normal a-spacing-micro"]//span[text() = "Material"]/../../td[2]/span/text()')
            if not cells:
                return None
            return cells[0][:150]
        except Exception as e:
            print(f'{asin}的material出错，{e}')


    def parse_head_node_id(self, asin, html):
        """Return the node id taken from the last breadcrumb link.

        From the link's ``href``: the text after ``node=`` when present;
        otherwise, when both ``fashion`` and ``ref`` occur, the text between
        them; otherwise the href itself.  The result is cut to 20 characters.
        ``None`` is returned when no href is found; on any exception the
        error is printed and ``None`` is returned.
        """
        try:
            hrefs = html.xpath('//ul[@class="a-unordered-list a-horizontal a-size-small"]//li[last()]//a/@href')
            href = hrefs[0] if hrefs else None
            if not href:
                return None
            if 'node=' in href:
                candidate = href.split('node=')[1]
            elif 'fashion' in href and 'ref' in href:
                candidate = href.split('fashion')[1].split('ref')[0]
            else:
                candidate = href
            return candidate[:20]
        except Exception as e:
            print(f'{asin}的node_id出错，{e}')


    def parse_head_describe(self, asin, html):
        """Return the bullet-point texts joined with ``|-|``, or ``None``.

        Every matching text node is stripped and kept, including ones that
        become empty.  ``None`` is returned when nothing matched; on any
        exception the error is printed and ``None`` is returned.
        """
        try:
            bullet_texts = html.xpath(
                '//ul[@class="a-unordered-list a-vertical a-spacing-mini"]/li//text() | //ul[@class="a-unordered-list a-vertical a-spacing-small"]//li//text() | //div[@id="bookDescription_feature_div"]//li//text()')
            if not bullet_texts:
                return None
            return '|-|'.join(text.strip() for text in bullet_texts)
        except Exception as e:
            print(f'{asin}的describe出错，{e}')

    def parse_head_date_info(self, asin):
        """Return the current local year and month formatted as ``YYYY-MM``.

        On any exception the error is printed and ``None`` is returned.
        """
        try:
            return f'{datetime.now():%Y-%m}'
        except Exception as e:
            print(f'{asin}的date_info出错，{e}')

    def parse_tail_weight_str(self, asin, html):
        """Return the raw weight / dimensions text, cut to 250 characters.

        The first non-blank text among the "Item Weight" and "Product
        Dimensions" values is used, stripped and with left-to-right marks
        removed.  ``None`` is returned when nothing matched; on any exception
        the error is printed and ``None`` is returned.
        """
        try:
            values = [
                raw.strip().replace('\u200e', '')
                for raw in html.xpath(
                    '//th[contains(text(),"Item Weight")]/../td//text() | //span[contains(text(),"Item Weight")]/../span[2]/text() | //span[contains(text(),"Product Dimensions")]/../span[2]/text()')
                if raw.strip()
            ]
            if not values:
                return None
            return values[0][:250]
        except Exception as e:
            print(f'{asin}的weight_str出错，{e}')

    def parse_head_pattern_name(self, asin, html):
        """Return the pattern name, cut to 50 characters, or ``None``.

        The value after ``'Pattern Name: '`` in one specific review's format
        strip is preferred; when that yields nothing, the stripped text of
        the ``variation_pattern_name`` block is used.  On any exception the
        error is printed and ``None`` is returned.
        """
        try:
            review_strips = html.xpath(
                '//div[@id="customer_review-RYQ2ES848T5PV"]//span[@data-hook="format-strip-linkless"]/text()')
            pattern_name = None
            if review_strips:
                pattern_name = review_strips[0].split('Pattern Name: ')[1][:50]
            if pattern_name:
                return pattern_name

            variation_texts = html.xpath('//div[@id="variation_pattern_name"]/div/span/text()')
            if variation_texts:
                return variation_texts[0].strip()[:50]
            return None
        except Exception as e:
            print(f'{asin}的pattern_name出错，{e}')

    def parse_head_follow_sellers(self, html):
        """Return the text inside the first parentheses of the offers label.

        Args:
            html: Parsed page exposing an ``xpath(query)`` method.

        Returns:
            The text between the first ``(`` and the following ``)`` of the
            first matching label, or ``None`` when the label is missing,
            has no parentheses, or extraction fails.
        """
        try:
            label = html.xpath('//span[@class="a-declarative"]/span[@class="a-color-base"]/text()')[0]
            return label.split('(')[1].split(')')[0]
        # Best-effort lookup: ordinary failures mean "not available".  A bare
        # ``except:`` would also swallow KeyboardInterrupt / SystemExit.
        except Exception:
            return None

    def parse_tail_sp_num(self, asin, html):
        """Return the ``set_size`` of three carousels as ``"a,b,c"``.

        The carousels are, in order: ``sp_detail2``, "4 stars and above" and
        "Related products with free delivery on eligible orders".  A carousel
        that is absent contributes ``0``.  The result is cut to 30
        characters.  On any exception (including a carousel whose options
        lack ``set_size``) the error is printed and ``None`` is returned.
        """
        carousel_queries = (
            '//div[@id="sp_detail2"]/@data-a-carousel-options',
            '//h2[contains(text(),"4 stars and above")]/../../../@data-a-carousel-options',
            '//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../@data-a-carousel-options',
        )
        try:
            sizes = []
            for query in carousel_queries:
                hits = html.xpath(query)
                options = hits[0] if hits else None
                if options:
                    sizes.append(re.findall('"set_size":(.*?),"filteredItems"', options)[0])
                else:
                    sizes.append(0)
            return ','.join(str(size) for size in sizes)[:30]
        except Exception as e:
            print(f'{asin}的sp_num出错，{e}')

    def parse_tail_product_description(self, asin, html):
        """Return the product description texts joined with ``|-|``.

        Blank text nodes are skipped and the rest are stripped.  ``None`` is
        returned when the XPath matched nothing (an empty string when it only
        matched blank text).  On any exception the error is printed and
        ``None`` is returned.
        """
        try:
            nodes = html.xpath(
                '//div[@id="productDescription"]//span/text() | //h2[contains(text(),"Product Description")]/..//div[@class="celwidget aplus-module premium-module-3-four-column-images aplus-premium"]//text()')
            if not nodes:
                return None
            paragraphs = []
            for node in nodes:
                trimmed = node.strip()
                if trimmed:
                    paragraphs.append(trimmed)
            return '|-|'.join(paragraphs)
        except Exception as e:
            print(f'{asin}的product_description出错，{e}')

    def parse_head_buy_sales(self, asin, html):
        """Return the "bought" social-proof text with all spaces removed.

        ``None`` is returned when the element is missing; on any exception
        the error is printed and ``None`` is returned.
        """
        try:
            texts = html.xpath('//span[@id="social-proofing-faceout-title-tk_bought"]/span/text()')
            if not texts:
                return None
            return texts[0].replace(' ','')
        except Exception as e:
            print(f'{asin}的buy_sales出错，{e}')

    def parse_head_package_quantity(self, asin, html):
        """Return the "Item Package Quantity" value, or ``None``.

        The first matching text is stripped and cut to 50 characters.  On
        any exception the error is printed and ``None`` is returned.
        """
        try:
            values = html.xpath('//label[contains(text(),"Item Package Quantity:")]/../span/text()')
            if not values:
                return None
            return values[0].strip()[:50]
        except Exception as e:
            print(f'{asin}的package_quantity出错，{e}')

    def parse_tail_product_json(self, asin, html):
        """Serialise the overview table as a JSON object of label/value pairs.

        Args:
            asin: Product identifier; only used in the error message.
            html: Parsed page exposing an ``xpath(query)`` method.

        Returns:
            A JSON string mapping each even-indexed span text to the text
            that follows it, or ``None`` when the table has no span text.
            A trailing label without a value is dropped.  ``None`` is also
            returned (after printing the error) if anything raises.
        """
        try:
            cells = html.xpath('//table[@class="a-normal a-spacing-micro"]//span/text()')
            if not cells:
                return None
            # zip() stops at the shorter slice, so an odd number of cells no
            # longer raises IndexError and discards every complete pair.
            return json.dumps(dict(zip(cells[0::2], cells[1::2])))
        except Exception as e:
            print(f'{asin}的product_json出错，{e}')
    def parse_tail_product_detail_json(self, asin, html):
        """Serialise the product detail sections as a JSON object.

        Text is collected from the "Technical Details", "Additional
        Information" and "Product details" sections.  Each non-blank text
        node is stripped, has directional marks (U+200E / U+200F) removed and
        whitespace runs collapsed; entries containing ``Learn More`` or
        ``'click here'`` are discarded.  The remaining entries are read as
        alternating label/value pairs, with `` :`` removed from labels.

        Args:
            asin: Product identifier; only used in the error message.
            html: Parsed page exposing an ``xpath(query)`` method.

        Returns:
            A JSON string of the label/value pairs, or ``None`` when no
            usable text was found.  A trailing label without a value is
            dropped.  ``None`` is also returned (after printing the error)
            if anything raises.
        """
        try:
            raw_texts = html.xpath(
                '//h1[contains(text(),"Technical Details")]/../../..//tr//text() | //h1[contains(text(),"Additional Information")]/..//td[@class="a-size-base prodDetAttrValue"]/..//text() | //h2[contains(text(),"Product details")]/../div[@id="detailBullets_feature_div"]/ul//text()')
            entries = [
                re.sub(r'\s+', ' ', raw.strip().replace('\u200e', '').replace('\u200f', ''))
                for raw in raw_texts
                if raw.strip()
            ]
            # Nothing usable: report None rather than leaking an empty list
            # where callers otherwise get a JSON string or None.
            if not entries:
                return None

            entries = [
                entry for entry in entries
                if 'Learn More' not in entry and "'click here'" not in entry
            ]
            # zip() stops at the shorter slice, so an odd number of entries no
            # longer raises IndexError and discards every complete pair.
            pairs = zip(entries[0::2], entries[1::2])
            return json.dumps({label.replace(' :',''): value for label, value in pairs})
        except Exception as e:
            print(f'{asin}的product_detail_json出错，{e}')

    def parse_tail_review_ai_text(self, asin, html):
        """Return the first paragraph text found under the "Customers say" heading.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: The first matching text node unchanged, or ``None`` when
            nothing matches or an error is caught.
        """
        try:
            summaries = html.xpath('//h3[contains(text(),"Customers say")]/../../p/span/text()')
            if summaries:
                return summaries[0]
            return None
        except Exception as e:
            print(f'{asin}的review_ai_text出错，{e}')

    def parse_tail_sp_initial_seen_asins_json(self, asin, html):
        """Serialise the "Products related to this item" carousel as JSON.

        Five columns are collected: the ASIN (text after the last underscore
        of each item div id), the link title, the image src, and per list item
        the first "a-color-link" text and the first "a-offscreen" text
        (``None`` when the item has none).

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: JSON array string of one object per carousel entry, or
            ``None`` when any column is empty or an error is caught.
        """
        try:
            li_query = '//h2[contains(text(),"Products related to this item")]/../../../div[@class="a-row"]//ol/li'

            raw_ids = html.xpath(li_query + '/div/@id')
            ids = [raw.split('_')[-1] for raw in raw_ids] if raw_ids else None
            titles = html.xpath(li_query + '/div/a/@title') or None
            images = html.xpath(li_query + '/div//img/@src') or None

            review_counts = []
            prices = []
            for li in html.xpath(li_query):
                count_hits = li.xpath('./div//span[@class="a-color-link"]/text()')
                review_counts.append(count_hits[0] if count_hits else None)
                price_hits = li.xpath('./div//span[@class="a-offscreen"]/text()')
                prices.append(price_hits[0] if price_hits else None)

            if not (ids and titles and images and review_counts and prices):
                return None

            # zip() stops at the shortest column, so surplus entries are dropped.
            records = []
            for one_id, one_title, one_img, one_count, one_price in zip(
                    ids, titles, images, review_counts, prices):
                records.append({
                    "seen_asins": one_id,
                    "seen_asins_title": one_title,
                    "seen_asins_src": one_img,
                    "seen_asins_total_comments": one_count,
                    "seen_asins_price": one_price,
                })
            return json.dumps(records)
        except Exception as e:
            print(f'{asin}的sp_initial_seen_asins_json出错，{e}')

    def parse_tail_sp_4stars_initial_seen_asins_json(self, asin, html):
        """Serialise the "4 stars and above" carousel as JSON.

        Columns gathered per entry: ASIN (text after the last underscore of the
        item div id), link title, image src, first "a-color-link" text and
        first "a-offscreen" text (the last two are ``None`` when missing).

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: JSON array string of one object per carousel entry, or
            ``None`` when any column is empty or an error is caught.
        """
        try:
            root = '//h2[contains(text(),"4 stars and above")]/../../../div[@class="a-row"]//ol/li'

            def first_text(node, query):
                # First match of a relative query, or None when there is none.
                found = node.xpath(query)
                return found[0] if found else None

            columns = [
                [value.split('_')[-1] for value in html.xpath(root + '/div/@id')],
                html.xpath(root + '/div/a/@title'),
                html.xpath(root + '/div//img/@src'),
                [first_text(li, './div//span[@class="a-color-link"]/text()') for li in html.xpath(root)],
                [first_text(li, './div//span[@class="a-offscreen"]/text()') for li in html.xpath(root)],
            ]
            # Every column must hold at least one entry.
            if not all(columns):
                return None

            keys = ("seen_asins", "seen_asins_title", "seen_asins_src",
                    "seen_asins_total_comments", "seen_asins_price")
            # zip(*columns) truncates to the shortest column.
            return json.dumps([dict(zip(keys, row)) for row in zip(*columns)])
        except Exception as e:
            print(f'{asin}的sp_4stars_initial_seen_asins_json出错，{e}')

    def parse_tail_sp_delivery_initial_seen_asins_json(self, asin, html):
        """Serialise the "Related products with free delivery on eligible orders" carousel.

        Columns gathered per entry: ASIN (text after the last underscore of the
        item div id), link title, image src, and for each list item the first
        "a-color-link" text and first "a-offscreen" text anywhere below it
        (``None`` when missing).

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: JSON array string of one object per carousel entry, or
            ``None`` when any column is empty or an error is caught.
        """
        try:
            entry_path = '//h2[contains(text(),"Related products with free delivery on eligible orders")]/../../../div[@class="a-row"]//ol/li'

            id_attrs = html.xpath(entry_path + '/div/@id')
            if id_attrs:
                asin_column = [attr.split('_')[-1] for attr in id_attrs]
            else:
                asin_column = None
            title_column = html.xpath(entry_path + '/div/a/@title') or None
            src_column = html.xpath(entry_path + '/div/a/img/@src') or None

            comment_column = []
            price_column = []
            for entry in html.xpath(entry_path):
                link_texts = entry.xpath('.//span[@class="a-color-link"]/text()')
                comment_column.append(link_texts[0] if link_texts else None)
                offscreen_texts = entry.xpath('.//span[@class="a-offscreen"]/text()')
                price_column.append(offscreen_texts[0] if offscreen_texts else None)

            have_all = (asin_column and title_column and src_column
                        and comment_column and price_column)
            if not have_all:
                return None

            # zip() truncates to the shortest column.
            rows = zip(asin_column, title_column, src_column, comment_column, price_column)
            return json.dumps([
                {"seen_asins": row[0],
                 "seen_asins_title": row[1],
                 "seen_asins_src": row[2],
                 "seen_asins_total_comments": row[3],
                 "seen_asins_price": row[4]}
                for row in rows])
        except Exception as e:
            print(f'{asin}的sp_delivery_initial_seen_asins_json出错，{e}')

    def parse_tail_compare_similar_asin_json(self, asin, html):
        """Serialise the first row of the "Compare with similar items" table.

        For each compared product the ASIN (text after the last hyphen of the
        div id), the image src and the image alt text are collected from either
        of the two cell layouts the queries cover.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: JSON array string of one object per compared product, or
            ``None`` when any of the three columns is empty or an error is
            caught.
        """
        try:
            row = '//h2[contains(text(),"Compare with similar items")]/../div//tr[1]'
            tab_cell = row + '//div[@tabindex="0"]'
            mini_cell = row + '//div[@class="a-section a-spacing-mini"]'

            id_attrs = html.xpath(f'{tab_cell}/div/@id | {mini_cell}/a/div/@id')
            sources = html.xpath(f'{tab_cell}//img/@src | {mini_cell}//img/@src')
            alt_texts = html.xpath(f'{tab_cell}//img/@alt | {mini_cell}//img/@alt')

            if not id_attrs or not sources or not alt_texts:
                return None

            compared = []
            # zip() truncates to the shortest of the three columns.
            for id_attr, src, alt in zip(id_attrs, sources, alt_texts):
                compared.append({
                    "compare_asin": id_attr.split('-')[-1],
                    "compare_asin_src": src,
                    "compare_asin_title": alt,
                })
            return json.dumps(compared)
        except Exception as e:
            print(f'{asin}的compare_similar_asin_json出错，{e}')

    def parse_tail_together_asin_json(self, asin, html):
        """Serialise the "Frequently bought together" / "Buy it with" companions.

        ASINs are first taken from the ``/dp/<asin>/ref`` part of the image
        links under those headings. When that yields nothing, the links of the
        thematic-bundle card are scanned for ``product-reviews/<asin>/ref=``.
        Titles and prices are read from either card layout.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: JSON array string of one object per companion product, or
            ``None`` when ASINs, titles or prices are missing or an error is
            caught.
        """
        try:
            fbt_links = html.xpath(
                '//h2[contains(text(),"Frequently bought together")]/../../..//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href | //h2[contains(text(),"Buy it with")]/../../..//a[@class="a-link-normal _p13n-desktop-sims-fbt_fbt-desktop_image-link__17L3C"]/@href')
            companion_ids = None
            if fbt_links:
                companion_ids = []
                for href in fbt_links:
                    companion_ids.extend(re.findall('/dp/(.*?)/ref', href))

            if not companion_ids:
                bundle_links = html.xpath(
                    '//div[@class="a-cardui _sp-desktop-thematic-bundle_thematicBundle-desktop_new-thumbnail-box__1W9Ku _sp-desktop-thematic-bundle_thematicBundle-desktop_two-item-thumbnail-box__7kF95"]/div[position() > 1]//a[@class="a-link-normal"]/@href')
                # NOTE(review): unlike the branch above, each entry here is the
                # whole findall() list rather than a single string - confirm
                # consumers expect that shape.
                if bundle_links:
                    companion_ids = [re.findall('product-reviews/(.*?)/ref=', href) for href in bundle_links]
                else:
                    companion_ids = None

            companion_titles = html.xpath(
                '//div[@class="a-cardui _p13n-desktop-sims-fbt_fbt-desktop_new-detail-faceout-box___WyNy"][position() > 1]//span[@class="a-size-base"]/text() | //div[@class="a-cardui _sp-desktop-thematic-bundle_thematicBundle-desktop_new-thumbnail-box__1W9Ku _sp-desktop-thematic-bundle_thematicBundle-desktop_two-item-thumbnail-box__7kF95"]/div[position() > 1]//span[@class="a-size-base"]/text()')
            companion_prices = html.xpath(
                '//div[@class="a-cardui _p13n-desktop-sims-fbt_fbt-desktop_new-detail-faceout-box___WyNy"][position() > 1]//span[@class="a-offscreen"]/text() | //div[@class="a-cardui _sp-desktop-thematic-bundle_thematicBundle-desktop_new-thumbnail-box__1W9Ku _sp-desktop-thematic-bundle_thematicBundle-desktop_two-item-thumbnail-box__7kF95"]/div[position() > 1]//span[@class="a-offscreen"]/text()')

            if not (companion_ids and companion_titles and companion_prices):
                return None

            bundle = []
            # zip() truncates to the shortest of the three columns.
            for one_id, one_title, one_price in zip(companion_ids, companion_titles, companion_prices):
                bundle.append({"together_asin": one_id,
                               "together_asin_title": one_title,
                               "together_asin_price": one_price})
            return json.dumps(bundle)
        except Exception as e:
            print(f'{asin}的together_asin_json出错，{e}')

    def parse_tail_customer_reviews_json(self, asin, html):
        """Serialise the "Customer ratings by feature" section as JSON.

        Non-blank text nodes of the section are stripped and then paired up.
        With an even number of texts they are read as key, value, key, value.
        With an odd number the texts are walked in steps of three, keeping the
        first two of each group.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: JSON array string of single-entry objects, or ``None`` when no
            usable text exists, the odd-length walk runs off the end of the
            list, or an error is caught.
        """
        try:
            feature_query = '//h1[contains(text(),"Customer ratings by feature")]/../..//span[@class="a-size-base a-color-base"]/text() | //h1[contains(text(),"Customer ratings by feature")]/../..//span[@class="a-icon-alt"]/text()  | //h1[contains(text(),"Customer ratings by feature")]/../..//span[@class="a-size-base a-color-tertiary"]/text()'
            texts = [node.strip() for node in html.xpath(feature_query) if node.strip()]
            # Covers both "no match" and "only whitespace matched".
            if not texts:
                return None

            total = len(texts)
            if total % 2 == 0:
                return json.dumps([{texts[i]: texts[i + 1]} for i in range(0, total, 2)])

            # Odd count: plain pairing would index past the end, so fall back
            # to the step-of-three walk.
            # NOTE(review): the look-ahead append below re-adds the pair that
            # the next iteration also emits, so longer inputs contain
            # duplicates - kept as-is, confirm whether that is intended.
            pairs = []
            try:
                for i in range(0, total - 2, 3):
                    pairs.append({texts[i]: texts[i + 1]})
                    if i + 3 < total:
                        pairs.append({texts[i + 3]: texts[i + 4]})
            except IndexError:
                return None
            return json.dumps(pairs)
        except Exception as e:
            print(f'{asin}的customer_reviews_json出错，{e}')

    def parse_tail_lob_asin_json(self, asin, html):
        """Serialise the entries of the "pba-lob-carousel-row" carousel as JSON.

        Each entry carries the link href, image src, title text and the first
        bundle buy price found in the list item (``None`` when the item has no
        such price). ``lob_asin_total_comment`` is always emitted as null.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: JSON array string of one object per entry, or ``None`` when
            any column is empty or an error is caught.
        """
        try:
            item_path = '//div[@id="pba-lob-carousel-row"]//ol/li'
            price_path = './/span[@class="a-price pba-lob-bundle-buy-price"]/span[@class="a-offscreen"]/text()'

            hrefs = html.xpath(item_path + '//a/@href')
            images = html.xpath(item_path + '//img/@src')
            titles = html.xpath(item_path + '//div[@class="a-section a-spacing-none"]/span/span/text()')

            prices = []
            for li in html.xpath(item_path):
                found = li.xpath(price_path)
                prices.append(found[0] if found else None)

            if not hrefs or not images or not titles or not prices:
                return None

            entries = []
            # zip() truncates to the shortest of the four columns.
            for href, image, title, price in zip(hrefs, images, titles, prices):
                entries.append({"lob_asin": href,
                                "lob_asin_img": image,
                                "lob_asin_title": title,
                                "lob_asin_price": price,
                                "lob_asin_total_comment": None})
            return json.dumps(entries)
        except Exception as e:
            print(f'{asin}的lob_asin_json出错，{e}')

    def parse_tail_review_label_json(self, asin, html):
        """Collect the review snippets shown for every review-insight label.

        Labels come from the product-insights aspect list (either markup
        variant). For each label, all text below the ``div`` whose
        ``data-aspect`` attribute equals that label is joined with ``|-|``
        after removing the literal ``Read more``.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()`` (with XPath variable
            support, as provided by lxml).
        :return: ``dict`` mapping label to joined snippet text (a plain dict,
            not a JSON string), or ``None`` when the page has no labels or an
            error is caught.
        """
        try:
            labels = html.xpath(
                '//div[@class="a-section a-spacing-small a-spacing-top-small _cr-product-insights_style_aspect-symbol-list__24amT"]/a/text() | //div[@data-hook="cr-insights-widget-aspects"]//button//span[@class="a-size-base"]/text()')
            if not labels:
                return None

            review_label_json = {}
            for label in labels:
                # Pass the label as an XPath variable instead of formatting it
                # into the expression: a label containing a double quote would
                # otherwise produce an invalid XPath and lose every label.
                snippets = html.xpath('//div[@data-aspect=$aspect]//text()', aspect=str(label))
                review_label_json[label] = '|-|'.join(
                    snippet.replace('Read more', '') for snippet in snippets)
            return review_label_json
        except Exception as e:
            print(f'{asin}的review_label_json出错，{e}')

    def parse_tail_image_view(self, asin, html):
        """Flag whether the main image container holds a JavaScript ``<script>``.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: ``1`` when at least one such script element is found, ``0``
            when none is, ``None`` when an error is caught.
        """
        try:
            scripts = html.xpath('//div[@id="main-image-container"]/script[@type="text/javascript"]')
            return 1 if scripts else 0
        except Exception as e:
            print(f'{asin}的image_view出错，{e}')

    def parse_tail_current_asin(self, asin, html):
        """Report the ASIN of the canonical URL when it differs from ``asin``.

        The candidate is whatever follows the first ``dp/`` in the canonical
        link href.

        :param asin: ASIN that was requested; also used in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: The candidate capped at 25 characters, or ``None`` when there
            is no canonical link, the candidate is empty or equal to ``asin``,
            or an error is caught (for example an href without ``dp/``).
        """
        try:
            canonical = html.xpath('//link[@rel="canonical"]/@href')
            if not canonical:
                return None
            candidate = canonical[0].split('dp/')[1]
            if not candidate or candidate == asin:
                return None
            return candidate[:25]
        except Exception as e:
            print(f'{asin}的current_asin出错，{e}')

    def parse_tail_img_list(self, asin, html):
        """List every thumbnail, video poster and description image as JSON rows.

        Each row is ``[asin, src, position, kind]``: ``position`` counts from 1
        within its group and ``kind`` is 1 for image thumbnails, 2 for video
        thumbnails and 3 for "Product Description" images.

        :param asin: ASIN being parsed; written into every row and used in the
            error message.
        :param html: Parsed document exposing ``xpath()``.
        :return: JSON array string of rows, or ``None`` when none of the three
            groups has an entry or an error is caught.
        """
        try:
            thumbnail_srcs = html.xpath(
                '//ul//li[@class="a-spacing-small item imageThumbnail a-declarative"]//img/@src | //ul//li[@class="a-spacing-small item"]//img/@src')
            video_srcs = html.xpath(
                '//ul/li[@class="a-align-top a-spacing-small item videoBlockIngress videoBlockDarkIngress"]//img/@src')
            description_srcs = html.xpath(
                '//h2[contains(text(),"Product Description")]/..//div[@class="aplus-module-wrapper apm-spacing apm-floatnone apm-fixed-width"]//img/@data-src')

            # Group order fixes the kind code (1, 2, 3); empty groups simply
            # contribute no rows.
            groups = (thumbnail_srcs, video_srcs, description_srcs)

            # The old guard demanded all three groups at once, which discarded
            # the list for any page lacking a video or description images and
            # left its own per-group "is not None" filter unreachable.
            if not any(groups):
                return None

            return json.dumps([
                [asin, src, position, kind]
                for kind, srcs in enumerate(groups, start=1)
                for position, src in enumerate(srcs, start=1)])
        except Exception as e:
            print(f'{asin}的img_list出错，{e}')
    def parse_tail_min_match_asin_json(self, asin, html):
        """Return the ``min_match_asin_json`` value, which is currently always ``None``.

        Nothing is read from ``html``; the method only keeps the same call
        shape as the other ``parse_tail_*`` extractors.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document (unused).
        :return: ``None``.
        """
        try:
            return None
        except Exception as e:
            print(f'{asin}的min_match_asin_json出错，{e}')

    def parse_tail_parent_asin_and_variat_list_and_variat_num(self, asin, html,response_text):
        """Extract the parent ASIN and the variation table from the raw page source.

        The parent ASIN is read from the ``"parent_asin":"..."`` fragment of
        ``response_text``; when that is missing or empty, the last path segment
        of the canonical link is used instead. Variations come from the
        ``"dimensionValuesDisplayData" : {...}`` fragment, which maps each
        child ASIN to a list of display values.

        :param asin: ASIN being parsed; used only in the error message.
        :param html: Parsed document exposing ``xpath()``.
        :param response_text: Raw page source as a string.
        :return: ``(parent_asin, variat_list, variat_num)``. ``variat_list`` is
            a JSON array string of rows ``[child_asin, *values, parent_asin,
            1, None, None]`` (``None`` when the page has no variation data) and
            ``variat_num`` is the number of child ASINs found (``0`` when there
            is no variation data). Returns ``None`` when an unexpected error is
            caught.
        """
        try:
            parent_hits = re.findall(r'"parent_asin":"(.*?)",', response_text)
            parent_asin = parent_hits[0] if parent_hits else None
            if not parent_asin:
                canonical = html.xpath('//link[@rel="canonical"]/@href')
                parent_asin = canonical[0].split('/')[-1] if canonical else None

            display_data = re.findall(r'"dimensionValuesDisplayData" : \{(.*?)},', response_text)
            if not display_data:
                return (parent_asin, None, 0)

            # The matches are flattened through the list's repr before being
            # scanned, so quotes/backslashes inside values may arrive escaped;
            # the fallbacks below cope with that.
            flattened = str(display_data)
            child_asins = re.findall(r'"(\w+)":\[', flattened)
            variat_num = len(child_asins)
            raw_values = re.findall(r'":(\[".*?"])', flattened)

            try:
                # Preferred path: every value list is valid JSON.
                parsed_values = [json.loads(raw) for raw in raw_values]
                variat_rows = [[child] + values for child, values in zip(child_asins, parsed_values)]
            except Exception:
                try:
                    # Drop the escaped quotes the repr introduced, then parse
                    # each list as a Python literal.
                    unescaped = [raw.replace('\\\'', '').replace('\\\\"', '') for raw in raw_values]
                    variat_rows = [[child] + ast.literal_eval(values)
                                   for child, values in zip(child_asins, unescaped)]
                except Exception:
                    # Last resort: keep each value list as one flat string with
                    # quotes and brackets removed.
                    flat_values = [
                        raw.replace('\\\'', '').replace('\\\\"', '').replace('"', '')
                           .replace('[', '').replace(']', '')
                        for raw in raw_values]
                    variat_rows = [[child_asins[i], flat_values[i]] for i in range(len(child_asins))]

            # Append the trailing columns to every row.
            for row in variat_rows:
                row.extend([parent_asin, 1, None, None])
            return (parent_asin, json.dumps(variat_rows), variat_num)
        except Exception as e:
            print(f'{asin}的variat_list or variat_num出错，{e}')

    def parse_data_new(self,asin ,term_html,id_org):
        """Parse one product page into the flat ``item`` dict.

        The page source is parsed with ``etree.HTML`` and handed to every
        ``parse_head_*`` / ``parse_tail_*`` extractor; their results are then
        assembled in a fixed key order.

        :param asin: ASIN of the page; stored under ``'asin'`` and used in the
            error message.
        :param term_html: Raw page source as a string.
        :param id_org: Identifier stored under ``'id'``.
        :return: The assembled ``dict``, or ``None`` when any step raises
            (including an extractor whose result cannot be unpacked).
        """
        try:
            response_text = term_html
            html = etree.HTML(response_text)

            img_url,img_type,img_num = self.parse_head_img_url_and_img_type_and_img_num(asin,html)
            title,title_len = self.parse_head_title(asin,html)
            price = self.parse_head_price(asin,html)
            rating = self.parse_head_rating(asin,html)
            total_comments = self.parse_head_total_comments(asin,html)
            buy_box_seller_type,seller_json = self.parse_head_buy_box_seller_type_and_seller_json(asin,html)
            page_inventory = self.parse_head_page_inventory(asin,html)
            category = self.parse_head_category(asin,html)
            volume,weight = self.parse_head_volume_and_weight(asin,html)
            rank = self.parse_tail_rank(asin,html)
            launch_time = self.parse_tail_launch_time(asin,html)
            activity_type,one_two_val,three_four_val,five_six_val,eight_val = self.parse_head_activity_type_and_val(asin,html)
            one_star,two_star,three_star,four_star,five_star,low_star = self.parse_tail_stars(asin,html)
            # NOTE(review): 'together_asin' is filled from parse_head_img_type -
            # confirm this pairing is intentional.
            together_asin = self.parse_head_img_type(asin,html)
            brand = self.parse_head_brand(asin,html)
            ac_name = self.parse_head_ac_name(html)
            material = self.parse_head_material(asin, html)
            node_id = self.parse_head_node_id(asin, html)
            describe = self.parse_head_describe(asin, html)
            date_info = self.parse_head_date_info(asin)
            weight_str = self.parse_tail_weight_str(asin, html)
            pattern_name = self.parse_head_pattern_name(asin, html)
            follow_sellers = self.parse_head_follow_sellers(html)
            sp_num = self.parse_tail_sp_num(asin, html)
            product_description = self.parse_tail_product_description(asin, html)
            buy_sales = self.parse_head_buy_sales(asin, html)
            package_quantity = self.parse_head_package_quantity(asin, html)
            product_json = self.parse_tail_product_json(asin, html)
            product_detail_json = self.parse_tail_product_detail_json(asin, html)
            review_ai_text = self.parse_tail_review_ai_text(asin, html)
            sp_initial_seen_asins_json = self.parse_tail_sp_initial_seen_asins_json(asin, html)
            sp_4stars_initial_seen_asins_json = self.parse_tail_sp_4stars_initial_seen_asins_json(asin, html)
            sp_delivery_initial_seen_asins_json = self.parse_tail_sp_delivery_initial_seen_asins_json( asin, html)
            compare_similar_asin_json = self.parse_tail_compare_similar_asin_json(asin, html)
            together_asin_json = self.parse_tail_together_asin_json(asin, html)
            customer_reviews_json = self.parse_tail_customer_reviews_json(asin, html)
            lob_asin_json = self.parse_tail_lob_asin_json(asin, html)
            review_label_json = self.parse_tail_review_label_json(asin, html)
            image_view = self.parse_tail_image_view(asin, html)
            current_asin = self.parse_tail_current_asin(asin, html)
            img_list = self.parse_tail_img_list(asin, html)
            min_match_asin_json = self.parse_tail_min_match_asin_json( asin, html)
            parent_asin,variat_list,variat_num = self.parse_tail_parent_asin_and_variat_list_and_variat_num(asin, html, response_text)

            # Key order is kept exactly as before in case consumers rely on it.
            item = {
                'id': id_org,
                'asin': asin,
                'img_url': img_url,
                'title': title,
                'title_len': title_len,
                'price': price,
                'rating': rating,
                'total_comments': total_comments,
                'buy_box_seller_type': buy_box_seller_type,
                'page_inventory': page_inventory,
                'category': category,
                'volume': volume,
                'weight': weight,
                'rank': rank,
                'launch_time': launch_time,
                'category_state': None,
                'img_num': img_num,
                'img_type': img_type,
                'activity_type': activity_type,
                'one_two_val': one_two_val,
                'three_four_val': three_four_val,
                'five_six_val': five_six_val,
                'eight_val': eight_val,
                'qa_num': None,
                'one_star': one_star,
                'two_star': two_star,
                'three_star': three_star,
                'four_star': four_star,
                'five_star': five_star,
                'low_star': low_star,
                'together_asin': together_asin,
                'brand': brand,
                'ac_name': ac_name,
                'material': material,
                'node_id': node_id,
                'data_type': None,
                'sp_num': sp_num,
                'describe': describe,
                'date_info': date_info,
                'weight_str': weight_str,
                'package_quantity': package_quantity,
                'pattern_name': pattern_name,
                'spider_int': None,
                'follow_sellers': follow_sellers,
                'product_description': product_description,
                'buy_sales': buy_sales,
                'image_view': image_view,
                'product_json': product_json,
                'product_detail_json': product_detail_json,
                'review_ai_text': review_ai_text,
                # Serialise the labels that were actually parsed. The previous
                # code looked the key up in the still-incomplete item dict and
                # therefore always stored '{}'. A missing result still maps to
                # '{}' as before.
                'review_label_json': json.dumps(review_label_json if review_label_json else {}),
                'lob_asin_json': lob_asin_json,
                'sp_initial_seen_asins_json': sp_initial_seen_asins_json,
                'sp_4stars_initial_seen_asins_json': sp_4stars_initial_seen_asins_json,
                'sp_delivery_initial_seen_asins_json': sp_delivery_initial_seen_asins_json,
                'compare_similar_asin_json': compare_similar_asin_json,
                'customer_reviews_json': customer_reviews_json,
                'together_asin_json': together_asin_json,
                'min_match_asin_json': min_match_asin_json,
                'seller_json': seller_json,
                'variat_num': variat_num,
                'current_asin': current_asin,
                'img_list': img_list,
                'variat_list': variat_list,
                'parent_asin': parent_asin,
            }
            return item

        except Exception as e:
            print(f"{asin}的parse_data_new赋值出错,{e}")
