import re
from lxml import etree
import json


class ParseSearchTermUs(object):

    def __init__(self, page_source, driver=None, search_term=None, page=1, site_name='us'):
        """Parse ``page_source`` and set up empty result buckets.

        Args:
            page_source: HTML text of one search-result page.
            driver: Accepted for call compatibility; not used by this class.
            search_term: Keyword that produced the page.
            page: Page number of the result page (defaults to 1).
            site_name: Marketplace code such as ``'us'``, ``'uk'`` or ``'de'``.
        """
        # Keep both the parsed tree (for XPath) and the raw text (for regex).
        self.etree_html = etree.HTML(page_source)
        self.search_term = search_term
        self.search_term_html = page_source
        self.page = page
        self.site_name = site_name

        # One bucket of rows per placement type. Only zr and sp rows carry a
        # meaningful page / page_row.
        # Row layout: search_term, asin, page, page_row, cate_type,
        #             title, img, price, rating, reviews
        self.zr_list, self.sp_list, self.sb_list = [], [], []
        self.ac_list, self.bs_list, self.er_list = [], [], []
        self.tr_list, self.hr_list = [], []
        self.buy_text_list = []

        # Plain ASIN lists of ad placements, later excluded from the zr set.
        self.sb_list_all, self.sp_list_all = [], []

        # Overall result statistics -- only filled when page == 1.
        self.sold_list = []

    def parse_sold_quantity(self):
        """Record total-result statistics for the first result page.

        Does nothing unless ``self.page == 1``. Otherwise appends one row
        ``[search_term, quantity, cleaned_header_text, result_count_json,
        departments]`` to ``self.sold_list``:

        * ``quantity`` is taken from the result-info header (or, when that is
          absent, from the ``<h1>`` spans); it falls back to
          ``len(self.zr_list)`` when no number is found and is overridden by
          the embedded ``totalResultCount`` JSON when that can be decoded.
        * ``cleaned_header_text`` is the header text after noise removal, or
          ``None`` when the header was not found.
        * ``departments`` is the department names joined with ``'|-|'`` or
          ``None``.
        """
        if self.page != 1:
            return

        # str.replace() rejects None, and search_term defaults to None.
        term = self.search_term or ''

        def strip_noise(text, de_fragment):
            # Drop boilerplate, spaces, the paging range and the keyword.
            text = text.replace(de_fragment, '').replace('suggestions for', '').replace(" ", "")
            return text.replace("1-48", "").replace("1-16", "").replace(term, '')

        def drop_range(text):
            # Remove the first "<from>-<to>" range (hyphen, else en dash) so
            # its digits are not mistaken for the total.
            ranges = re.findall(r"\d+-\d+", text) or re.findall(r"\d+–\d+", text)
            return text.replace(ranges[0], '') if ranges else text

        header_texts = self.etree_html.xpath(
            '//span[@data-component-type="s-result-info-bar"]//h2/span/text()|//div[@class="a-section a-spacing-small a-spacing-top-small"]//span/text()')
        print("产品总数:", header_texts)

        digit_runs = []
        ele_text = None  # stays None when no usable header text was found
        if len(header_texts) > 0:
            # NOTE(review): this branch strips 'rgebnissen oder' while the
            # <h1> fallback strips 'Ergebnissen oder' -- kept as found.
            header_texts[0] = strip_noise(header_texts[0], 'rgebnissen oder')
            if len(header_texts[0]) < 80:
                ele_text = drop_range(
                    header_texts[0].replace(".", "").replace(",", "").replace(" ", "").replace("\xa0", ""))
                digit_runs = re.findall(r"(\d+)", ele_text)
                if len(digit_runs) > 1:
                    # Several numbers left: the last one is used as the total.
                    digit_runs = [digit_runs[-1]]
        else:
            h1_texts = self.etree_html.xpath('//h1//span/text()')
            print('产品总数::::', h1_texts)
            if len(h1_texts) > 1:
                h1_first = strip_noise(h1_texts[0], 'Ergebnissen oder')
                if len(h1_first) < 80:
                    ele_text = drop_range(h1_first.replace(".", "").replace(",", "").replace(" ", ""))
                    digit_runs = re.findall(r"(\d+)", ele_text)

        quantity_being_sold = digit_runs[0] if digit_runs else len(self.zr_list)
        if ele_text is not None:
            # Spaces were stripped above, so the phrase must be matched without
            # them. A digit run longer than 7 characters is treated as bogus.
            too_long = isinstance(quantity_being_sold, str) and len(quantity_being_sold) > 7
            if 'oneresultfor' in ele_text or too_long:
                quantity_being_sold = 1

        quantity_being_sold_str = header_texts[0] if header_texts else None

        # Prefer the count embedded in the page's inline script when present.
        result_count = None
        total_count_matches = re.findall(r'totalResultCount.*\);</script>', self.search_term_html)
        if total_count_matches:
            try:
                result_count = '{"' + total_count_matches[0].replace(');</script>', '')
                result_dict = json.loads(result_count.replace('\\', ''))
                quantity_being_sold = result_dict.get('totalResultCount')
            except (ValueError, AttributeError):
                # Undecodable or non-object payload: keep the header-based value.
                result_count = None

        print("**********************xxxxxxxxxxxxxxxxxx:", self.search_term, quantity_being_sold, result_count)
        departments_list = self.etree_html.xpath(
            "//div[@id='departments']//span[@class='a-declarative']//a/span/text()")
        departments = '|-|'.join(departments_list) if departments_list else None
        self.sold_list.append(
            [self.search_term, quantity_being_sold, quantity_being_sold_str, result_count, departments])

    def parse_asin_zr(self):
        """Return the ASINs left after removing sb and sp placements.

        Collects every ``data-asin`` value on the page, then removes one
        occurrence for each entry of ``self.sb_list_all`` followed by
        ``self.sp_list_all``. Only the first matching occurrence is removed
        per ad entry, so later duplicates of the same ASIN stay in the result.

        Side effect: ``self.asin_all`` is bound to the same list object that
        is returned. ``self.sb_list_all`` and ``self.sp_list_all`` are left
        untouched.
        """
        raw_asins = self.etree_html.xpath('//div[@data-asin]/@data-asin')
        # Joining and re-splitting on word characters drops empty attribute
        # values and any stray '/' in one pass.
        asin_all = re.findall(r"(\w+)", "-".join(raw_asins).replace('/', ''))
        self.asin_all = asin_all
        # Iterate a fresh combined list; extending self.sb_list_all in place
        # would leak sp ASINs into the sb bookkeeping.
        for ad_asin in self.sb_list_all + self.sp_list_all:
            if ad_asin in asin_all:
                asin_all.remove(ad_asin)
        return asin_all

    def parse_type_common(self, asin_list=None, cate_type=None):
        """Build one result row per distinct ASIN.

        Args:
            asin_list: Iterable of ASIN strings; duplicates are dropped while
                keeping first-seen order. ``None`` is treated as empty.
            cate_type: Placement code. Only ``'zr'`` and ``'sp'`` trigger a
                per-ASIN ``parse_detail`` lookup; every other value gets
                ``None`` placeholders for the detail columns.

        Returns:
            A list of rows ``[search_term, asin, page, page_row, 1, title,
            img, price, rating, reviews]`` where ``page_row`` is the 1-based
            position within the de-duplicated list.
        """
        unique_asins = list(dict.fromkeys(asin_list or []))
        wants_detail = cate_type in ('zr', 'sp')
        # Same column order as the dict returned by parse_detail().
        empty_detail = {"title": None, "img": None, "price": None, "rating": None, "reviews": None}

        asin_detail_all_list = []
        for page_row, asin in enumerate(unique_asins, start=1):
            row = [self.search_term, asin, self.page, page_row, 1]
            if wants_detail:
                row.extend(self.parse_detail(asin=asin, cate_type=cate_type).values())
            else:
                row.extend(empty_detail.values())
            asin_detail_all_list.append(row)
        return asin_detail_all_list

    def parse_other_type_common(self, cate_type='er', asin=None, page_row=None):
        """Parse a carousel section identified by its heading text.

        Scans every ``div`` with an empty ``data-asin`` and a ``data-index``,
        lower-cases its heading text and tests it against the phrases of the
        requested section type. The first matching section that contains
        carousel cards is parsed and scanning stops.

        Args:
            cate_type: ``'er'``, ``'tr'``, ``'ac'`` or ``'hr'``. Any other
                value matches nothing.
            asin: Unused; kept for call compatibility.
            page_row: Value stored as-is in the ``page_row`` column.

        Returns:
            Rows ``[search_term, asin, page, page_row, cate_type, title, img,
            price, rating, reviews]``. The ``cate_type`` column is ``1`` for
            er/ac, ``1`` or ``2`` for tr (``2`` = "from our brands" variant)
            and the unchanged ``'hr'`` string for hr.
        """
        # 'er' heading keywords per marketplace; any one of them is enough.
        er_keywords = {
            'us': ("editorial", "recommendations"),
            'de': ("empfehlungen", "verlagspartner"),
            'uk': ("recommended", "article"),
            'fr': ("article", "recommand"),
            'it': ("articolo", "consigliato"),
            'es': ("artículo", "recomendado"),
        }
        # 'tr' brand-variant headings (us, es, fr, it, de); both phrases required.
        tr_brand_phrases = (
            ("top rated", "from our brand"),
            ("lo mejor de", "nuestras marcas"),
            ("mieux noté parmi", "nos marques"),
            ("dai nostri", "marchi più amati"),
            ("bestbewertete", "eigenprodukte"),
        )
        # 'hr' headings (us/uk, de, fr, es, it).
        hr_phrases = ("highly rated", "hoch bewertet", "bonnes évaluations",
                      "mejor valorados", "valutazione alta")

        asin_detail_all_list = []
        for div in self.etree_html.xpath('//div[@data-asin="" and @data-index]'):
            heading_parts = div.xpath(
                './/h3/text()|.//div[contains(@class,"s-text-uppercase")]//text()|.//div[@class="a-fixed-left-grid"]//span//text()|.//div[@class="sg-row"]//span[contains(@class,"a-size-medium-plus")]/text()')
            if not heading_parts:
                continue
            content = ''.join(heading_parts).lower()

            # Keep the requested type in `cate_type` and track the stored
            # value separately, so every section is tested against the same
            # criteria.
            row_cate_type = cate_type
            matched = False
            if cate_type == 'er':
                matched = any(word in content for word in er_keywords.get(self.site_name, ()))
                row_cate_type = 1
            elif cate_type == 'tr':
                if "top rated" in content and "from our brand" not in content:
                    matched, row_cate_type = True, 1
                elif any(first in content and second in content
                         for first, second in tr_brand_phrases):
                    matched, row_cate_type = True, 2
            elif cate_type == 'ac':
                matched = ("amazon's" in content) and ("choice" in content)
                row_cate_type = 1
            elif cate_type == 'hr':
                matched = any(phrase in content for phrase in hr_phrases)
            if not matched:
                continue

            cards = div.xpath('.//li[@class="a-carousel-card"]')
            if not cards:
                continue
            for card in cards:
                card_asins = card.xpath('.//div[@data-asin]/@data-asin')
                if not card_asins:
                    continue
                # The detail values belong to the card, so look them up once.
                price_list = card.xpath('.//span[@class="a-price"]/span[@class="a-offscreen"]/text()')
                span_list = card.xpath('.//div[@class="a-row a-size-small"]/span[@aria-label]/@aria-label')
                img_list = card.xpath('.//img[@class="s-image" and @src]/@src')
                h2_list = card.xpath('.//h2//text()')
                asin_detail_dict = {
                    "title": h2_list[0] if h2_list else None,
                    "img": img_list[0] if img_list else None,
                    "price": price_list[0] if price_list else None,
                    "rating": None,
                    "reviews": None,
                }
                # Exactly two aria-labels are read as (rating, review count).
                if len(span_list) == 2:
                    asin_detail_dict['rating'], asin_detail_dict['reviews'] = span_list
                for card_asin in card_asins:
                    row = [self.search_term, card_asin, self.page, page_row, row_cate_type]
                    row.extend(asin_detail_dict.values())
                    asin_detail_all_list.append(row)
            # Only the first matching carousel is parsed.
            break
        return asin_detail_all_list

    def parse_zr(self):
        """Fill ``self.zr_list`` with rows for the ASINs left after ad removal.

        Best effort: any exception raised while parsing is swallowed and
        ``self.zr_list`` is left as it was.
        """
        try:
            organic_asins = self.parse_asin_zr()
            organic_rows = self.parse_type_common(asin_list=organic_asins, cate_type='zr')
            self.zr_list.extend(organic_rows)
        except Exception:
            # Deliberately ignored -- a broken page must not stop the run.
            return

    def parse_buy(self):
        """Collect purchase-volume text, badge labels and brand per ASIN.

        Iterates ``self.asin_all`` (set by ``parse_asin_zr``) and appends
        ``[search_term, asin, page, buy_text, label_data, brand]`` to
        ``self.buy_text_list`` for every ASIN that has a purchase text or a
        label. ``label_data`` joins the label texts with ``'&&&'``.

        If ``self.asin_all`` was never set, nothing is collected.
        """
        # parse_asin_zr() may have failed or not run; treat that as "no ASINs"
        # instead of raising AttributeError.
        for asin in getattr(self, 'asin_all', None) or []:
            # Purchase-volume text: try the specific phrase first, then two
            # progressively looser fallbacks.
            buy_text_list = self.etree_html.xpath(
                f'//div[@data-asin="{asin}"]//span[contains(text(),"bought in past")]/text()')
            if not buy_text_list:
                buy_text_list = self.etree_html.xpath(
                    f'//div[@data-asin="{asin}"]//div[@class="a-row a-size-base"]/span[@class="a-size-base a-color-secondary"]/text()')
            if not buy_text_list:
                if self.site_name == 'us' or self.site_name == 'uk':
                    buy_text_list = self.etree_html.xpath(
                        f'//div[@data-asin="{asin}"]//span[contains(text(),"past ")]/text()')
                else:
                    buy_text_list = self.etree_html.xpath(
                        f'//div[@data-asin="{asin}"]//span[contains(text()," im letzten")]/text()')
            # A first entry shorter than two characters counts as "no text".
            if not buy_text_list or len(buy_text_list[0]) < 2:
                buy_text_list = [None]
            print('月销：：', buy_text_list)

            label_text_list = self.etree_html.xpath(
                f"//div[@data-asin='{asin}']//div[contains(@class,'a-size-base a-color-base')]/a/text()")
            if label_text_list and len(label_text_list[0]) >= 2:
                # Drop blank fragments before joining.
                label_data = '&&&'.join(text for text in label_text_list if text not in (' ', ''))
            else:
                label_data = None

            asin_brand_list = self.etree_html.xpath(f'//div[@data-asin="{asin}"]//h2/following-sibling::div/span/text()|//div[@data-asin="{asin}"]//div[@data-cy="title-recipe"]//h2/span[@class="a-size-base-plus a-color-base"]/text()')
            asin_brand = asin_brand_list[0] if asin_brand_list else None

            if label_data or buy_text_list[0]:
                self.buy_text_list.append(
                    [self.search_term, asin, self.page, buy_text_list[0], label_data, asin_brand])

    def parse_sp(self):
        """Collect sponsored-product ASINs and build their rows.

        Three sources are queried in order: the "Amazon brands" block, the
        sponsored-label popover ids, and the block following a sponsored
        label. Every ASIN found is added to ``self.sp_list_all``; the
        de-duplicated set is turned into rows appended to ``self.sp_list``.
        Best effort: any exception is swallowed.
        """
        try:
            found_asins = []

            # 1) "from Amazon brands" block (German wording for other sites).
            if self.site_name in ('us', 'uk'):
                brand_sp_asin_list = self.etree_html.xpath(
                    '//span[contains(text(),"from Amazon brands")]/../../../../../../../../div//div/@data-csa-c-asin')
            else:
                brand_sp_asin_list = self.etree_html.xpath(
                    '//span[contains(text(),"von Amazon-Marken")]/../../../../../../../../div//div/@data-csa-c-asin')
            self.sp_list_all.extend(brand_sp_asin_list)
            found_asins.extend(brand_sp_asin_list)

            # 2) Element ids near the sponsored popover; the ASIN is the last
            #    '-'-separated token and must be at least 9 characters long.
            popover_ids = self.etree_html.xpath('//span[contains(@class,"label-popover-default")]/../../../div//@id')
            asin_list = [
                token for token in (element_id.split("-")[-1] for element_id in popover_ids)
                if len(token) >= 9
            ]
            self.sp_list_all.extend(asin_list)
            found_asins.extend(asin_list)

            # 3) Ad slots that follow a sponsored label (e.g. "Customers
            #    frequently viewed", "Today's deals").
            sp_label = 'Gesponsert' if self.site_name == 'de' else 'Sponsored'
            tag_asin_list = self.etree_html.xpath(
                f'//span[@class="a-declarative"]/span[contains(text(),"{sp_label}")]/../../../../../../../../div/following-sibling::span[2]//div/@data-asin|//span/a[contains(text(),"{sp_label}")]/../../../../../../../../div/following-sibling::span[2]//div/@data-asin')
            self.sp_list_all.extend(tag_asin_list)
            found_asins.extend(tag_asin_list)

            print('所有广告asin：', len(found_asins), 'brand_sp_asin_list:', len(brand_sp_asin_list), '正常sp asin_list::',
                  len(asin_list), "sp标签下广告位：", len(tag_asin_list))

            if found_asins:
                # Order-preserving de-duplication before building rows.
                distinct_asins = list(dict.fromkeys(found_asins))
                self.sp_list.extend(self.parse_type_common(asin_list=distinct_asins, cate_type='sp'))
        except Exception:
            # Deliberately ignored -- a broken page must not stop the run.
            pass

    def parse_sb(self, asin=None, page_row=None):
        # 解析头部sb, cate_type = 1 2024-5-9 修改最新获取。一次性获取所有视频后带着三个商品 的asin，
        # 1 获取头部品牌 包括 视频后的asin 2 只获取尾部。3 只获取视频里链接
        # //div[@data-type="brandLogo"]/a//@href  解析品牌视频
        try:
            cate_type = 1
            asin_list = self.etree_html.xpath('//div[@data-index="6"]//div[@class="_bGlmZ_content_2rsXy"]//@data-asin')
            if len(asin_list) == 0:
                asin_list = self.etree_html.xpath(
                    '//div[@data-avar="desc"]//@data-asin|//div[@data-avar="page"]//@data-asin|//div[@data-avar="deal"]//@data-asin')
            # 解析头部视频后面三个asin，只需要三个asin
            _sbv_list = []
            sbv1_asin_list = self.etree_html.xpath('//li//div[@data-type="productContainer"]/@data-asin')
            if sbv1_asin_list:
                if len(sbv1_asin_list) > 0:
                    asin_list.extend(sbv1_asin_list)
            else:
                # 既是视频又是品牌
                sbv2_asin_list = self.etree_html.xpath(
                    "//a[@data-type='brandedLogoHeaderLink']/@href|//div[@data-type='brandLogo']/a/@href")
                if sbv2_asin_list:
                    sbv1_asin_href = sbv2_asin_list[0]
                    svb_asins_list = re.findall("https://www.*/dp/(.*?)\?", sbv1_asin_href)
                    if len(svb_asins_list) == 0:
                        svb_asins_list = re.findall("asins=(.*?)&", sbv1_asin_href)
                        if svb_asins_list:
                            if len(svb_asins_list[0]) > 10:
                                svb_asins_list[0] = svb_asins_list[0][:10]
                        else:
                            svb_asins_list = re.findall("asins=(.*?)%2", sbv1_asin_href)
                            if svb_asins_list:
                                if len(svb_asins_list[0]) > 10:
                                    svb_asins_list[0] = svb_asins_list[0][:10]
                    else:
                        if len(svb_asins_list[0]) > 10:
                            svb_asins_list[0] = svb_asins_list[0][:10]
                    _sbv_list.extend(svb_asins_list)
            print('头部品牌:', asin_list, '视频后面三个asin:', sbv1_asin_list, '头部视频品牌:', _sbv_list)
            if asin_list:
                self.sb_list_all.extend([asin.replace('/', '') for asin in asin_list])
                if _sbv_list:
                    asin_list.extend(_sbv_list)
                for asin in asin_list:
                    asin_detail_list = [self.search_term, asin, self.page, page_row, cate_type]
                    asin_detail_list.extend(self.parse_detail(asin=asin).values())
                    self.sb_list.append(asin_detail_list)
        except Exception as e:
            pass
        # 解析尾部sb, cate_type = 2 -- 不符合
        try:
            cate_type = 2
            asin_list = self.etree_html.xpath('//div[@class="sb_1LIJTce6"]//a//@href')
            if len(asin_list) == 0:
                asin_list = self.etree_html.xpath(
                    '//a[@class="a-spacing-none a-link-normal _bXVsd_mainImageLink_1UpRh _bXVsd_link_gJc5l _bXVsd_hidden_L-XDK"]//@href|//div[@data-id="track"]/div/div/a/@href|//a[@class="a-link-normal _bXVsd_link_2cNGK _bXVsd_hidden_FUOrV"]/@href')
            asin_list = [re.findall("lp_asins=(.*?)&", asin)[0].split('%2C')[0].replace('/', '') for asin in asin_list
                         if 'lp_asins' in asin]

            print('尾部sb品牌:', asin_list)
            if asin_list:
                # self.sb_list_all.extend([asin.replace('/', '') for asin in asin_list])  #
                for asin in asin_list:
                    asin_detail_list = [self.search_term, asin.replace('/', ''), self.page, page_row, cate_type]
                    asin_detail_list.extend(self.parse_detail(asin=asin).values())
                    self.sb_list.append(asin_detail_list)
        except Exception as e:
            pass
        # 解析视频sb, cate_type = 3
        try:
            cate_type = 3
            # 解析视频 只有一个asin的
            if self.site_name == 'us' or self.site_name == 'uk':
                aria_label = 'Sponsored video'
            elif self.site_name == 'de':
                aria_label = 'Gesponsertes Video'
            else:
                aria_label = 'Sponsored video'

            sp_href = self.etree_html.xpath(
                f'//div[@class="a-section a-spacing-none faceout-product-title"]//a//@href|//a[contains(@aria-label,"{aria_label}")]/@href|//div[contains(@class,"sbv-video")]/a/@href|'
                f'//div[@class="a-section a-spacing-none puis-padding-right-small s-title-instructions-style faceout-product-title"]//a//@href|//video[contains(@aria-label,"Sponsored video")]/parent::a/@href')

            # print("解析视频sb, cate_type = 3:", sp_href)
            asin_list = []
            if len(sp_href):
                for href in sp_href:
                    asins = re.findall("https://www.*/dp/(.*?)\?", href)
                    if len(asins) == 0:
                        asins = re.findall("asins=(.*?)&", href)
                        if asins:
                            if len(asins[0]) > 10:
                                asins[0] = asins[0][:10]
                        else:
                            # 优化sbv 正则只匹配英文 数字，判断
                            asins = re.findall("asins=(.*?)%2", href)
                            if asins:
                                if len(asins[0]) > 10:
                                    asins[0] = asins[0][:10]
                    else:
                        if len(asins[0]) > 10:
                            asins[0] = asins[0][:10]
                    asin_list.extend(asins)
                print("解析视频sb sb_3:", asin_list)
                if asin_list:
                    # self.sb_list_all.extend([asin.replace('/', '') for asin in asin_list])
                    for i in asin_list:
                        asin_detail_list = [self.search_term, i.replace('/', ''), self.page, page_row, cate_type]
                        sb_title = self.etree_html.xpath(
                            '//div[@class="a-section a-spacing-none faceout-product-title"]//../h2//span//text()')
                        sb_img = self.etree_html.xpath('//img[@class="sbv-product-img"]/@src')
                        sb_price = self.etree_html.xpath(
                            '//div[@class="a-section a-spacing-none faceout-product-title"]//..//span[@class="a-offscreen"]//text()')
                        sb_rating = self.etree_html.xpath(
                            '//div[@class="a-section a-spacing-none faceout-product-title"]//..//span[@class="a-icon-alt"]//text()')
                        sb_review = self.etree_html.xpath(
                            '//div[@class="a-section a-spacing-none faceout-product-title"]//..//span[@class="a-size-base"]//text()')
                        sb_title = sb_title[0] if sb_title else None
                        sb_img = sb_img[0] if sb_img else None
                        sb_price = sb_price[0] if sb_price else None
                        sb_rating = sb_rating[0] if sb_rating else None
                        sb_review = sb_review[0] if sb_review else None
                        asin_detail_list.extend([sb_title, sb_img, sb_price, sb_rating, sb_review])
                        self.sb_list.append(asin_detail_list)
            else:
                pass
        except Exception as e:
            import traceback
            print("error:", traceback.format_exc())

    def parse_ac(self):
        """Collect "Amazon's Choice" placements into ``self.ac_list``.

        Badge element ids are reduced to their first ``'-'``-separated token
        (kept when at least 9 characters long); the carousel section is then
        parsed as well. Best effort: any exception is swallowed.
        """
        try:
            badge_ids = self.etree_html.xpath(
                '//span[@data-a-badge-color="sx-gulfstream" and @aria-hidden="true" ]//@id|//span[@data-a-badge-color="sx-gulfstream"]//@id|//span[contains(@id,"-amazons-choice-label")]/@id|//span[contains(@id,"-amazons-choice")]/@id')
            print('ac_asin_list:::', badge_ids)
            if badge_ids:
                # The union query can yield the same badge more than once;
                # parse_type_common() removes the duplicates.
                leading_tokens = (badge_id.split("-")[0] for badge_id in badge_ids)
                badged_asins = [token for token in leading_tokens if len(token) >= 9]
                self.ac_list.extend(self.parse_type_common(asin_list=badged_asins, cate_type='ac'))
            # Carousel ("horizontal bar") part.
            carousel_rows = self.parse_other_type_common(cate_type='ac')
            self.ac_list.extend(carousel_rows)
        except Exception:
            # Deliberately ignored -- a broken page must not stop the run.
            pass

    def parse_bs(self):
        """Collect best-seller badge placements into ``self.bs_list``.

        Badge element ids are reduced to their first ``'-'``-separated token
        (kept when at least 9 characters long). Best effort: any exception is
        swallowed.
        """
        try:
            badge_ids = self.etree_html.xpath(
                "//span[contains(text(),'estseller')]/parent::span//parent::span[contains(@id,'best-seller')]/@id|//span[contains(text(),'Seller')]/parent::span//parent::span[contains(@id,'best-seller')]/@id")
            print('############## bsr_asin::', badge_ids)
            if not badge_ids:
                return
            leading_tokens = (badge_id.split("-")[0] for badge_id in badge_ids)
            best_seller_asins = [token for token in leading_tokens if len(token) >= 9]
            # NOTE(review): 'sb' is passed here although this is the
            # best-seller parser; parse_type_common() only distinguishes
            # 'zr'/'sp', so the value currently has no effect -- confirm
            # whether 'bs' was intended.
            rows = self.parse_type_common(asin_list=best_seller_asins, cate_type='sb')
            self.bs_list.extend(rows)
        except Exception:
            # Deliberately ignored -- a broken page must not stop the run.
            pass

    def parse_er(self):
        """Append the 'er' (editorial recommendations) carousel rows to ``self.er_list``."""
        editorial_rows = self.parse_other_type_common(cate_type='er')
        self.er_list.extend(editorial_rows)

    def parse_tr(self):
        """Append the 'tr' (top rated) carousel rows to ``self.tr_list``."""
        top_rated_rows = self.parse_other_type_common(cate_type='tr')
        self.tr_list.extend(top_rated_rows)

    def parse_hr(self):
        """Append the 'hr' (highly rated) carousel rows to ``self.hr_list``."""
        highly_rated_rows = self.parse_other_type_common(cate_type='hr')
        self.hr_list.extend(highly_rated_rows)

    def parse_detail(self, asin=None, cate_type=None):
        """Look up the display details of one ASIN on the page.

        Args:
            asin: ASIN whose result card should be inspected.
            cate_type: ``'sb'`` selects the sponsored-brand queries (matched
                through link ``href``); any other value selects the queries
                scoped to ``div[@data-asin="<asin>"]``.

        Returns:
            A dict with the keys ``title``, ``img``, ``price``, ``rating``,
            ``reviews`` -- in that order, which callers rely on via
            ``.values()``. A field is ``None`` when nothing matched or its
            query failed.
        """
        def first_match(*queries):
            """Return the first result of the first query that yields any."""
            try:
                for query in queries:
                    found = self.etree_html.xpath(query)
                    if found:
                        return found[0]
            except Exception:
                # Best effort per field: a failing query leaves the field empty.
                pass
            return None

        card = f'//div[@data-asin="{asin}"]'
        img = first_match(f'{card}//img//@src')

        if cate_type == 'sb':
            title = first_match(f'{card}//a[@data-type="productTitle"]/text()')
            reviews = first_match(
                f'//a[contains(@aria-label,"Sponsored video") or contains(@href,"{asin}")]//span[contains(@class,"a-size-base")]/text()',
                f'//a[contains(@href,"{asin}")]//span[contains(@class,"a-size-base")]/text()')
            rating = first_match(
                f'//a[contains(@href,"{asin}")]/parent::div/parent::div/parent::div/parent::div//span/@aria-label')
            price = first_match(f'//a[contains(@href,"{asin}")]/span[@class="a-price"]/span/text()')
        else:
            title = first_match(
                f'{card}//span[contains(@class,"a-text-normal")]//text()',
                f'{card}//span[@class="a-truncate-cut"]//text()')
            reviews = first_match(
                f'{card}//span[@class="a-size-base"]//text()',
                f'{card}//span[@class="a-color-link"]//text()',
                f'{card}//span[contains(@class,"a-size-base")]/parent::a/parent::span/@aria-label')
            rating = first_match(f'{card}//span[@class="a-icon-alt"]//text()')
            price = first_match(f'{card}//span[@class="a-offscreen"]//text()')

        return {
            "title": title,
            "img": img,
            "price": price,
            "rating": rating,
            "reviews": reviews,
        }

    def run(self):
        """Run every parser in priority order and return the collected rows.

        Returns:
            A 10-tuple ``(zr_list, sp_list, sb_list, ac_list, bs_list,
            er_list, tr_list, sold_list, buy_text_list, hr_list)``.
        """
        pipeline = (
            self.parse_sb,  # priority 1 -- only head-banner ASINs are excluded later
            self.parse_sp,  # priority 2 -- sponsored-product ASINs are excluded later
            self.parse_zr,  # priority 3 -- every data-asin minus sb and sp
            self.parse_ac,
            self.parse_bs,
            self.parse_er,
            self.parse_tr,
            self.parse_hr,
            self.parse_buy,
            self.parse_sold_quantity,
        )
        for step in pipeline:
            step()

        print("self.list_zr:", len(self.zr_list), '页数：', self.page, 'page')
        print("self.list_sp:", len(self.sp_list))
        print('self.hr_list::', len(self.hr_list))
        # Sample product URL kept from the original for reference:
        # https://www.amazon.co.uk/dp/B09FLQD7VN?pd_rd_i=B09FLQD7VN&pd_rd_w=GwsFh&pf_rd_p=88aa1216-6e73-4bd1-9903-e6883ff8dae3&pd_rd_wg=2kZM8&pf_rd_r=P8P1KCGMPXS9XWH1NFQV&pd_rd_r=a7c81c84-a2aa-47ad-8bd9-055c75c99a28
        results = (
            self.zr_list, self.sp_list, self.sb_list, self.ac_list, self.bs_list,
            self.er_list, self.tr_list, self.sold_list, self.buy_text_list, self.hr_list,
        )
        return results
