"解析asin详情数据"
import html as html_module  # 为标准库的 html 模块设置别名
import os
import sys

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
import re
from lxml import etree
import datetime
from threading import Lock
import json


class ParseAsinUs(object):
    def __init__(self, resp=None, asin=None, week=None, date_info=None, data_type_asin=1, site_name=None, month=None):
        """Parse the raw page and set up the per-ASIN parsing state.

        Args:
            resp: Raw HTML text of the detail page; parsed with ``etree.HTML``.
            asin: ASIN the page belongs to.
            week: Stored as ``self.week``.
            date_info: Stored as ``self.date_info``.
            data_type_asin: Stored as ``self.data_type_asin`` (default ``1``).
            site_name: Marketplace code (compared against values such as
                ``'us'`` or ``'de'`` elsewhere in the class).
            month: Stored as ``self.month``.
        """
        print(f'请求 {site_name} 站点数据')

        # Raw page text and its parsed lxml tree.
        self.page_source = resp
        self.response_s = etree.HTML(resp)

        # Request context.
        self.asin = asin
        self.site_name = site_name
        self.week = week
        self.month = month
        self.date_info = date_info
        self.data_type_asin = data_type_asin

        # Result accumulators, all starting empty.
        self.all_img_video_list = []
        self.asin_variation_list = []
        self.buyBoxname_asin_list = []
        self.buyBox_list = []
        self.bs_category_asin_list_pg = []  # BSR text categories from the ASIN detail page

    def re_weight(self, weight):
        """Extract the numeric part of a weight string.

        Args:
            weight: Raw text such as ``"1.5 pounds"`` or ``"1,5 kg"``.

        Returns:
            ``0`` (an int) when the text contains an English month name, in
            which case it is not parsed as a weight at all. Otherwise a string
            holding the digits found in ``weight`` with ``.`` as the decimal
            separator; the string is empty when no digit is present.
        """
        month_names = ("June", "April", "January", "October",
                       "November", "August",
                       "March", "December", "July", "September",
                       "Feb", "May", "February")
        if any(month in weight for month in month_names):
            return 0

        is_us = self.site_name == 'us'
        # Outside the US site a comma may act as the decimal separator.
        has_separator = '.' in weight or (not is_us and ',' in weight)
        if has_separator:
            # Digit runs plus the separator that directly follows them. The
            # separator is matched explicitly so that the unit text after the
            # number (a space or a letter) is not dragged into the result.
            pieces = re.findall(r'\d+[.,]?', weight)
        else:
            # No separator at all: keep every digit.
            pieces = re.findall(r'\d', weight)

        number = ''.join(pieces)
        if is_us:
            # On the US site a comma is a thousands separator, not a decimal.
            number = number.replace(',', '')
        else:
            number = number.replace(',', '.')
        # A separator left dangling at the end carries no information.
        return number.rstrip('.')

    def price_replace(self, price, site):
        """Strip currency symbols and non-breaking spaces from a price string.

        For ``de``/``fr``/``es``/``it`` the ``.`` character is removed as well
        and the text is not trimmed. For every other site the text is trimmed
        first and ``$`` is removed instead of ``.``.

        Args:
            price: Raw price text.
            site: Marketplace code.

        Returns:
            The price text with the site-specific characters removed.
        """
        if site in ('de', 'fr', 'es', 'it'):
            unwanted = ('€', '\xa0', '.', '£')
        else:
            unwanted = ('$', '€', '\xa0', '£')
            # Trimming happens before the symbols are removed, so whitespace
            # that sat behind a symbol is kept.
            price = price.strip()
        for symbol in unwanted:
            price = price.replace(symbol, '')
        return price

    def get_wp(self, type_str):
        wp_list = self.response_s.xpath('//*[@id="detailBullets_feature_div"]/ul/li//span/span[2]/text()')
        for wp in wp_list:
            if type_str == 'Weight':
                if self.site_name in ['uk', 'de', 'fr', 'es', 'it']:
                    if ('logramm' in wp) or (' g' in wp) or (' kg' in wp) or ("gram" in wp) or ("rams" in wp):
                        Weight = wp.strip()
                        return Weight
                else:
                    if ('ounds' in wp) or ("unces" in wp) or ('grams' in wp):
                        Weight = wp.strip()
                        return Weight
            elif type_str == 'Package':
                if 'nche' in wp or "cm" in wp or 'centimetres' in wp or (wp.count('x') == 2 and '"D' in wp):
                    Package = wp.strip()
                    return Package
        return None

    def re_buy_Box(self, ASIN_XPATH):
        for i in ASIN_XPATH['invnetroySelect2']:
            buy_Box_list = self.response_s.xpath(i)
            if len(buy_Box_list) > 0:
                if "Add to Basket" in buy_Box_list[0] or "Add to Cart" in buy_Box_list[0] or 'In Stock' in buy_Box_list[
                    0] or 'Auf Lager' in buy_Box_list[0] or 'See All Buying Options' in buy_Box_list[0]:
                    page_inventory = 2
                else:
                    page_inventory = 3
                break
            else:
                page_inventory = 3
        return page_inventory

    def re_buy_sller(self, td_0_text, td_1_text):
        """Derive the buy-box seller type from two table-cell texts.

        Args:
            td_0_text: Non-empty list; only its first element is read.
                (Presumably the "ships from" cell; confirm against caller.)
            td_1_text: Non-empty list; only its first element is read.
                (Presumably the "sold by" cell; confirm against caller.)

        Returns:
            ``1`` (Amazon retail) when both values equal ``self.host`` or both
            contain ``'Amazon'``; ``3`` (FBM) when the two values are equal;
            ``2`` (FBA) otherwise.
        """
        both_are_host = td_0_text[0] == self.host and td_1_text[0] == self.host
        if both_are_host or ('Amazon' in td_0_text[0] and 'Amazon' in td_1_text[0]):
            return 1  # Amazon retail
        if td_0_text[0] == td_1_text[0]:
            return 3  # FBM
        return 2  # FBA

    # 获取 按功能划分的客户评分数据 B08YJMCQ75
    def get_customer_reviews(self, span_list):
        """Serialise "customer ratings by feature" text nodes to JSON.

        ``span_list`` is read in groups of three: the first item of a group is
        the feature label (trimmed), the second is its rating, and the third
        is ignored. A trailing label that has no rating after it is skipped.

        Args:
            span_list: Flat list of text values, or an empty/None value.

        Returns:
            A JSON string such as
            ``'[{"Light weight": "4.9"}, {"Suction power": "4.7"}]'``, or
            ``None`` when there is nothing to report.
        """
        if not span_list:
            return None
        # Stopping at len - 1 guarantees index start + 1 always exists, so a
        # dangling label at the end cannot raise IndexError.
        ratings = [
            {span_list[start].strip(): span_list[start + 1]}
            for start in range(0, len(span_list) - 1, 3)
        ]
        if not ratings:
            return None
        return json.dumps(ratings, ensure_ascii=False)

    # 获取详情 各个广告类型的asin，
    def get_initial_seen_asins(self, resp, sp_type):
        initialSeenAsins_list = re.findall(r'initialSeenAsins&quot;:(.*);],&quot', resp)
        if initialSeenAsins_list:
            initial_seen_asins_str = initialSeenAsins_list[0] + ']'
        else:
            initialSeenAsins_list = re.findall(r'initialSeenAsins\":(.*)],\"set', resp)
            initial_seen_asins_str = initialSeenAsins_list[0] + ']'
        pattern = re.compile(r'B\w+')
        initialSeenAsins = pattern.findall(initial_seen_asins_str)
        print(self.asin, sp_type, ' ===========initialSeenAsins:============= ', initialSeenAsins)
        SeenAsins_list = []
        if initialSeenAsins:
            for SeenAsins in initialSeenAsins:
                if sp_type == 'sp_products':
                    sp_data_json = self.response_s.xpath(
                        f'//div[contains(@id,"sp_detail2_{SeenAsins}")]/@data-adfeedbackdetails')
                elif sp_type == '4_stars':
                    sp_data_json = self.response_s.xpath(
                        f'//div[contains(@id,"sp_detail_thematic-highly_rated_{SeenAsins}")]/@data-adfeedbackdetails')
                elif sp_type == 'delivery_sp':
                    sp_data_json = self.response_s.xpath(
                        f'//div[contains(@id,"sp_detail2-prime_theme_for_non_prime_members_{SeenAsins}")]/@data-adfeedbackdetails')
                else:
                    sp_data_json = None
                initialSeenAsins_dict = {}
                SeenAsins_title = self.response_s.xpath(f'//a[contains(@href,"{SeenAsins}")]/@title')
                SeenAsins_title = SeenAsins_title[0] if SeenAsins_title else None
                SeenAsins_src_list = self.response_s.xpath(f'//a[contains(@href,"{SeenAsins}")]//img/@src')
                if SeenAsins_src_list:
                    if len(SeenAsins_src_list[0]) > 400:
                        SeenAsins_srcs = SeenAsins_src_list[-1]
                    else:
                        SeenAsins_srcs = SeenAsins_src_list[0]
                else:
                    SeenAsins_srcs = None
                SeenAsins_total_comments = self.response_s.xpath(
                    f'//a[contains(@href,"{SeenAsins}")]/i/following-sibling::span/text()')
                SeenAsins_total_comments = SeenAsins_total_comments[0] if SeenAsins_total_comments else None
                SeenAsins_price = self.response_s.xpath(
                    f'//a[contains(@href,"{SeenAsins}")]/span[contains(@class,"-price")]/text()')
                SeenAsins_price = SeenAsins_price[0] if SeenAsins_price else None
                initialSeenAsins_dict['seen_asins'] = SeenAsins
                initialSeenAsins_dict['seen_asins_title'] = SeenAsins_title
                initialSeenAsins_dict['seen_asins_src'] = SeenAsins_srcs
                initialSeenAsins_dict['seen_asins_total_comments'] = SeenAsins_total_comments
                initialSeenAsins_dict['seen_asins_price'] = SeenAsins_price
                initialSeenAsins_dict['sp_data_json'] = sp_data_json[0] if sp_data_json else None
                SeenAsins_list.append(initialSeenAsins_dict)
        if SeenAsins_list:
            SeenAsins_json = json.dumps(SeenAsins_list, ensure_ascii=False)
            return SeenAsins_json
        else:
            return None

    def get_price(self):
        i = f"//div[@data-csa-c-asin='{self.asin}']//span[@class='a-declarative']/a[contains(@href,'{self.asin}')]/span/text()|//div[@id='centerCol']//span[contains(@id,'size_name_')]/span/text()|//div[@id='centerCol']//span[contains(@id,'style_name_')]/span/text()|//div[@id='centerCol']//span[contains(text(),'$')]/text()"
        ele_price = self.response_s.xpath(i)
        print('ele_price::', ele_price)
        if ele_price:
            try:
                if self.site_name in ['de', 'fr', 'es', 'it']:
                    price_2 = self.price_replace(ele_price[0], self.site_name)
                    p = re.findall(r'\d.+', price_2)
                    print('二次价格获取小语言：', p)
                    price = p[0].replace(",", '.')
                else:
                    p = re.findall(r'\$\d.+', ele_price[0])
                    print('二次价格获取us：', p)
                    price = self.price_replace(p[0], self.site_name)
            except:
                price = -1
                return price
        else:
            price = None
        return price

    def pageinventory(self, pageinventory_list):
        """Report whether the list is free of digits.

        Every item is converted with ``str`` and scanned character by
        character.

        Returns:
            ``False`` as soon as any digit is found, ``True`` otherwise
            (including for an empty list).
        """
        for entry in pageinventory_list:
            for ch in str(entry):
                if ch.isdigit():
                    print("列表包含数字")
                    return False
        print("列表不包含数字")
        return True

    def clean_string(self, string):
        return string.strip().replace('\u200f', '').replace('  ', '').replace('\n', '').replace('\u200e', '')

    # 变体
    def add_variation(self, asin, color, size, style, state, parentAsin, other_name):
        """Append one variation row to ``self.asin_variation_list``.

        The stored column order differs from the parameter order:
        ``[asin, color, parentAsin, size, state, style, other_name]``.
        """
        row = [asin, color, parentAsin, size, state, style, other_name]
        self.asin_variation_list.append(row)

    def get_review(self, html, site_name):
        reviews_all_dict = {site_name: "//span[@data-hook='cr-widget-FocalReviews']",
                            'other': "//span[@class='global-reviews-all']"}
        review_json_list = []
        for key_site, value_xpath in reviews_all_dict.items():
            div_id_list = html.xpath(value_xpath + "//li[@data-hook='review']/@id")
            for div_id in div_id_list:
                user_href_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//div[@class='a-row a-spacing-mini']/a/@href")
                user_href = self.site_url + user_href_list[0] if user_href_list else None
                user_img_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//div[@class='a-row a-spacing-mini']//img/@data-src")
                user_img = self.site_url + user_img_list[0] if user_img_list else None
                user_name_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//div[@class='a-row a-spacing-mini']//span[@class='a-profile-name']/text()")
                user_name = user_name_list[0] if user_name_list else None
                review_star_rating_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//i[contains(@data-hook,'review-star-rating')]//text()")
                review_star_rating = review_star_rating_list[0] if review_star_rating_list else None
                if key_site == 'other':
                    review_title_list = html.xpath(
                        f"{value_xpath}//li[@id='{div_id}']//span[@data-hook='review-title']/span/text()")
                    review_title = review_title_list[0] if review_title_list else None
                else:
                    review_title_list = html.xpath(
                        f"{value_xpath}//li[@id='{div_id}']//a[@data-hook='review-title']/span/text()")
                    review_title = review_title_list[0] if review_title_list else None
                review_date_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//span[@data-hook='review-date']/text()")
                review_date = review_date_list[0] if review_date_list else None
                review_href_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//div[@class='a-row']//a/@href")
                review_href = self.site_url + review_href_list[0] if review_href_list else None
                var_data_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//span[@data-hook='format-strip-linkless']//text()")
                var_data = '|'.join(var_data_list) if var_data_list else None
                var_asin_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//div[@class='a-row a-spacing-mini review-data review-format-strip']//a/@href")
                if var_asin_list:
                    varasin_list = re.findall(r'reviews/(.*)/ref', var_asin_list[0])
                    var_asin = varasin_list[0] if varasin_list else None
                else:
                    var_asin = None
                vp_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//a[contains(@aria-label,'Verified Purchase')]//span/text()")
                verified_purchase = vp_list[0] if vp_list else None

                review_data_list = html.xpath(
                    f"{value_xpath}//li[@id='{div_id}']//span[@data-hook='review-body']//div[@data-hook='review-collapsed']/span/text()")
                review_data_list = ''.join(review_data_list).strip()
                review_data = review_data_list if review_data_list else None
                items = {
                    'title': review_title,
                    'content': review_data,
                    'model': var_data,
                    'rating': review_star_rating,
                    'userName': user_name,
                    "commentTime": review_date,
                    "commentId": div_id,
                    'country': key_site,
                }
                review_json_list.append(items)
        if review_json_list:
            review_json = json.dumps(review_json_list,ensure_ascii=False)
            return review_json
        else:
            return None

    def xpath_html(self):
        if self.site_name == "us":
            from utils.params_asin_xpath import US_ASIN_XPATH as ASIN_XPATH
            self.site_url = 'https://www.amazon.com'
            self.host = 'Amazon.com'
        elif self.site_name == 'uk':
            from utils.params_asin_xpath import UK_ASIN_XPATH as ASIN_XPATH
            self.site_url = 'https://www.amazon.co.uk'
            self.host = 'Amazon.co.uk'
        elif self.site_name == 'de':
            from utils.params_asin_xpath import DE_ASIN_XPATH as ASIN_XPATH
            self.site_url = 'https://www.amazon.de'
            self.host = 'Amazon.de'
        elif self.site_name == 'fr':
            from utils.params_asin_xpath import FR_ASIN_XPATH as ASIN_XPATH
            self.site_url = 'https://www.amazon.fr'
            self.host = 'Amazon.fr'
        elif self.site_name == 'es':
            from utils.params_asin_xpath import ES_ASIN_XPATH as ASIN_XPATH
            self.site_url = 'https://www.amazon.es'
            self.host = 'Amazon.es'
        elif self.site_name == 'it':
            from utils.params_asin_xpath import IT_ASIN_XPATH as ASIN_XPATH
            self.site_url = 'https://www.amazon.it'
            self.host = 'Amazon.it'

        try:
            # 分别抓取所有 carousel 的 data-options 和它们的标题 h2
            data_options_list = self.response_s.xpath(
                "//div[@data-marketplaceid='ATVPDKIKX0DER']/@data-a-carousel-options")
            h2_list = self.response_s.xpath("//div[@data-marketplaceid='ATVPDKIKX0DER']//h2/text()")
            result = {}
            result_sp = {}
            result_list = []
            # Customers also search us_B0D4QGW5RX.html
            data_sp_list = self.response_s.xpath(
                "//div[@class='a-column a-span8']/h2[contains(@class,'carousel-heading')]/text()")
            for sp_h2 in data_sp_list:
                print(sp_h2)
                if sp_h2 != 'Videos':
                    data_sp = self.response_s.xpath(
                        f"""//div[@class='a-column a-span8']/h2[contains(text(),"{sp_h2}")]/parent::div/parent::div/parent::div/parent::div/@data-a-carousel-options""")
                    if data_sp:
                        decoded_sp = html_module.unescape(data_sp[0])
                        decoded_sp = json.loads(decoded_sp)
                        if decoded_sp.get('ajax'):
                            inner_sp_h2_list = decoded_sp.get('ajax', {}).get('id_list', [])
                            sp_h2_asin_list = [item.split('|')[0] for item in inner_sp_h2_list]
                            if sp_h2_asin_list:
                                result_sp[sp_h2] = sp_h2_asin_list
            if result_sp:
                result_list.append(result_sp)
            if h2_list and data_options_list:
                count = min(len(data_options_list), len(h2_list))
                for i in range(count):
                    raw_json_str = data_options_list[i]
                    title = h2_list[i].strip()
                    # 解码 → 解析 → 提取 id 列表
                    decoded = html_module.unescape(raw_json_str)
                    outer = json.loads(decoded)
                    inner_list = outer.get('ajax', {}).get('id_list', [])
                    asin_list = [json.loads(item)['id'] for item in inner_list]
                    result[title] = asin_list
                if result:
                    result_list.append(result)
            h2_str_list = self.response_s.xpath(
                '//h2[contains(@class,"a-spacing-medium")]/text()|//div[@class="a-column a-span8"]/h2[contains(@class,"carousel-heading")]/text()')
            if h2_str_list:
                for h2_str in h2_str_list:
                    if h2_str != 'Videos':
                        data_asin_list = self.response_s.xpath(
                            f"""//h2[contains(text(),"{h2_str}")]/parent::div/parent::div//@data-asin|//h2[contains(text(),"{h2_str}")]/parent::div/parent::div/parent::div//@data-asin""")
                        print('h2_str_list::', h2_str, data_asin_list)
                        if data_asin_list:
                            result[h2_str] = data_asin_list
                            result_list.append(result)
            print('result_list 广告流量ASIN:', result_list)
            if result_list:
                # 2. 去重
                seen = set()
                unique = []
                for rec in result_list:
                    # 用 sorted keys + json.dumps 保证同样内容能匹配
                    key = json.dumps(rec, sort_keys=True)
                    if key not in seen:
                        seen.add(key)
                        unique.append(rec)
                # 3. 如果需要，再把结果转回 JSON 字符串
                result_list_json = json.dumps(unique, ensure_ascii=False)
            else:
                result_list_json = None

        except Exception as e:
            print('解析广告位报错：', e)
            result_list_json = None

        # 广告 Products related to this item
        sp_type_list = []
        sp_seen_asins_json = None
        for i in ASIN_XPATH['sp_num']:
            set_size_list = self.response_s.xpath(i)
            if set_size_list:
                try:
                    set_size = re.findall(r'"set_size":(\d+)\,"filteredItems', set_size_list[0])
                    sp_num = int(set_size[0])
                    sp_seen_asins_json = self.get_initial_seen_asins(set_size_list[0], 'sp_products')
                except:
                    sp_num = 0
                break
            else:
                sp_num = 0
        # 4 stars and above 广告
        sp_4stars_seen_asins_json = None
        for i in ASIN_XPATH['stars_sp_list']:
            stars_sp_list = self.response_s.xpath(i)
            if stars_sp_list:
                try:
                    stars_sp_num = re.findall(r'"set_size":(\d+)\,"filteredItems', stars_sp_list[0])[0]
                    sp_4stars_seen_asins_json = self.get_initial_seen_asins(stars_sp_list[0], '4_stars')
                except:
                    stars_sp_num = '0'
                break
            else:
                stars_sp_num = '0'
        # Related products with free delivery on eligible orders 广告
        sp_delivery_seen_asins_json = None
        for i in ASIN_XPATH['Delivery_sp_list']:
            Delivery_sp_list = self.response_s.xpath(i)
            if Delivery_sp_list:
                try:
                    Delivery_sp_num = re.findall(r'"set_size":(\d+)\,"filteredItems', Delivery_sp_list[0])[0]
                    sp_delivery_seen_asins_json = self.get_initial_seen_asins(Delivery_sp_list[0], 'delivery_sp')
                except:
                    Delivery_sp_num = '0'
                break
            else:
                Delivery_sp_num = '0'
        sp_type_list.append([str(sp_num), stars_sp_num, Delivery_sp_num])
        sp_type = ','.join(sp_type_list[0])
        # B081PN2PZQ mix &match
        min_match_asin_data_list = []
        min_match_asin_json = None
        for i in ASIN_XPATH['min_match_list']:
            min_match_list_asin_list = self.response_s.xpath(i)
            if min_match_list_asin_list:
                try:
                    for bundlesAsin in min_match_list_asin_list:
                        together_asin_dict = {}
                        if bundlesAsin != self.asin:
                            min_match_asin_titles = self.response_s.xpath(
                                f'//a[contains(@href,"{bundlesAsin}")]//span/text()')
                            min_match_asin_title = min_match_asin_titles[0] if min_match_asin_titles else None
                            min_match_asin_srcs_list = \
                                self.response_s.xpath(f'//a[contains(@href,"{bundlesAsin}")]//img/@data-src')
                            if min_match_asin_srcs_list:
                                if len(min_match_asin_srcs_list[0]) > 400:
                                    min_match_asin_src = min_match_asin_srcs_list[-1]
                                else:
                                    min_match_asin_src = min_match_asin_srcs_list[0]
                            else:
                                min_match_asin_src = None
                            min_match_asin_prices = self.response_s.xpath(
                                f'//a[contains(@href,"{bundlesAsin}")]/parent::div//div/span[contains(@class,"a-price")]/span/text()')
                            min_match_asin_price = min_match_asin_prices[0] if min_match_asin_prices else None
                            min_match_asin_total_comments = self.response_s.xpath(
                                f'//a[contains(@href,"{bundlesAsin}")]/parent::div//div/i[contains(@class,"-star-small")]/span/text()')
                            min_match_asin_total_comment = min_match_asin_total_comments[
                                0] if min_match_asin_total_comments else None
                            together_asin_dict['min_match_asin'] = bundlesAsin
                            together_asin_dict['min_match_asin_title'] = min_match_asin_title
                            together_asin_dict['min_match_asin_src'] = min_match_asin_src
                            together_asin_dict['min_match_asin_price'] = min_match_asin_price
                            together_asin_dict['min_match_asin_total_comment'] = min_match_asin_total_comment
                            min_match_asin_data_list.append(together_asin_dict)
                except:
                    break
        if min_match_asin_data_list:
            min_match_asin_json = json.dumps(min_match_asin_data_list, ensure_ascii=False)
        # bundles_this_asins ,Bundles with this item B0BPV8R4K8 变体下方位置。和五点描述挨着
        bundles_this_asins_data_list = []
        bundles_this_asins_data_json = None
        for i in ASIN_XPATH['bundles_this_asins']:
            bundles_this_asins_list = self.response_s.xpath(i)
            if bundles_this_asins_list:
                try:
                    for bundles_Asins in bundles_this_asins_list:
                        bundles_Asins_dict = {}
                        if bundles_Asins != self.asin:
                            bundles_asin_titles = self.response_s.xpath(
                                f'//a[contains(@href,"{bundles_Asins}")]/@title')
                            bundles_asin_title = bundles_asin_titles[-1] if bundles_asin_titles else None
                            bundles_asin_srcs_list = \
                                self.response_s.xpath(f'//a[contains(@href,"{bundles_Asins}")]/parent::div//img/@src')
                            if bundles_asin_srcs_list:
                                if len(bundles_asin_srcs_list[0]) > 400:
                                    bundles_asin_src = bundles_asin_srcs_list[-1]
                                else:
                                    bundles_asin_src = bundles_asin_srcs_list[0]
                            else:
                                bundles_asin_src = None
                            bundles_asin_prices = self.response_s.xpath(
                                f'//a[contains(@href,"{bundles_Asins}")]/parent::div//div[contains(@class,"bundle-price")]/span[contains(@class,"buying-price")]/text()')
                            bundles_asin_price = bundles_asin_prices[0] if bundles_asin_prices else None
                            bundles_Asins_dict['bundles_Asins'] = bundles_Asins
                            bundles_Asins_dict['bundles_asin_title'] = bundles_asin_title
                            bundles_Asins_dict['bundles_asin_src'] = bundles_asin_src
                            bundles_Asins_dict['bundles_asin_price'] = bundles_asin_price
                            bundles_this_asins_data_list.append(bundles_Asins_dict)
                except:
                    break
        if bundles_this_asins_data_list:
            bundles_this_asins_data_json = json.dumps(bundles_this_asins_data_list, ensure_ascii=False)
        # 捆绑销售 B0DD8W2DZD This bundle contains 2 items
        href_asin_list = self.response_s.xpath(
            "//div[@class='bundle-title']/following-sibling::div//div[@class='bundle-components']//div[contains(@id,'bundle-component-details-component-title')]/a/@href")
        bundle_asin_component_list = []
        if href_asin_list:
            bundle_component_asin_list = []
            for href_asin in href_asin_list:
                i_asin_list = re.findall(r'/dp/(.*)', href_asin)
                bundle_component_asin_list.append(i_asin_list[0])
            if bundle_component_asin_list:
                bundle_component_asin_list = list(set(bundle_component_asin_list))
                for bundle_component_asin in bundle_component_asin_list:

                    print('bundle_component_asin:', bundle_component_asin)
                    bundle_title_list = self.response_s.xpath(
                        f"//a[contains(@href,'{bundle_component_asin}')]/parent::div[contains(@id,'component-details-component-title')]/a/text()")
                    bundle_asin_title = bundle_title_list[0] if bundle_title_list else None
                    bundle_img_list = self.response_s.xpath(f"//a[contains(@href,'{bundle_component_asin}')]/img/@src")
                    bundle_asin_img = bundle_img_list[0] if bundle_img_list else None
                    bundle_review_list = self.response_s.xpath(
                        rf"//a[contains(@href,'{bundle_component_asin}')]/i[contains(@class,'component-details-component-review')]//following-sibling::span/text()")
                    bundle_asin_review = bundle_review_list[0] if bundle_review_list else None
                    bundle_starslist = self.response_s.xpath(
                        rf"//a[contains(@href,'{bundle_component_asin}')]/i[contains(@class,'component-details-component-review-stars')]/@class")
                    print('bundle_starslist::', bundle_starslist)
                    bundle_stars = bundle_starslist[0] if bundle_starslist else None
                    if bundle_stars:
                        bundle_stars_list = re.findall(r'a-star-(.*?) ', bundle_stars)
                        bundle_asin_star = bundle_stars_list[0].replace('-', '.') if bundle_stars_list else None
                    else:
                        bundle_asin_star = None
                    bundle_asin_price_list = self.response_s.xpath(
                        f"//a[contains(@href,'{bundle_component_asin}')]/parent::div/following-sibling::div[contains(@class,'component-details-component-prices')]/span/text()")
                    bundle_asin_price = bundle_asin_price_list[0] if bundle_asin_price_list else None
                    bundle_asin_point_list = self.response_s.xpath(
                        f"//a[contains(@href,'{bundle_component_asin}')]/parent::div/following-sibling::ul/li[contains(@id,'component-details-component-bullet-point')]/span/text()")
                    bundle_asin_point = '|-|'.join(bundle_asin_point_list) if bundle_asin_point_list else None
                    bundle_component_asin_item = {"bundle_component_asin": bundle_component_asin,
                                                  "bundle_asin_title": bundle_asin_title,
                                                  'bundle_asin_img': bundle_asin_img,
                                                  "bundle_asin_review": bundle_asin_review,
                                                  "bundle_asin_star": bundle_asin_star,
                                                  "bundle_asin_price": bundle_asin_price,
                                                  "bundle_asin_point": bundle_asin_point}
                    bundle_asin_component_list.append(bundle_component_asin_item)
        if bundle_asin_component_list:
            bundle_asin_component_json = json.dumps(bundle_asin_component_list)
        else:
            bundle_asin_component_json = None
        # 五点描述
        for i in ASIN_XPATH['five_data']:
            five_text_list = self.response_s.xpath(i)
            if five_text_list:
                text_list = []
                for f in five_text_list:
                    if len(f) > 10:
                        text_list.append(f.strip())
                if text_list:
                    ppd_data_five = '|-|'.join(text_list)
                else:
                    ppd_data_five = None
                break
            else:
                ppd_data_five = None
        # 产品信息描述B083BCTLP5 五点描述上方 product_attribute
        product_json = None
        td_dict = {}
        for i in ASIN_XPATH['product_attribute']:
            td_list = self.response_s.xpath(i)
            if td_list:
                for td in td_list:
                    td_key_list = td.xpath('.//text()')
                    td_key = ''.join(td_key_list).strip()
                    td_value_list = td.xpath('./following-sibling::td//span//text()')
                    try:
                        td_value = ''.join(td_value_list).strip()
                        td_dict[td_key] = td_value
                    except:
                        pass
                break
        if product_json is None:
            for i in ASIN_XPATH['product_attribute_1']:
                div_list = self.response_s.xpath(i)
                if div_list:
                    i = 1
                    for div in div_list:
                        try:
                            span1_list = div.xpath('./div[1]/span/span/text()')
                            span2_list = div.xpath(f'./div[{i + 1}]/span/span/text()')
                            span1_text = ''.join(span1_list)
                            span2_text = ''.join(span2_list)
                            td_dict[span1_text] = span2_text
                        except:
                            pass
                    break
        if len(td_dict) < 1:
            product_json = None
        else:
            filtered_dict = {key: value for key, value in td_dict.items() if value != ''}
            product_json = json.dumps(filtered_dict, ensure_ascii=False)

        # Product attributes / detail information from the bottom of the page
        productdetail = {}
        for i in ASIN_XPATH['proddetails_list_1']:
            span_list = self.response_s.xpath(i)
            if span_list:
                for span in span_list:
                    span_key_list = span.xpath('./text()')
                    span_key = ''.join(span_key_list).strip()
                    span_value_list = span.xpath('./following-sibling::span/text()')
                    td_value = ''.join(span_value_list)
                    productdetail[span_key] = td_value
                break
        if len(productdetail) == 0:
            for i in ASIN_XPATH['proddetails_list']:
                th_list = self.response_s.xpath(i)
                if th_list:
                    for th in th_list:
                        try:
                            th_key = th.xpath("./text()")[0].strip()
                            td_value = th.xpath('./following-sibling::td//text()')[0].strip()
                            productdetail[th_key] = td_value
                        except:
                            pass
                    break
        if len(productdetail) == 0:
            productdetail_json = None
        else:
            cleaned_dict = {self.clean_string(key): self.clean_string(value) for key, value in productdetail.items()}
            prcdt_dict = {key: value for key, value in cleaned_dict.items() if value != ''}
            productdetail_json = json.dumps(prcdt_dict, ensure_ascii=False)
        # 分类
        for i in ASIN_XPATH['category_href']:
            els_category_href_list = self.response_s.xpath(i)
        for i in ASIN_XPATH['category_data']:
            els_category = self.response_s.xpath(i)
            if els_category:
                category_list = []
                for e in els_category:
                    category_list.append(e.strip())
                category = ''.join(category_list)
                node_id = re.findall(r'node=(\d+)', els_category_href_list[-1])[0]
                break
            else:
                category = None
                node_id = None
        # 解析标题
        for i in ASIN_XPATH['title']:
            title_ = self.response_s.xpath(i)
            if title_:
                title = title_[0].strip()
                break
            else:
                title = None

        # 图片是否可以放大 1 可以 0 不可以
        if 'playVideoInImmersiveView' in self.page_source:
            ImmersiveView = re.findall(r'playVideoInImmersiveView(.*?)\,', self.page_source)
            if 'true' in ''.join(ImmersiveView):
                image_view = 1
            else:
                image_view = 0
        else:
            image_view = 0
        video_m3u8_list = []
        script_outer_list = self.response_s.xpath(
            "//div[@id='dp']//div[@data-feature-name='imageBlockVariations']//script//text()")
        if script_outer_list:
            try:
                if '.m3u8' in script_outer_list[0]:
                    m3u8_list = re.findall(r'"url":"(.*?)m3u8', script_outer_list[0])
                    for m3u8_url in m3u8_list:
                        video_m3u8_list.append(m3u8_url + 'm3u8')
                    if video_m3u8_list:
                        video_m3u8 = '|-|-||-|'.join(video_m3u8_list)
                    else:
                        video_m3u8 = None
                else:
                    video_m3u8 = None
            except:
                video_m3u8 = None
        else:
            video_m3u8 = None

        # 解析图片url
        for i in ASIN_XPATH['image']:
            image = self.response_s.xpath(i)
            if image:
                image = image[0]
                break
            else:
                image = None
        # Title length
        if title:
            title_len = len(title)
        else:
            title_len = None
        # 获取图片张数和url
        for i in ASIN_XPATH['imageThumbnail']:
            imageThumbnail_list = self.response_s.xpath(i)
            if imageThumbnail_list:
                video_img_count_num = len(imageThumbnail_list)  # 统计产品图片张数包括视频图片
                if ('play-' in imageThumbnail_list[-1] and len(imageThumbnail_list[-1]) > 82) or (
                        'video.' in imageThumbnail_list[-1] and len(imageThumbnail_list[-1]) > 99):
                    imageThumbnail_list.pop()
                data_type = 1
                img_num = 0
                for imageThumbnail_url in imageThumbnail_list:
                    img_num += 1
                    self.all_img_video_list.append([self.asin, imageThumbnail_url, img_num, data_type])
                break
            else:
                video_img_count_num = 0
        # 是否有视频
        video_A_type_list = []
        for i in ASIN_XPATH['videoThumbnail']:
            videoThumbnail_list = self.response_s.xpath(i)
            if videoThumbnail_list:
                if ('play-' in videoThumbnail_list[-1] and len(videoThumbnail_list[-1]) > 82) or (
                        'video.' in videoThumbnail_list[-1] and len(videoThumbnail_list[-1]) > 99):
                    video_A_type_list.append('2')
                    data_type = 2
                    video_num = len(videoThumbnail_list)
                    self.all_img_video_list.append([self.asin, videoThumbnail_list[-1], video_num, data_type])
                break
        # A+
        for i in ASIN_XPATH['aplus_img']:
            aplus_img_list = self.response_s.xpath(i)
            if aplus_img_list:
                if len(aplus_img_list) >= 2:
                    aplus_img_src_url = aplus_img_list[1]
                    data_type = 3
                    aplus_num = 1
                    video_A_type_list.append('3')
                    self.all_img_video_list.append([self.asin, aplus_img_src_url, aplus_num, data_type])
                break

        if video_A_type_list:
            video_A_type = '1,' + ','.join(video_A_type_list)
        else:
            video_A_type = '1'
        # 判断是否ip被反爬没有底部信息
        div_id_list = self.response_s.xpath(
            "//div[@id='btf_arenas']|//div[@data-csa-c-content-id='aplusBrandStory']|//div[contains(@id,'btfContent')]|//div[@data-csa-c-content-id='buffetServiceCard']|//h2[contains(text(),'Product details')]")
        # 产品描述
        for i in ASIN_XPATH['products_list']:
            products_text_list = self.response_s.xpath(i)
            if products_text_list:
                products_text = '|-|'.join(products_text_list).strip()
                if len(products_text) > 20:
                    product_description = products_text
                    break
                else:
                    product_description = None
            else:
                product_description = None
        # 星级
        for i in ASIN_XPATH['rating']:
            ele_rating = self.response_s.xpath(i)
            if ele_rating:
                if self.site_name == 'de':
                    ele_rating = re.findall(r"(.*) von", ele_rating[0])
                elif self.site_name == 'fr':
                    ele_rating = re.findall(r"(.*) sur", ele_rating[0])
                elif self.site_name == 'it':
                    ele_rating = re.findall(r"(.*) su", ele_rating[0])
                elif self.site_name == 'es':
                    ele_rating = re.findall(r"(.*) de", ele_rating[0])
                else:
                    ele_rating = re.findall(r"(.*) out", ele_rating[0])
                if ele_rating:
                    rating = ele_rating[0].replace(',', '.')
                else:
                    rating = None
                break
            else:
                rating = None
        # 品牌
        Brand_list_ = None
        for i in ASIN_XPATH['brand']:
            Brand_list = self.response_s.xpath(i)
            if Brand_list:
                try:
                    if self.site_name == 'de':
                        Brand_list_ = re.findall(r'den(.*)Store', Brand_list[0])
                        if Brand_list_:
                            break
                        else:
                            Brand_list_ = re.findall(r'Marke:(.*)', Brand_list[0])
                            if Brand_list_:
                                break
                    elif self.site_name == 'fr':
                        Brand_list_ = re.findall(r'boutique(.*)', Brand_list[0])
                        if Brand_list_:
                            break
                        else:
                            Brand_list_ = re.findall(r':(.*)', Brand_list[0])
                            if Brand_list_:
                                break
                    elif self.site_name == 'es':
                        Brand_list = re.findall(r'de (.*)', Brand_list[0])
                        if Brand_list:
                            break
                        else:
                            Brand_list_ = re.findall(r'Marca:(.*)', Brand_list[0])
                            if Brand_list_:
                                break
                    else:
                        Brand_list_ = re.findall(r'the(.*)Store', Brand_list[0])
                        if Brand_list_:
                            break
                        else:
                            Brand_list_ = re.findall(r'Brand(.*)', Brand_list[0])
                            if Brand_list_:
                                break
                except:
                    Brand_list_ = None
        if Brand_list_:
            Brand = Brand_list_[0].replace(':', '').strip()
        else:
            Brand = None
        if Brand is None:
            for i in ASIN_XPATH['brand2']:
                Brand_list = self.response_s.xpath(i)
                if Brand_list:
                    Brand = Brand_list[0]
                    break
                else:
                    Brand_href_list = self.response_s.xpath(
                        "//a[@id='amznStoresBylineLogoImageContainer']/following-sibling::a/text()")
                    if Brand_href_list:
                        Brand_list = re.findall(r'Store(.*)', Brand_href_list[0])
                        if Brand_list:
                            Brand = Brand_list[0].replace(',', '').strip()
                            break
                        else:
                            Brand = None
                    else:
                        Brand = None
        if Brand:
            if Brand[-1] == '-':
                # 使用 rsplit 方法从右边分割字符串
                parts = Brand.rsplit('-', 1)
                # 用空字符串重新组合字符串，这样就去掉了最后一个 '-'
                Brand = ''.join(parts)
        if product_json is not None:
            filtered_dict = json.loads(product_json)
            Brand_name = filtered_dict.get('Brand')
            if Brand_name:
                Brand = Brand_name
        # ac词
        for i in ASIN_XPATH['ac_name']:
            ac_name_list = self.response_s.xpath(i)
            if ac_name_list and Brand:
                ac_name = ac_name_list[0].replace(Brand, '')
                break
            else:
                ac_name = None
        # 评论数
        for i in ASIN_XPATH['reviews']:
            number_of_reviews = self.response_s.xpath(i)
            if number_of_reviews:
                number_of_reviews = number_of_reviews[0].strip().replace('\xa0', '')
                if self.site_name == 'de':
                    if "Sternebewertungen" in number_of_reviews:
                        total_comments = re.findall(r"(.*) Sternebewertungen", number_of_reviews)[0]
                    elif "Sternebewertung" in number_of_reviews[0]:
                        total_comments = re.findall(r"(.*) Sternebewertung", number_of_reviews)[0]
                    else:
                        total_comments = None
                    break
                elif self.site_name == 'fr':
                    if "évaluations" in number_of_reviews:
                        total_comments = re.findall(r"(.*)évaluations", number_of_reviews)[0]
                    else:
                        total_comments = number_of_reviews
                    break
                elif self.site_name == 'es':
                    if "valoraciones" in number_of_reviews:
                        total_comments = re.findall(r"(.*)valoraciones", number_of_reviews)[0]
                    else:
                        total_comments = number_of_reviews
                    break
                elif self.site_name == 'it':
                    if "voti" in number_of_reviews:
                        total_comments = re.findall(r"(.*)voti", number_of_reviews)[0]
                    else:
                        total_comments = number_of_reviews
                    break
                else:
                    if "rating" in number_of_reviews:
                        total_comments = re.findall(r"(.*) rating", number_of_reviews)[0]
                    else:
                        total_comments = number_of_reviews
                    break
            else:
                total_comments = None

        # 评论星级百分比
        for i in ASIN_XPATH['star5']:
            star5_list = self.response_s.xpath(i)
            print(star5_list, '2333333star5_list')
            if star5_list:
                star5 = re.findall(r"(\d+)%", star5_list[0])[0]
                break
            else:
                stars_5_list = re.findall(r'(\d+) percent of reviews have 5 stars', self.page_source)
                if stars_5_list:
                    star5 = stars_5_list[0]
                else:
                    star5 = 0
        for i in ASIN_XPATH['star4']:
            star4_list = self.response_s.xpath(i)
            if star4_list:
                star4 = re.findall(r"(\d+)%", star4_list[0])[0]
                break
            else:
                stars_4_list = re.findall(r'(\d+) percent of reviews have 4 stars', self.page_source)
                if stars_4_list:
                    star4 = stars_4_list[0]
                else:
                    star4 = 0
        for i in ASIN_XPATH['star3']:
            star3_list = self.response_s.xpath(i)
            if star3_list:
                star3 = re.findall(r"(\d+)%", star3_list[0])[0]
                break
            else:
                stars_3_list = re.findall(r'(\d+) percent of reviews have 3 stars', self.page_source)
                if stars_3_list:
                    star3 = stars_3_list[0]
                else:
                    star3 = 0
        for i in ASIN_XPATH['star2']:
            star2_list = self.response_s.xpath(i)
            if star2_list:
                star2 = re.findall(r"(\d+)%", star2_list[0])[0]
                break
            else:
                stars_2_list = re.findall(r'(\d+) percent of reviews have 2 stars', self.page_source)
                if stars_2_list:
                    star2 = stars_2_list[0]
                else:
                    star2 = 0
        for i in ASIN_XPATH['star1']:
            star1_list = self.response_s.xpath(i)
            if star1_list:
                star1 = re.findall(r"(\d+)%", star1_list[0])[0]
                break
            else:
                stars_1_list = re.findall(r'(\d+) percent of reviews have 1 stars', self.page_source)
                if stars_1_list:
                    star1 = stars_1_list[0]
                else:
                    star1 = 0

        low_star = int(star3) + int(star2) + int(star1)

        # 评论分析
        for i in ASIN_XPATH['review_ai_list']:
            ai_list = self.response_s.xpath(i)
            if ai_list:
                review_ai_text = ai_list[0]
                break
            else:
                review_ai_text = None
        # 评论分析标签
        for i in ASIN_XPATH['review_button_list']:
            button_list = self.response_s.xpath(i)
            button_dict = {}
            for button in button_list:
                try:
                    button_text = button.xpath('./text()')[0]
                    i = button_list.index(button)
                    span_text = self.response_s.xpath(f"//div[@id='aspect-bottom-sheet-0-{i}']//span/text()")
                    p_text = self.response_s.xpath(f"//div[@id='aspect-bottom-sheet-0-{i}']//p/text()")
                    _text = '&&&&'.join(span_text) + '|-|' + '&&&&'.join(p_text)
                    button_dict[button_text] = _text
                except:
                    pass
        if len(button_dict) < 1:
            review_label_json = None
        else:
            review_label_json = json.dumps(button_dict, ensure_ascii=False)

        # 材质
        for i in ASIN_XPATH['material']:
            Material_list = self.response_s.xpath(i)
            if Material_list:
                Material = Material_list[0]
                break
            else:
                Material = None
        # 套装
        for i in ASIN_XPATH['package_quantity']:
            package_quantity_list = self.response_s.xpath(i)
            if package_quantity_list:
                package_quantity = package_quantity_list[0].strip()
                break
            else:
                package_quantity = None
        # 样式
        for i in ASIN_XPATH['pattern_name']:
            pattern_name_list = self.response_s.xpath(i)
            if pattern_name_list:
                pattern_name = pattern_name_list[0].strip()
                break
            else:
                pattern_name = None
        # 价格
        for i in ASIN_XPATH['price']:
            ele_price = self.response_s.xpath(i)
            if ele_price:
                if self.site_name in ['de', 'fr', 'es', 'it']:
                    price = self.price_replace(ele_price[0], self.site_name)
                    price = price.replace(",", '.')
                    break
                else:
                    price = self.price_replace(ele_price[0], self.site_name)
                    if (price == '0.00' and len(ele_price) > 2) or (
                            'with try free for 7 days' in price and len(ele_price) > 2):
                        price = self.price_replace(ele_price[2], self.site_name)
                break
            else:
                price = None
        if price is None:
            price = self.get_price()
        elif len(price.strip()) < 1:
            price = self.get_price()

        if self.site_name == 'us':
            # 判断是否有 Coupon 促销类型
            deal_type = []
            coupon_int = None
            Voucher_list = self.response_s.xpath(
                f"//div[@data-csa-c-asin='{self.asin}']//label/parent::div/parent::span/parent::span/parent::div/following-sibling::span[contains(@id,'coupon')]/text()|//div[@data-csa-c-asin='{self.asin}']//label/text()|//label[contains(@id,'couponText')]/text()")
            if Voucher_list:
                Voucher = ''.join(Voucher_list)
            else:
                for i in ASIN_XPATH['coupon']:
                    Voucher_list = self.response_s.xpath(i)
                    if Voucher_list:
                        Voucher = ''.join(Voucher_list).replace('\xa0', '')
                        break
                    else:
                        Voucher = None
            if Voucher:
                if "coupon" in Voucher or "Coupon" in Voucher or 'Quantity' in Voucher:
                    coupon = re.findall(r'Save (\d+)%', Voucher)
                    if coupon:
                        deal_type.append('1')
                    else:
                        if '.' not in Voucher:
                            coupon = re.findall(r'Save \$(\d+)', Voucher)
                        else:
                            coupon = re.findall(r'Save \$(\d.\d+)', Voucher)
                        if len(coupon) == 0:
                            coupon = re.findall(r'\$(\d.?) on', Voucher)
                            if len(coupon) == 0:
                                coupon = re.findall(r'\$(\d+) ', Voucher)
                                if len(coupon) == 0:
                                    coupon = re.findall(r'\$(\d+)', Voucher)
                        if coupon:
                            deal_type.append('2')
                        else:
                            coupon = re.findall(r'Apply (\d+)%', Voucher)
                            if len(coupon) == 0:
                                coupon = re.findall(r'(\d+)%', Voucher)
                            if coupon:
                                deal_type.append('1')
                    if coupon:
                        try:
                            coupon_int = float(coupon[0].replace(',', '.'))
                        except:
                            coupon_int = None
                    else:
                        coupon_int = None

            Join_Prime_int = None
            for i in ASIN_XPATH['Join_Prime']:
                Join_Prime = self.response_s.xpath(i)
                if Join_Prime:
                    Join_Prime_list = re.findall(r'\$(\d.\d)', Join_Prime[0])
                    try:
                        if Join_Prime_list:
                            Join_Prime_int = float(Join_Prime_list[0])
                            deal_type.append('4')
                        else:
                            Join_Prime_list = re.findall(r'\$(\d)', Join_Prime[0])
                            Join_Prime_int = float(Join_Prime_list[0])
                            deal_type.append('4')
                        break
                    except:
                        Join_Prime_int = None

            for i in ASIN_XPATH['Deal_Price']:
                Deal_Price = self.response_s.xpath(i)
                if Deal_Price:
                    deal_type.append('5')
                    break
            for i in ASIN_XPATH['Top_Deal']:
                Top_Deal = self.response_s.xpath(i)
                if Top_Deal:
                    deal_type.append('6')
                    break
            for i in ASIN_XPATH['Extra_Savings']:
                Extra_Savings = self.response_s.xpath(i)
                if Extra_Savings:
                    deal_type.append('7')
                    break
            # 30天最低价
            for i in ASIN_XPATH['delight_pricing']:
                delight_pricing_list = self.response_s.xpath(i)
                if delight_pricing_list:
                    delight_pricing = ''.join(delight_pricing_list)
                    break
                else:
                    delight_pricing = None
            # delight_pricing_str = None
            # if delight_pricing:
            #     if "Save" in delight_pricing and "%" in delight_pricing:
            #         delight_pricing_str = True

            for i in ASIN_XPATH['price_30']:
                price_30 = self.response_s.xpath(i)
                if price_30:
                    break
                else:
                    price_30 = None

            for i in ASIN_XPATH['You_Save']:
                You_Save = self.response_s.xpath(i)
                if You_Save:
                    break
                else:
                    You_Save = None

            if price_30 and You_Save:
                for i in ASIN_XPATH['You_Save_price_list']:
                    You_Save_price_list = self.response_s.xpath(i)
                    if You_Save_price_list:
                        Save_price_join = ''.join(You_Save_price_list)
                        Save_price = re.findall(r"(\d+)%", Save_price_join)
                        try:
                            Save_price_int = int(Save_price[0])
                            deal_type.append('8')
                        except:
                            Save_price_int = None
                        break
                    else:
                        Save_price_int = None
            else:
                Save_price_int = None

            if deal_type:
                deal_type = ','.join(deal_type)
            else:
                deal_type = None
        elif self.site_name == 'uk':
            # 判断是否有 Coupon 促销类型
            coupon_deal_list = []
            coupon_int = None
            Voucher_buy = ''
            Voucher_list = self.response_s.xpath(
                f"//span[contains(@class,'coupon')]/text()|//div[@data-csa-c-asin='{self.asin}']//label/parent::div/parent::span/parent::span/parent::div/following-sibling::span[contains(@id,'coupon')]/text()//div[@data-csa-c-asin='{self.asin}']/label/text()|//label[contains(@id,'couponText')]/text()|//span[contains(text(),'Voucher')]/text()|//div[@data-csa-c-asin='{self.asin}']/label/text()|//span[contains(text(),'voucher')]/parent::div/span/text()")
            print('uk, Voucher_list::', Voucher_list)
            if Voucher_list:
                Voucher = ''.join(Voucher_list).replace('\xa0', '')
                if 'Voucher' in Voucher or 'voucher' in Voucher or 'Quantity' in Voucher:
                    Voucher_buy = '1'
                    coupon = re.findall(r'Save (\d+)%', Voucher)
                    if coupon:
                        coupon_deal_list.append('1')
                    else:
                        coupon = re.findall(r'Save £(\d+)', Voucher)
                        if coupon:
                            coupon_deal_list.append('2')
                        else:
                            coupon = re.findall(r'Apply (\d+)%', Voucher)
                            if coupon:
                                coupon_deal_list.append('1')
                    if coupon:
                        try:
                            coupon_int = int(coupon[0])
                        except:
                            coupon_int = None
                    else:
                        coupon_int = None
            Promotion_Message_int = None
            for i in ASIN_XPATH['Promotion_Message']:
                Promotion_Message = self.response_s.xpath(i)
                if Promotion_Message:
                    Promotion_Message_join = ''.join(Promotion_Message)
                    Promotion_Message_str_list = re.findall(r'Save (\d+)%', Promotion_Message_join)
                    if len(Promotion_Message_str_list) == 0:
                        Promotion_Message_str_list = re.findall(r'(\d+)%', Promotion_Message_join)
                    try:
                        Promotion_Message_int = float(Promotion_Message_str_list[0])
                        coupon_deal_list.append('3')
                    except:
                        pass
                    break

            Join_Prime_int = None
            for i in ASIN_XPATH['Join_Prime']:
                Join_Prime = self.response_s.xpath(i)
                if Join_Prime:
                    Join_Prime_list = re.findall(r'£(\d.\d)', Join_Prime[0])
                    try:
                        if Join_Prime_list:
                            Join_Prime_int = float(Join_Prime_list[0])
                            coupon_deal_list.append('6')
                        else:
                            Join_Prime_list = re.findall(r'£(\d)', Join_Prime[0])
                            Join_Prime_int = float(Join_Prime_list[0])
                            coupon_deal_list.append('6')
                    except:
                        Join_Prime_int = None
                    break

            if len(Voucher_list) > 1 and Voucher_buy:
                for i in ASIN_XPATH['Buy_Deal']:
                    Buy_Deal = self.response_s.xpath(i)
                    if Buy_Deal:
                        coupon_deal_list.append('7')
                        break
            for i in ASIN_XPATH['Top_Deal']:
                Top_Deal = self.response_s.xpath(i)
                if Top_Deal:
                    coupon_deal_list.append('8')
                    break
            for i in ASIN_XPATH['Deal_Price']:
                Deal_Price = self.response_s.xpath(i)
                if Deal_Price:
                    coupon_deal_list.append('9')
                    break

            # 30天最低价
            for i in ASIN_XPATH['delight_pricing']:
                delight_pricing_list = self.response_s.xpath(i)
                if delight_pricing_list:
                    delight_pricing = ''.join(delight_pricing_list)
                    break
                else:
                    delight_pricing = None

            # delight_pricing_str = None
            # if delight_pricing:
            #     if "Save" in delight_pricing and "%" in delight_pricing:
            #         delight_pricing_str = True
            for i in ASIN_XPATH['price_30']:
                price_30_list = self.response_s.xpath(i)
                if price_30_list:
                    price_30 = price_30_list[0]
                    break
                else:
                    price_30 = None
            for i in ASIN_XPATH['You_Save']:
                You_Save = self.response_s.xpath(i)
                if You_Save:
                    break
                else:
                    You_Save = None

            if price_30 and You_Save:
                for i in ASIN_XPATH['You_Save_price_list']:
                    You_Save_price_list = self.response_s.xpath(i)
                    if You_Save_price_list:
                        Save_price_join = ''.join(You_Save_price_list)
                        Save_price = re.findall(r"(\d+)%", Save_price_join)
                        try:
                            Save_price_int = int(Save_price[0])
                            coupon_deal_list.append('10')
                        except:
                            Save_price_int = None
                            pass
                        break
                    else:
                        Save_price_int = None
            else:
                Save_price_int = None

            if coupon_deal_list:
                deal_type = ','.join(coupon_deal_list)
            else:
                deal_type = None
        elif self.site_name == 'de':
            # 判断是否有 Coupon 促销类型
            coupon_deal_list = []
            coupon_int = None
            deal_type = []
            coupon_trne = ''
            ASIN_XPATH['coupon'].append(f"//div[@data-csa-c-asin='{self.asin}']//label/text()")
            for i in ASIN_XPATH['coupon']:
                Voucher_list = self.response_s.xpath(i)
                if Voucher_list:
                    Voucher = ''.join(Voucher_list)
                    Voucher = Voucher.replace('\xa0', '')
                    coupon = re.findall(r'und (\d.*?)%', Voucher)
                    if len(coupon) == 0:
                        coupon = re.findall(r'(\d.*?)%', Voucher)
                    if coupon:
                        try:
                            coupon_int = float(coupon[0].replace(',', '.'))
                            coupon_deal_list.append('1')
                            coupon_trne = '1'
                        except:
                            coupon_int = None
                        break
                    else:
                        coupon = re.findall(r'und (\d.*?)€', Voucher)
                        if len(coupon) == 0:
                            coupon = re.findall(r'(\d.*?)€', Voucher)
                        if coupon:
                            try:
                                coupon_int = float(coupon[0].replace(',', '.'))
                                coupon_deal_list.append('2')
                                coupon_trne = '1'
                            except:
                                coupon_int = None
                            break
            Sparen_int = None
            if len(coupon_trne) > 0:
                for i in ASIN_XPATH['Sparen_Sie']:
                    Sparen_Sie = self.response_s.xpath(i)
                    if Sparen_Sie:
                        Sparen_str = ''.join(Sparen_Sie)
                        Sparen_ = re.findall(r'Sie (\d.*?)%', Sparen_str)
                        if Sparen_:
                            try:
                                Sparen_int = float(Sparen_[0])
                                coupon_deal_list.append('3')
                            except:
                                Sparen_int = None
                            break
                        else:
                            Sparen_ = re.findall(r'Sie (\d.*?)€', Sparen_str)
                            try:
                                Sparen_int = float(Sparen_[0])
                                coupon_deal_list.append('4')
                            except:
                                Sparen_int = None
                            break
            Aktuelle_Angebote_int = None
            for i in ASIN_XPATH['Aktuelle_Angebote']:
                Aktuelle_Angebote = self.response_s.xpath(i)
                if Aktuelle_Angebote:
                    Aktuelle_Angebote_str = ''.join(Aktuelle_Angebote)
                    Aktuelle_Angebote_ = re.findall(r"(\d.*?)%", Aktuelle_Angebote_str)
                    try:
                        Aktuelle_Angebote_int = float(Aktuelle_Angebote_[0])
                        coupon_deal_list.append('5')
                    except:
                        Aktuelle_Angebote_int = None
                    break

            for i in ASIN_XPATH['Angebotspreis']:
                Angebote = self.response_s.xpath(i)
                if Angebote:
                    coupon_deal_list.append('7')

            for i in ASIN_XPATH['Top_Angebot']:
                Top_Angebot = self.response_s.xpath(i)
                if Top_Angebot:
                    coupon_deal_list.append('8')

            # 30天最低价
            for i in ASIN_XPATH['delight_pricing_list']:
                delight_pricing_list = self.response_s.xpath(i)
                delight_pricing = ''.join(delight_pricing_list)
                break
            # delight_pricing_str = None
            # if delight_pricing:
            #     if "Sparen Sie" in delight_pricing and '%' in delight_pricing:
            #         delight_pricing_str = True
            for i in ASIN_XPATH['price_30']:
                price_30 = self.response_s.xpath(i)

            for i in ASIN_XPATH['You_Save']:
                You_Save = self.response_s.xpath(i)

            Save_price_int = None
            if price_30 and You_Save:
                for i in ASIN_XPATH['You_Save_price_list']:
                    You_Save_price_list = self.response_s.xpath(i)
                    if You_Save_price_list:
                        Save_price_join = ''.join(You_Save_price_list)
                        Save_price = re.findall(r"(\d+)%", Save_price_join)
                        try:
                            Save_price_int = int(Save_price[0])
                            coupon_deal_list.append('9')
                        except:
                            Save_price_int = None
                            pass
                        break

            if coupon_deal_list:
                deal_type = ','.join(coupon_deal_list)
            else:
                deal_type = None
        elif self.site_name == 'fr':
            coupon_deal_list = []
            coupon_int = None
            Voucher_buy = ''
            Voucher_list = self.response_s.xpath(
                f"//div[@data-csa-c-asin='{self.asin}']/label/text()|//span[contains(text(),'coupon')]/text()|//div[@data-csa-c-asin='{self.asin}']/label/text()|//span[contains(text(),'Coupon')]/parent::div/span/text()")
            if Voucher_list:
                Voucher = ''.join(Voucher_list).replace('\xa0', '')
                if 'coupon' in Voucher or 'Coupon' in Voucher:
                    Voucher_buy = '1'
                    Voucher = Voucher.replace('\xa0', '')
                    coupon = re.findall(r'Économisez (\d+)%', Voucher)
                    if coupon:
                        coupon_deal_list.append('1')
                    else:
                        coupon = re.findall(r'Économisez (\d+)€', Voucher)
                        if coupon:
                            coupon_deal_list.append('2')
                        else:
                            coupon = re.findall(r' (\d+)%', Voucher)
                            if coupon:
                                coupon_deal_list.append('1')
                            else:
                                coupon = re.findall(r' (\d+)€', Voucher)
                                if coupon:
                                    coupon_deal_list.append('2')
                    if coupon:
                        try:
                            coupon_int = int(coupon[0])
                        except:
                            coupon_int = None
                    else:
                        coupon_int = None
            Promotion_Message_int = None
            for i in ASIN_XPATH['Promotion_Message']:
                Promotion_Message = self.response_s.xpath(i)
                if Promotion_Message:
                    Promotion_Message_join = ''.join(Promotion_Message)
                    Promotion_Message_str_list = re.findall(r'Save (\d+)%', Promotion_Message_join)
                    if len(Promotion_Message_str_list) == 0:
                        Promotion_Message_str_list = re.findall(r'(\d+)%', Promotion_Message_join)
                    try:
                        Promotion_Message_int = float(Promotion_Message_str_list[0])
                        coupon_deal_list.append('3')
                    except:
                        print(self.asin, '转换 Promotion_Message 失败')
                    break

            Join_Prime_int = None
            for i in ASIN_XPATH['Join_Prime']:
                Join_Prime = self.response_s.xpath(i)
                if Join_Prime:
                    Join_Prime_ = Join_Prime[0].replace('\xa0', '').strip()
                    Join_Prime_list = re.findall(r'(\d.\d),', Join_Prime_)
                    try:
                        if Join_Prime_list:
                            Join_Prime_int = float(Join_Prime_list[0])
                            coupon_deal_list.append('6')
                        else:
                            Join_Prime_list = re.findall(r'(\d),', Join_Prime_)
                            Join_Prime_int = float(Join_Prime_list[0])
                            coupon_deal_list.append('6')
                    except:
                        print(self.asin, "转换 Join_Prime 优惠失败")
                        Join_Prime_int = None
                    break

            if len(Voucher_list) > 1 and Voucher_buy:
                for i in ASIN_XPATH['Buy_Deal']:
                    Buy_Deal = self.response_s.xpath(i)
                    if Buy_Deal:
                        coupon_deal_list.append('7')
                        break
            for i in ASIN_XPATH['Offre_star']:
                Offre_star = self.response_s.xpath(i)
                if Offre_star:
                    coupon_deal_list.append('8')
                    break

            for i in ASIN_XPATH['Prix_de']:
                Prix_de = self.response_s.xpath(i)
                if Prix_de:
                    coupon_deal_list.append('9')
                    break
            # 30天最低价
            for i in ASIN_XPATH['delight_pricing']:
                delight_pricing_list = self.response_s.xpath(i)
                delight_pricing = ''.join(delight_pricing_list)
                break
            # delight_pricing_str = None
            # if len(delight_pricing) > 0:
            #     if "Sparen Sie" in delight_pricing and '%' in delight_pricing:
            #         delight_pricing_str = True
            for i in ASIN_XPATH['price_30']:
                price_30 = self.response_s.xpath(i)

            for i in ASIN_XPATH['You_Save']:
                You_Save = self.response_s.xpath(i)

            Save_price_int = None
            if price_30 and You_Save:
                for i in ASIN_XPATH['You_Save_price_list']:
                    You_Save_price_list = self.response_s.xpath(i)
                    if You_Save_price_list:
                        Save_price_join = ''.join(You_Save_price_list)
                        Save_price = re.findall(r"(\d+)%", Save_price_join)
                        try:
                            Save_price_int = int(Save_price[0])
                            coupon_deal_list.append('10')
                        except:
                            Save_price_int = None
                            pass
                        break
            if coupon_deal_list:
                deal_type = ','.join(coupon_deal_list)
            else:
                deal_type = None
        elif self.site_name == 'it':
            coupon_deal_list = []
            coupon_int = None
            Voucher_buy = ''
            Voucher_list = self.response_s.xpath(
                f"//div[@data-csa-c-asin='{self.asin}']/label/text()|//span[contains(text(),'coupon')]/text()|//div[@data-csa-c-asin='{self.asin}']/label/text()|//span[contains(text(),'Coupon')]/parent::div/span/text()")
            if Voucher_list:
                Voucher = ''.join(Voucher_list)
                if 'Coupon' in Voucher or 'coupon' in Voucher or 'Quantity' in Voucher:
                    Voucher_buy = '1'
                    Voucher = Voucher.replace('\xa0', '')
                    coupon = re.findall(r' (\d+)%', Voucher)
                    if coupon:
                        coupon_deal_list.append('1')
                    else:
                        coupon = re.findall(r' (\d+)€', Voucher)
                        if coupon:
                            coupon_deal_list.append('2')
                        else:
                            coupon = re.findall(r' (\d+)%.', Voucher)
                            if coupon:
                                coupon_deal_list.append('1')
                            else:
                                coupon = re.findall(r' (\d+)€', Voucher)
                                if coupon:
                                    coupon_deal_list.append('2')
                    if coupon:
                        try:
                            coupon_int = int(coupon[0])
                        except:
                            coupon_int = None
                    else:
                        coupon_int = None

            Promotion_Message_int = None
            for i in ASIN_XPATH['Promotion_Message']:
                Promotion_Message = self.response_s.xpath(i)
                if Promotion_Message:
                    Promotion_Message_join = ''.join(Promotion_Message)
                    Promotion_Message_str_list = re.findall(r'Save (\d+)%', Promotion_Message_join)
                    if len(Promotion_Message_str_list) == 0:
                        Promotion_Message_str_list = re.findall(r'(\d+)%', Promotion_Message_join)
                    try:
                        Promotion_Message_int = float(Promotion_Message_str_list[0])
                        coupon_deal_list.append('3')
                    except:
                        print(self.asin, '转换 Promotion_Message 失败', Promotion_Message_str_list)
                    break
            Join_Prime_int = None
            for i in ASIN_XPATH['Join_Prime']:
                Join_Prime = self.response_s.xpath(i)
                if Join_Prime:
                    Join_Prime_ = Join_Prime[0].replace('\xa0', '').replace(',', '.').strip()
                    Join_Prime_list = re.findall(r'(\d.\d+)€', Join_Prime_)
                    try:
                        if Join_Prime_list:
                            Join_Prime_int = float(Join_Prime_list[0])
                            coupon_deal_list.append('6')
                        else:
                            Join_Prime_list = re.findall(r'(\d)€', Join_Prime_)
                            Join_Prime_int = float(Join_Prime_list[0])
                            coupon_deal_list.append('6')
                    except:
                        print(self.asin, "转换 Join_Prime 优惠失败", Join_Prime)
                        Join_Prime_int = None
                    break

            if len(Voucher_list) > 1 and Voucher_buy:
                for i in ASIN_XPATH['Buy_Deal']:
                    Buy_Deal = self.response_s.xpath(i)
                    if Buy_Deal:
                        coupon_deal_list.append('7')
                        break
            for i in ASIN_XPATH['Oferta_Top']:
                Oferta_Top = self.response_s.xpath(i)
                if Oferta_Top:
                    coupon_deal_list.append('8')

            # 30天最低价
            for i in ASIN_XPATH['delight_pricing_list']:
                delight_pricing_list = self.response_s.xpath(i)
                delight_pricing = ''.join(delight_pricing_list)
                break

            # delight_pricing_str = None
            # if delight_pricing:
            #     if "Risparmi" in delight_pricing and "%" in delight_pricing:
            #         delight_pricing_str = True

            for i in ASIN_XPATH['price_30']:
                price_30 = self.response_s.xpath(i)

            for i in ASIN_XPATH['You_Save']:
                You_Save = self.response_s.xpath(i)

            Save_price_int = None
            if price_30 and You_Save:
                for i in ASIN_XPATH['You_Save_price_list']:
                    You_Save_price_list = self.response_s.xpath(i)
                    if You_Save_price_list:
                        Save_price_join = ''.join(You_Save_price_list)
                        Save_price = re.findall(r"(\d+)%", Save_price_join)
                        try:
                            Save_price_int = int(Save_price[0])
                            coupon_deal_list.append('10')
                        except:
                            Save_price_int = None
                            pass
                        break
            if coupon_deal_list:
                deal_type = ','.join(coupon_deal_list)
            else:
                deal_type = None
        elif self.site_name == 'es':
            coupon_deal_list = []
            coupon_int = None
            Voucher_buy = ''
            Voucher_list = self.response_s.xpath(
                f"//div[@data-csa-c-asin='{self.asin}']/label/text()|//span[contains(text(),'coupon')]/text()|//div[@data-csa-c-asin='{self.asin}']/label/text()|//span[contains(text(),'Coupon')]/parent::div/span/text()")
            if Voucher_list:
                Voucher = ''.join(Voucher_list)
                if 'cupón' in Voucher or 'Cupón' in Voucher or 'Quantity' in Voucher:
                    Voucher_buy = '1'
                    Voucher = Voucher.replace('\xa0', '')
                    coupon = re.findall(r'de (\d+)%', Voucher)
                    if coupon:
                        coupon_deal_list.append('1')
                    else:
                        coupon = re.findall(r'de (\d+)€', Voucher)
                        if coupon:
                            coupon_deal_list.append('2')
                        else:
                            coupon = re.findall(r' (\d+)%', Voucher)
                            if coupon:
                                coupon_deal_list.append('1')
                            else:
                                coupon = re.findall(r' (\d+)€', Voucher)
                                if coupon:
                                    coupon_deal_list.append('2')
                    if coupon:
                        try:
                            coupon_int = int(coupon[0])
                        except:
                            coupon_int = None
                    else:
                        coupon_int = None

            Promotion_Message_int = None
            for i in ASIN_XPATH['Promotion_Message']:
                Promotion_Message = self.response_s.xpath(i)
                if Promotion_Message:
                    Promotion_Message_join = ''.join(Promotion_Message)
                    if self.site_name != 'us':
                        if Promotion_Message_join.lower() in ["black friday", "black friday deal"]:
                            site_prime = self.response_s.xpath(
                                "//span[@class='a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage']//text()")
                            if site_prime:
                                try:
                                    site_prime = ''.join(site_prime)
                                    three_four = re.findall("\d+,\d+", site_prime.replace("\xa0", "")) or re.findall(
                                        r"\d+", site_prime.replace("\xa0", ""))
                                    Promotion_Message_int = float(three_four[0]) if three_four else ""
                                    coupon_deal_list.append("3" if "%" in site_prime else "4")
                                    break
                                except:
                                    pass
                    Promotion_Message_str_list = re.findall(r'Save (\d+)%', Promotion_Message_join)
                    if len(Promotion_Message_str_list) == 0:
                        Promotion_Message_str_list = re.findall(r'(\d+)%', Promotion_Message_join)
                    try:
                        if Promotion_Message_str_list:
                            Promotion_Message_int = float(Promotion_Message_str_list[0])
                            coupon_deal_list.append('3')
                        else:
                            Promotion_Message_str_list = re.findall(r'(\d+)€', Promotion_Message_join)
                            Promotion_Message_int = float(Promotion_Message_str_list[0])
                            coupon_deal_list.append('4')

                    except:
                        print(self.asin, '转换 Promotion_Message 失败', Promotion_Message_str_list)
                    break
            Join_Prime_int = None
            for i in ASIN_XPATH['Join_Prime']:
                Join_Prime = self.response_s.xpath(i)
                if Join_Prime:
                    Join_Prime_ = Join_Prime[0].replace('\xa0', '').replace(',', '.').strip()
                    Join_Prime_list = re.findall(r'(\d.\d+)€', Join_Prime_)
                    try:
                        if Join_Prime_list:
                            Join_Prime_int = float(Join_Prime_list[0])
                            coupon_deal_list.append('6')
                        else:
                            Join_Prime_list = re.findall(r'(\d)€', Join_Prime_)
                            Join_Prime_int = float(Join_Prime_list[0])
                            coupon_deal_list.append('6')
                    except:
                        print(self.asin, "转换 Join_Prime 优惠失败", Join_Prime)
                        Join_Prime_int = None
                    break
            if len(Voucher_list) > 1 and Voucher_buy:
                for i in ASIN_XPATH['Buy_Deal']:
                    Buy_Deal = self.response_s.xpath(i)
                    if Buy_Deal:
                        coupon_deal_list.append('7')
                        break

            for i in ASIN_XPATH['Oferta_Top']:
                Oferta_Top = self.response_s.xpath(i)
                if Oferta_Top:
                    coupon_deal_list.append('8')

            # 30天最低价
            for i in ASIN_XPATH['delight_pricing_list']:
                delight_pricing_list = self.response_s.xpath(i)
                delight_pricing = ''.join(delight_pricing_list)
                break

            # delight_pricing_str = None
            # if delight_pricing:
            #     if "Ahorra un" in delight_pricing and "%" in delight_pricing:
            #         delight_pricing_str = True

            for i in ASIN_XPATH['price_30']:
                price_30 = self.response_s.xpath(i)

            for i in ASIN_XPATH['You_Save']:
                You_Save = self.response_s.xpath(i)

            Save_price_int = None
            if price_30 and You_Save:
                for i in ASIN_XPATH['You_Save_price_list']:
                    You_Save_price_list = self.response_s.xpath(i)
                    if You_Save_price_list:
                        Save_price_join = ''.join(You_Save_price_list)
                        Save_price = re.findall(r"(\d+)%", Save_price_join)
                        try:
                            Save_price_int = int(Save_price[0])
                            coupon_deal_list.append('10')
                        except:
                            Save_price_int = None
                            pass
                        break
            if coupon_deal_list:
                deal_type = ','.join(coupon_deal_list)
            else:
                deal_type = None

        # bsr 文本
        for i in ASIN_XPATH['best_sellers_herf']:
            best_sellers_herf_list = self.response_s.xpath(i)
            if best_sellers_herf_list:
                best_sellers_herf = best_sellers_herf_list[-1]
                all_best_sellers_herf = '&&&&'.join(best_sellers_herf_list)
                break
            else:
                best_sellers_herf = None
                all_best_sellers_herf = None

        if self.site_name == 'de':
            for i in ASIN_XPATH['best_sellers_text']:
                best_sellers_text_list = self.response_s.xpath(i)
                if best_sellers_text_list:
                    all_bsr_category = '›'.join(best_sellers_text_list)
                    break
                else:
                    all_bsr_category = None

            if category is None and all_bsr_category:
                bsr_category_list = re.findall(r' in (.*)', all_bsr_category)
                category = bsr_category_list[0] if bsr_category_list else None
            if node_id is None and best_sellers_herf:
                node_id_list = re.findall(r'/(\d+)/ref=', best_sellers_herf)
                node_id = node_id_list[0] if node_id_list else None

        # rank 排名
        for i in ASIN_XPATH['Best_rank']:
            Best_rank_list_th = self.response_s.xpath(i)
            if Best_rank_list_th and best_sellers_herf:
                best_sellers_rank = ''.join(Best_rank_list_th).strip()
                if best_sellers_rank:
                    break
                else:
                    best_sellers_rank = None
            else:
                best_sellers_rank = None

        if best_sellers_rank is None:
            for i in ASIN_XPATH['Best_rank2']:
                Best_rank_list_span = self.response_s.xpath(i)
                if Best_rank_list_span and best_sellers_herf:
                    best_sellers_rank = ''.join(Best_rank_list_span).strip()
                    break
                else:
                    best_sellers_rank = None

        if best_sellers_rank:
            if self.site_name == 'de':
                best_sellers_rank = best_sellers_rank.replace('Amazon Bestseller-Rang', '').replace(
                    'Amazon Bestseller-Rang :', '').strip()
                self.bs_category_asin_list_pg.append(
                    [self.asin, self.date_info, best_sellers_rank, best_sellers_herf, all_best_sellers_herf])
            elif self.site_name == 'fr':
                best_sellers_rank = best_sellers_rank.replace("Classement des meilleures ventes d'Amazon",
                                                              '').replace(
                    "Classement des meilleures ventes d'Amazon :", '').strip()
                self.bs_category_asin_list_pg.append(
                    [self.asin, self.date_info, best_sellers_rank, best_sellers_herf, all_best_sellers_herf])
            elif self.site_name == 'it':
                best_sellers_rank = best_sellers_rank.replace(
                    "Posizione nella classifica Bestseller di Amazon",
                    '').replace(
                    "Posizione nella classifica Bestseller di Amazon :", '').strip()
                self.bs_category_asin_list_pg.append(
                    [self.asin, self.date_info, best_sellers_rank, best_sellers_herf, all_best_sellers_herf])
            elif self.site_name == 'es':
                best_sellers_rank = best_sellers_rank.replace('Clasificación en los más vendidos de Amazon',
                                                              '').replace(
                    'Clasificación en los más vendidos de Amazon :', '').strip()
                self.bs_category_asin_list_pg.append(
                    [self.asin, self.date_info, best_sellers_rank, best_sellers_herf, all_best_sellers_herf])

            else:
                best_sellers_rank = best_sellers_rank.replace("Best Sellers Rank:", '').replace(
                    "Best Sellers Rank", '').strip()
                self.bs_category_asin_list_pg.append(
                    [self.asin, self.date_info, best_sellers_rank, best_sellers_herf, all_best_sellers_herf])
        try:
            if self.site_name == 'us':
                comment = re.compile(r"Best Sellers Rank(.*?)</a>", re.DOTALL)
                comment_lsit = comment.findall(self.page_source)
                print('rank:lsit::L', comment_lsit)
                if comment_lsit:
                    # 类目排名
                    Rank_class = ''.join(comment_lsit).split("#")
                    if "(<a" in Rank_class[1]:
                        Rank = re.findall(r"(.*?)\(<a", Rank_class[1])
                        Rank = "".join(Rank)
                    elif "<a href=" in Rank_class[1]:
                        Rank = re.findall(r"(.*?)<a href=.*>(.*)", Rank_class[1])
                        Rank = Rank[0][0] + Rank[0][1]
                    else:
                        Rank = Rank_class[1]
                else:
                    comment1 = re.compile(r"Best-sellers rank(.*?)</a>", re.DOTALL)
                    comment1_lsit1 = comment1.findall(self.page_source)
                    if comment1_lsit1:
                        # 类目排名
                        Rank_class1 = ''.join(comment1_lsit1).split("#")
                        if "(<a" in Rank_class1[1]:
                            Rank = re.findall(r"(.*?)\(<a", Rank_class1[1])
                            Rank = "".join(Rank)
                        elif "<a href=" in Rank_class1[1]:
                            Rank = re.findall(r"(.*?)<a href=.*>(.*)", Rank_class1[1])
                            Rank = Rank[0][0] + Rank[0][1]
                        else:
                            Rank = Rank_class1[1]
                    else:
                        Rank = None
                if Rank:
                    rank = re.findall(r'(.*) in', Rank)[0]
                    if len(rank) > 5 and 'in' in rank:
                        rank = re.findall(r'(.*?) in', Rank)[0]
                    else:
                        rank_ = re.findall(r'\d', rank)
                        rank = ''.join(rank_).replace(',', '').strip()
                else:
                    rank = None
            elif self.site_name == 'uk':
                rank = None
                comment = re.compile(r"Best Sellers Rank(.*?)</a>", re.DOTALL)
                comment_lsit = comment.findall(self.page_source)
                print('rank:lsit::L', comment_lsit)
                if comment_lsit:
                    # 类目排名
                    Rank_class = ''.join(comment_lsit).split("#")
                    if len(Rank_class) > 1:
                        if "(<a" in Rank_class[1]:
                            Rank = re.findall(r"(.*?)\(<a", Rank_class[1])
                            Rank = "".join(Rank)
                        elif "<a href=" in Rank_class[1]:
                            Rank = re.findall(r"(.*?)<a href=.*>(.*)", Rank_class[1])
                            Rank = Rank[0][0] + Rank[0][1]
                        else:
                            Rank = Rank_class[1]
                    else:
                        Rank = re.findall(r'(.*) in', Rank_class[0])[0]
                else:
                    comment1 = re.compile(r"Best-sellers rank(.*?)</a>", re.DOTALL)
                    comment1_lsit1 = comment1.findall(self.page_source)
                    if comment1_lsit1:
                        # 类目排名
                        Rank_class1 = ''.join(comment1_lsit1).split("#")
                        if len(Rank_class1) > 1:
                            if "(<a" in Rank_class1[1]:
                                Rank = re.findall(r"(.*?)\(<a", Rank_class1[1])
                                Rank = "".join(Rank)
                            elif "<a href=" in Rank_class1[1]:
                                Rank = re.findall(r"(.*?)<a href=.*>(.*)", Rank_class1[1])
                                Rank = Rank[0][0] + Rank[0][1]
                            else:
                                Rank = Rank_class1[1]
                        else:
                            Rank = re.findall(r'(.*) in', Rank_class1[0])[0]
                    else:
                        Rank = None
                if Rank:
                    patterns = [
                        r">(\d.*) in", r"> (\d.*) in", r"(\d.*?) in", r">(\d.*)", r"> (\d.*)"]
                    # 遍历每个正则表达式模式，并进行匹配
                    for pattern in patterns:
                        matches = re.findall(pattern, Rank)
                        if matches:
                            rank = matches[0]
                            break
                else:
                    rank = None
            elif self.site_name == 'de':
                comment = re.compile(r"Amazon Bestseller-Rang(.*?)</a>", re.DOTALL)
                comment_lsit = comment.findall(self.page_source)
                print('rank:lsit::L', comment_lsit)
                if comment_lsit:
                    # 类目排名
                    Rank_class = ''.join(comment_lsit).split("#")
                    if "(<a" in Rank_class[0]:
                        Rank = re.findall(r"Nr. (.*?) in", Rank_class[0])
                        if len("".join(Rank)) > 10:
                            rank = "".join(Rank[0])
                        else:
                            rank = "".join(Rank)
                    else:
                        if len(Rank_class) > 1:
                            if "<a href=" in Rank_class[1]:
                                Rank = re.findall(r"(.*?)<a href=.*>(.*)", Rank_class[1])
                                rank = Rank[0][0] + Rank[0][1]
                            else:
                                rank = Rank_class[1]
                        else:
                            if "<a href=" in Rank_class[0]:
                                Rank = re.findall(r"Nr. (.*?)<a href=.*>(.*)", Rank_class[0])
                                rank = Rank[0][0] + Rank[0][1]
                            else:
                                rank = None
                else:
                    rank = None
            elif self.site_name == 'fr':
                comment = re.compile(r"Classement des meilleures ventes d'Amazon(.*?)</a>", re.DOTALL)
                comment_lsit = comment.findall(self.page_source)
                print('rank:lsit::L', comment_lsit)
                if comment_lsit:
                    # 类目排名
                    Rank_class = ''.join(comment_lsit).split("#")
                    if "(<a" in Rank_class[0]:
                        Rank_list = re.findall(r"(\d.*?) \(", Rank_class[0])
                        Rank = re.findall(r"(\d.*?) en", Rank_list[0])
                        if len("".join(Rank)) > 10:
                            rank = "".join(Rank[0])
                        else:
                            rank = "".join(Rank)
                    else:
                        if len(Rank_class) > 1:
                            if "<a href=" in Rank_class[1]:
                                Rank = re.findall(r"(\d.*?) en", Rank_class[1])
                                rank = "".join(Rank)
                            else:
                                rank = Rank_class[1]
                        else:
                            if "<a href=" in Rank_class[0]:
                                Rank = re.findall(r"(\d.*?) en", Rank_class[0])
                                if len("".join(Rank)) > 10:
                                    rank = "".join(Rank[0])
                                else:
                                    rank = "".join(Rank)
                            else:
                                rank = None
                else:
                    rank = None
            elif self.site_name == 'it':
                comment = re.compile(r"Posizione nella classifica Bestseller di Amazon(.*?)</a>", re.DOTALL)
                comment_lsit = comment.findall(self.page_source)
                print('rank:lsit::L', comment_lsit)
                if comment_lsit:
                    # Category rank
                    Rank_class = ''.join(comment_lsit).split("#")
                    if "(<a" in Rank_class[0]:
                        Rank_list = re.findall(r"(\d.*?) \(", Rank_class[0])
                        Rank = re.findall(r"(\d.*?) in", Rank_list[0])
                        if len("".join(Rank)) > 10:
                            rank = "".join(Rank[0])
                        else:
                            rank = "".join(Rank)
                    else:
                        if len(Rank_class) > 1:
                            if "<a href=" in Rank_class[1]:
                                Rank = re.findall(r"(\d.*?) in", Rank_class[1])
                                rank = "".join(Rank)
                            else:
                                rank = Rank_class[1]
                        else:
                            if "<a href=" in Rank_class[0]:
                                Rank = re.findall(r"n. (\d.*?) in", Rank_class[0])
                                if len("".join(Rank)) > 10:
                                    rank = "".join(Rank[0])
                                else:
                                    rank = "".join(Rank)
                            else:
                                rank = None
                else:
                    rank = None
            elif self.site_name == 'es':
                comment = re.compile(r"Clasificación en los más vendidos de Amazon(.*?)</a>", re.DOTALL)
                comment_lsit = comment.findall(self.page_source)
                print('rank:lsit::L', comment_lsit)
                if comment_lsit:
                    # Category rank
                    Rank_class = ''.join(comment_lsit).split("#")
                    if "(<a" in Rank_class[0]:
                        Rank_list = re.findall(r"(\d.*?) \(", Rank_class[0])
                        Rank = re.findall(r"(\d.*?) en", Rank_list[0])
                        if len("".join(Rank)) > 10:
                            rank = "".join(Rank[0])
                        else:
                            rank = "".join(Rank)

                    else:
                        if len(Rank_class) > 1:
                            if "<a href=" in Rank_class[1]:
                                Rank = re.findall(r"(\d.*?) en", Rank_class[1])
                                rank = "".join(Rank)
                            else:
                                rank = Rank_class[1]
                        else:
                            if "<a href=" in Rank_class[0]:
                                Rank = re.findall(r"(\d.*?) en", Rank_class[0])
                                if len("".join(Rank)) > 10:
                                    rank = "".join(Rank[0])
                                else:
                                    rank = "".join(Rank)
                            else:
                                rank = None
                else:
                    rank = None
        except:
            rank = None
        # Weight
        try:
            for i in ASIN_XPATH['Weight']:
                Weight_list = self.response_s.xpath(i)
                if Weight_list:
                    w = Weight_list[0].lower()
                    if self.site_name in ['uk', 'de', 'fr', 'es', 'it']:
                        if ('logramm' in w) or (' g' in w) or (' kg' in w) or ("gram" in w) or ("rams" in w):
                            Weight = w.strip()
                            break
                    else:
                        if ('ounds' in w) or ("unces" in w) or ('grams' in w):
                            Weight = w.strip()
                            break
                else:
                    Weight = None
            if Weight is None:
                Weight = self.get_wp('Weight')
            print('Weight::L', Weight)
            if Weight:
                Weight_ = Weight.replace("\u200e", '').replace('‎', '').strip()
                weight_str = Weight
                if len(Weight_) > 150:
                    weight = None
                elif ';' in Weight_:
                    weight_1 = Weight_.split(";")
                    weight = weight_1[1].replace("\u200e", '').strip()
                else:
                    weight = Weight_
                if self.site_name == 'us':
                    if 'nce' in weight:
                        Weight = self.re_weight(weight)
                        Weight = float(Weight) / 16
                    elif 'unds' in weight:
                        Weight = self.re_weight(weight)
                        Weight = float(Weight)
                    elif ("ilograms" in weight):
                        Weight = self.re_weight(weight)
                        Weight = float(Weight) / 0.454
                    elif ("grams" in weight.lower()):
                        Weight = self.re_weight(weight)
                        Weight = float(Weight) / 1000
                        Weight = float(Weight) / 0.454
                    else:
                        Weight = self.re_weight(weight)
                        Weight = float(Weight)
                else:
                    if "ilogram" in Weight or "kg" in Weight:
                        Weight = self.re_weight(weight)
                        Weight = float(Weight) * 1000
                    elif 'ram' in Weight or " g" in Weight:
                        Weight = self.re_weight(weight)
                        Weight = float(Weight)
                    else:
                        Weight = None
            else:
                Weight = None
                weight_str = None
        except:
            Weight = None
            weight_str = None
        # Package dimensions (volume)
        try:
            for i in ASIN_XPATH['Package']:
                Package_list = self.response_s.xpath(i)
                if Package_list:
                    p = Package_list[0]
                    if 'nche' in p or "cm" in p or 'centimetres' in p or (p.count('x') == 2 and '"D' in p) \
                            or (p.count('"L') == 1 and '"W' in p) or (p.count('"H') == 1 and '"W' in p):
                        Package = p.strip()
                        break
                else:
                    Package = None
            if Package is None:
                Package = self.get_wp('Package')
            print('Package::L', Package)
            if Package:
                if self.site_name == 'us':
                    if ("inches" in Package and ";" in Package):
                        Package_1 = Package.split(";")
                        Package = Package_1[0]
                    else:
                        if ("inches" in Package and ";" in Package):
                            Package_1 = Package.split(";")
                            Package = Package_1[0]
                        elif "cm" in Package or "inches" in Package:
                            Package = Package
                        else:
                            Package = Package.strip()
                elif self.site_name in ['de', 'uk', 'fr', 'es', 'it']:
                    if ("cm" in Package and ";" in Package):
                        Package_1 = Package.split(";")
                        Package = Package_1[0]
                    elif ("cm" in Package and ";" in Package):
                        Package_1 = Package.split(";")
                        Package = Package_1[0]
                    elif "cm" in Package or 'centimetres' in p:
                        Package = Package
                    else:
                        Package = Package.strip()
            else:
                Package = None
        except:
            Package = None
        # Launch (listing) date
        try:
            amazon_launch_time = None
            tiem_dict = {"June": "6", "April": "4", "January": "1", "October": "10",
                         "November": "11", "August": "8",
                         "March": "3", "December": "12", "July": "7", "September": "9",
                         "Feb": '2', "May": "5", "February": '2'}
            if self.site_name == 'us':
                for i in ASIN_XPATH['Date_time']:
                    Date_time_list = self.response_s.xpath(i)
                    if Date_time_list:
                        Date_time = Date_time_list[0].strip()
                        for char in ['(', ')', '\u200e', ',', '.']:
                            Date_time = Date_time.replace(char, '')
                        break
                    else:
                        Date_time = "0"
                launch_time = None

                if len(Date_time) > 1:
                    print('Date_time::L', Date_time)
                    time_s = re.findall(r"(.*?) ", Date_time)
                    time_ss = time_s[0]
                    amazon_launch_time = time_ss
                    t1 = tiem_dict.get(time_ss)
                    t2 = Date_time.replace(time_ss, t1)

                    try:
                        d2 = datetime.datetime.strptime(t2, '%m %d %Y')  # 2007-06-28 00:00:00
                    except:
                        d2 = datetime.datetime.strptime(t2, '%d %m %Y')  # 2007-06-28 00:00:00
                    launch_time = str(d2)

                if launch_time is None:
                    for i in ASIN_XPATH['Date_time2']:
                        data_list = self.response_s.xpath(i)
                        if data_list:
                            for data in data_list:
                                for time_ in tiem_dict.keys():
                                    if time_ in data:
                                        if "(" in data and ')' in data:
                                            data = re.findall(r' \((.*)', data)[0]
                                        chars_to_remove = ['(', ')', '\u200e', ',', '.']
                                        for char in chars_to_remove:
                                            data_time = data_time.replace(char, '')
                                        month_ = re.findall(r'[A-Za-z]', data_time)
                                        month_str = ''.join(month_)
                                        _month = data_time.replace(month_str, tiem_dict.get(month_str))
                                        amazon_launch_time = _month
                                        try:
                                            year_moth_day = datetime.datetime.strptime(_month, '%m %d %Y')
                                        except:
                                            year_moth_day = datetime.datetime.strptime(_month, '%d %m %Y')
                                        launch_time = str(year_moth_day)
                            break
                        else:
                            launch_time = None

            elif self.site_name in ['de', 'fr', 'it', 'uk', 'es']:
                if self.site_name == 'de':
                    tiem_dict = {"June": "6", "April": "4", "January": "1", "Oktober": "10", "October": "10",
                                 "November": "11", "August": "8", "Juni": "6", "Januar": "1",
                                 "März": "3", "Dezember": "12", "Juli": "7", "September": "9",
                                 "Feb": '2', "Mai": "5", "Februar": '2'}
                elif self.site_name == 'fr':
                    tiem_dict = {"septembre": "9", "novembre": "11", "août": "8", "mars": "3", "juillet": "7",
                                 "octobre": "10",
                                 "mai": "5", "février": "2", "avril": "4", "décembre": "12", "janvier": "1",
                                 "juin": "6",
                                 }
                elif self.site_name == 'it':
                    tiem_dict = {"novembre": "11", "agosto": "8", "aprile": "4", "marzo": "3", "maggio": "5",
                                 "febbraio": "2",
                                 "luglio": "7", "settembre": "9", "gennaio": "1", "ottobre": "10", "dicembre": "12",
                                 "giugno": "6"
                                 }
                elif self.site_name == 'uk':
                    tiem_dict = {"June": "6", "Jun": "6", "April": "4", "Apr": "4", "January": "1", "Jan": "1",
                                 "October": "10",
                                 "November": "11", "Nov": "11", "August": "8", "Oct": "10", "Aug": "8",
                                 "March": "3", "Mar": "3", "December": "12", "Dec": "12", "July": "7",
                                 "September": "9", "Sept": "9",
                                 "Feb": '2', "May": "5", "February": '2'}
                elif self.site_name == 'es':
                    tiem_dict = {"septiembre": "9", "noviembre": "11", "julio": "7", "agosto": "8", "marzo": "3",
                                 "enero": "1",
                                 "octubre": "10", "febrero": "2", "mayo": "5", "diciembre": "12", "abril": "4",
                                 "junio": "6"
                                 }
                for i in ASIN_XPATH['Date_time']:
                    Date_time_list = self.response_s.xpath(i)

                    if Date_time_list:
                        Date_time = Date_time_list[0].strip()
                        chars_to_remove = ['(', ')', '\u200e', ',', '.']
                        for char in chars_to_remove:
                            Date_time = Date_time.replace(char, '')
                        break
                    else:
                        Date_time = "0"
                launch_time = None
                if len(str(Date_time)) > 1:
                    time_s = re.findall(r"(.*?) ", Date_time)
                    time_ss = time_s[1]
                    t1 = tiem_dict.get(time_ss)
                    t2 = Date_time.replace(time_ss, t1)
                    amazon_launch_time = t2
                    try:
                        d2 = datetime.datetime.strptime(t2, '%m %d %Y')
                    except:
                        d2 = datetime.datetime.strptime(t2, '%d %m %Y')
                    launch_time = str(d2)
                if launch_time is None:
                    for i in ASIN_XPATH['Date_time2']:
                        datas_list = self.response_s.xpath(i)
                        if datas_list:
                            for data in datas_list:
                                for time_ in tiem_dict.keys():
                                    if time_ in data:
                                        if "(" in data and ')' in data:
                                            data = re.findall(r' \((.*)', data)[0]
                                        chars_to_remove = ['(', ')', '\u200e', ',', '.']
                                        for char in chars_to_remove:
                                            data_time = data_time.replace(char, '')
                                        month_ = re.findall(r'[A-Za-z]', data_time)
                                        month_str = ''.join(month_)
                                        _month = data_time.replace(month_str, tiem_dict.get(month_str))
                                        amazon_launch_time = _month
                                        try:
                                            year_moth_day = datetime.datetime.strptime(_month, '%m %d %Y')
                                        except:
                                            year_moth_day = datetime.datetime.strptime(_month, '%d %m %Y')
                                        launch_time = str(year_moth_day)
                            break
                        else:
                            launch_time = None
        except Exception as e:
            print(e, '时间报错')
            launch_time = None
            amazon_launch_time = None
        # QA
        for i in ASIN_XPATH['QA_num']:
            askATF_list = self.response_s.xpath(i)
            if askATF_list:
                try:
                    QA_num = int(re.findall(r'(\d+) ', askATF_list[0])[0])
                except:
                    QA_num = 0
                break
            else:
                QA_num = 0

        # ASINs purchased together with this one
        add_cart_asin_list = []
        together_asin_list = []
        for i in ASIN_XPATH['buy_asin']:
            buy_asin_list = self.response_s.xpath(i)
            set_list = []
            if buy_asin_list:
                for buy_asin in buy_asin_list:
                    together_asin_ = re.findall(r'/dp/(.*?)/ref', buy_asin)
                    if len(together_asin_) < 1:
                        together_asin_ = re.findall(r'dp%2F(.*?)%', buy_asin)
                    if together_asin_:
                        if together_asin_[0] in set_list:
                            continue
                        set_list.append(together_asin_[0])
                        together_asin_dict = {}
                        add_cart_asin_list.append(together_asin_[0])
                        together_asin_titles = self.response_s.xpath(
                            f"//a[contains(@href,'{together_asin_[0]}')]//span/text()")
                        together_asin_title = together_asin_titles[0] if together_asin_titles else None
                        together_asin_prices = self.response_s.xpath(
                            f"//a[contains(@href,'{together_asin_[0]}')]/parent::div/following-sibling::div//span[contains(@class,'price')]/span/text()")
                        together_asin_price = together_asin_prices[0] if together_asin_prices else None
                        together_asin_dict['together_asin'] = together_asin_[0]
                        together_asin_dict['together_asin_title'] = together_asin_title
                        together_asin_dict['together_asin_price'] = together_asin_price
                        together_asin_list.append(together_asin_dict)

        if add_cart_asin_list:
            add_cart_asin_list = list(set(add_cart_asin_list))
            together_asin = ','.join(add_cart_asin_list)
        else:
            together_asin = None
        if together_asin_list:
            together_asin_json = json.dumps(together_asin_list, ensure_ascii=False)
        else:
            together_asin_json = None
        # Bundle ASINs ("Make it a bundle" section)
        pba_lob_asin_data_list = []
        for i in ASIN_XPATH['pba_lob_asin_list']:
            pba_lob_asin_list = self.response_s.xpath(i)
            if pba_lob_asin_list:
                for pba_lob_asin in pba_lob_asin_list:
                    pba_lob_asin_data_json = {}
                    lob_asin = re.findall(r'dp\/(.*)', pba_lob_asin)
                    if lob_asin:
                        lob_asin_imgs = self.response_s.xpath(
                            f"//div[contains(@id,'lob-carousel')]//a[contains(@href,'{lob_asin[0]}')]//img/@src")
                        lob_asin_img = lob_asin_imgs[0] if lob_asin_imgs else None
                        lob_asin_prices = self.response_s.xpath(
                            f"//div[contains(@id,'lob-carousel')]//a[contains(@href,'{lob_asin[0]}')]/following-sibling::div//span[contains(@class,'pba-lob-bundle-buy-price')]/span/text()")
                        lob_asin_price = lob_asin_prices[0] if lob_asin_prices else None
                        lob_asin_titles = self.response_s.xpath(
                            f"//div[contains(@id,'lob-carousel')]//a[contains(@href,'{lob_asin[0]}')]/div//span[contains(@class,'pba-lob-bundle-title')]//text()")
                        lob_asin_title = lob_asin_titles[0] if lob_asin_titles else None
                        lob_asin_total_comments = self.response_s.xpath(
                            f"//div[contains(@id,'lob-carousel')]//a[contains(@href,'{lob_asin[0]}')]/i/following-sibling::span/text()")
                        lob_asin_total_comment = lob_asin_total_comments[0] if lob_asin_total_comments else None
                        pba_lob_asin_data_json['lob_asin'] = lob_asin[0]
                        pba_lob_asin_data_json['lob_asin_img'] = lob_asin_img
                        pba_lob_asin_data_json['lob_asin_price'] = lob_asin_price
                        pba_lob_asin_data_json['lob_asin_title'] = lob_asin_title
                        pba_lob_asin_data_json['lob_asin_total_comment'] = lob_asin_total_comment
                        pba_lob_asin_data_list.append(pba_lob_asin_data_json)
                break
        if pba_lob_asin_data_list:
            lob_asin_json = json.dumps(pba_lob_asin_data_list, ensure_ascii=False)
        else:
            lob_asin_json = None
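        # "Compare with similar items" / "Discover similar items" section: similar products.
        # Amazon automatically picks recommendations based on product similarity and buyers'
        # shopping habits, and compares highly similar products side by side.
        # Example ASIN link: https://www.amazon.com/dp/B0C59BYVTQ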
        compare_similar_list = []
        for i in ASIN_XPATH['compare_similar_asin_list']:
            compare_similar_asin_list = self.response_s.xpath(i)
            if compare_similar_asin_list:
                for compare_similar_asin in compare_similar_asin_list:
                    compare_asin_dict = {}
                    try:
                        compare_asin = compare_similar_asin.split('-')[1]
                        compare_asin_srcs = \
                            self.response_s.xpath(f'//div[contains(@id,"{compare_similar_asin}")]//img/@src')
                        compare_asin_src = compare_asin_srcs[0] if compare_asin_srcs else None
                        compare_asin_titles = self.response_s.xpath(
                            f'//div[contains(@id,"{compare_similar_asin}")]/parent::div/following-sibling::div//span/text()|//div[contains(@id,"{compare_similar_asin}")]//img/@alt')
                        compare_asin_title = compare_asin_titles[0] if compare_asin_titles else None
                        compare_asin_dict['compare_asin'] = compare_asin
                        compare_asin_dict['compare_asin_src'] = compare_asin_src
                        compare_asin_dict['compare_asin_title'] = compare_asin_title
                        compare_similar_list.append(compare_asin_dict)
                    except:
                        pass
                break
        if compare_similar_list:
            compare_similar_asin_json = json.dumps(compare_similar_list, ensure_ascii=False)
        else:
            compare_similar_asin_json = None
        # Get the customer rating data broken down by feature
        for i in ASIN_XPATH['customer_reviews_list']:
            customer_reviews_list = self.response_s.xpath(i)
            if customer_reviews_list:
                customer_reviews_json = self.get_customer_reviews(customer_reviews_list)
                break
            else:
                customer_reviews_json = None
        # Inventory (stock status)
        if self.site_name == 'us' or self.site_name == 'uk':
            page_inventory = 0
            for i in ASIN_XPATH['invnetroySelect']:
                page_inventory_list = self.response_s.xpath(i)
                if page_inventory_list:
                    if self.pageinventory(page_inventory_list):
                        break
                    if len(page_inventory_list) > 20:
                        page_inventory = 1
                    elif len(page_inventory_list) >= 1:
                        page_inventory = 2
                    else:
                        page_inventory_re = re.findall('Only (\d+) left in stock', page_inventory_list[0].strip(),
                                                       re.I)
                        if page_inventory_re:
                            page_inventory = page_inventory_re[0]
                            if int(page_inventory) > 20:
                                page_inventory = 1  # 充足
                            elif (int(page_inventory) > 0) and (int(page_inventory) <= 20):
                                page_inventory = 2  # 正常
                            else:
                                page_inventory = 3  # 缺货
                        else:
                            page_inventory = 3
                    break
            if page_inventory == 0 or page_inventory == 3:
                page_inventory = self.re_buy_Box(ASIN_XPATH)

        elif self.site_name == 'de':
            page_inventory = 0
            for i in ASIN_XPATH['invnetroySelect']:
                page_inventory_list = self.response_s.xpath(i)
                if page_inventory_list:
                    if self.pageinventory(page_inventory_list):
                        break
                    if len(page_inventory_list) > 20:
                        page_inventory = 1
                    elif len(page_inventory_list) >= 1:
                        page_inventory = 2
                    else:
                        page_inventory_re = re.findall('Nur noch (\d+) auf Lager', page_inventory_list[0].strip(),
                                                       re.I)
                        if page_inventory_re:
                            page_inventory = page_inventory_re[0]
                            if int(page_inventory) > 20:
                                page_inventory = 1  # 充足
                            elif (int(page_inventory) > 0) and (int(page_inventory) <= 20):
                                page_inventory = 2  # 正常
                            else:
                                page_inventory = 3  # 缺货
                        else:
                            page_inventory = 3
                    break
            if page_inventory == 0:
                for i in ASIN_XPATH['invnetroySelect2']:
                    buy_Box_list = self.response_s.xpath(i)
                    if len(buy_Box_list) > 0:
                        if "In den Einkaufswagen" in buy_Box_list[0] or "Add to Cart" in \
                                buy_Box_list[0]:
                            page_inventory = 2
                        else:
                            page_inventory = 3
                        break
            if page_inventory == 0:
                availability = self.response_s.xpath('//div[@id="availability-string"]/span/text()')
                if availability:
                    if 'Auf Lager' in availability[0]:
                        page_inventory = 2  # 正常
                    else:
                        page_inventory = 3
                else:
                    page_inventory = 3

        elif self.site_name == 'fr':
            page_inventory = 0
            for i in ASIN_XPATH['invnetroySelect']:
                page_inventory_list = self.response_s.xpath(i)
                if page_inventory_list:
                    if self.pageinventory(page_inventory_list):
                        break
                    if len(page_inventory_list) > 20:
                        page_inventory = 1
                    elif len(page_inventory_list) >= 1:
                        page_inventory = 2
                    else:
                        page_inventory_re = re.findall('Only (\d+) left in stock', page_inventory_list[0].strip(),
                                                       re.I)
                        if page_inventory_re:
                            page_inventory = page_inventory_re[0]
                            if int(page_inventory) > 20:
                                page_inventory = 1  # 充足
                            elif (int(page_inventory) > 0) and (int(page_inventory) <= 20):
                                page_inventory = 2  # 正常
                            else:
                                page_inventory = 3  # 缺货
                    break
            if page_inventory == 0 or page_inventory == 3:
                page_inventory = self.re_buy_Box(ASIN_XPATH)
            else:
                page_inventory = 3

        elif self.site_name == 'it':
            page_inventory = 0
            for i in ASIN_XPATH['invnetroySelect']:
                page_inventory_list = self.response_s.xpath(i)
                if page_inventory_list:
                    if self.pageinventory(page_inventory_list):
                        break
                    if len(page_inventory_list) > 20:
                        page_inventory = 1
                    elif len(page_inventory_list) >= 1:
                        page_inventory = 2
                    else:
                        page_inventory_re = re.findall('solo (\d+)', page_inventory_list[0].strip(), re.I)
                        if page_inventory_re:
                            page_inventory = page_inventory_re[0]
                            if int(page_inventory) > 20:
                                page_inventory = 1  # 充足
                            elif (int(page_inventory) > 0) and (int(page_inventory) <= 20):
                                page_inventory = 2  # 正常
                            else:
                                page_inventory = 3  # 缺货
                        else:
                            page_inventory = 3
                    break
            if page_inventory == 0:
                page_inventory = self.re_buy_Box(ASIN_XPATH)
            else:
                page_inventory = 3

        elif self.site_name == 'es':
            page_inventory = 0
            for i in ASIN_XPATH['invnetroySelect']:
                page_inventory_list = self.response_s.xpath(i)
                if page_inventory_list:
                    if self.pageinventory(page_inventory_list):
                        break
                    if len(page_inventory_list) > 20:
                        page_inventory = 1
                    elif len(page_inventory_list) >= 1:
                        page_inventory = 2
                    else:
                        page_inventory_re = re.findall('Only (\d+) left in stock', page_inventory_list[0].strip(), re.I)
                        if page_inventory_re:
                            page_inventory = page_inventory_re[0]
                            if int(page_inventory) > 20:
                                page_inventory = 1  # 充足
                            elif (int(page_inventory) > 0) and (int(page_inventory) <= 20):
                                page_inventory = 2  # 正常
                            else:
                                page_inventory = 3  # 缺货
                        else:
                            page_inventory = 3
                else:
                    if page_inventory == 0:
                        page_inventory = self.re_buy_Box(ASIN_XPATH)
                    else:
                        page_inventory = 3
        # Seller type
        buy_box_seller_type = None
        for i in ASIN_XPATH['td_0_text']:
            td_0_text = self.response_s.xpath(i)
            if td_0_text:
                break
            else:
                td_0_text = []
        for i in ASIN_XPATH['td_1_text']:
            td_1_text = self.response_s.xpath(i)
            if td_1_text:
                break
            else:
                td_1_text = []
        if td_1_text and td_0_text:
            if self.site_name in ['us', 'uk', 'es', 'fr']:
                buy_box_seller_type = self.re_buy_sller(td_1_text, td_0_text)
            elif self.site_name == 'de':
                if ('Verkauf und Versand durch Amazon.' in td_0_text[0].strip()) or ('Amazon' in td_0_text[0].strip()):
                    buy_box_seller_type = 1
                elif ('ein Amazon Unternehmen' in td_1_text[0].strip()) or (
                        'ein Amazon Unternehmen' in td_0_text[0].strip()):
                    buy_box_seller_type = 1
                else:
                    if len(td_1_text) > 0:
                        if "Versand durch Amazon" in td_1_text[0].strip() or ('Amazon' in td_0_text[0].strip()):
                            buy_box_seller_type = 2

                        else:
                            buy_box_seller_type = 3
                    else:
                        buy_box_seller_type = 3

        if buy_box_seller_type is None:
            for i in ASIN_XPATH['by_text']:
                by_text = self.response_s.xpath(i)
                if by_text:
                    by_text = ''.join(by_text)
                    if self.site_name == 'de':
                        if "Verkauft von" in by_text and 'Versand durch' in by_text and 'Amazon' in by_text:
                            buy_box_seller_type = 1
                        else:
                            buy_box_seller_type = 4
                        break
                    elif self.site_name == 'it':
                        if "Vendito" in by_text and "Amazon" not in by_text:
                            buy_box_seller_type = 3
                        elif "Vendito" in by_text and "Amazon" in by_text:
                            buy_box_seller_type = 1
                        else:
                            buy_box_seller_type = 4  # 无
                        break
                    elif self.site_name == 'es':
                        if "Vendido y enviado por" in by_text and "Amazon" not in by_text:
                            buy_box_seller_type = 3
                        elif "Vendido y enviado por" in by_text and "Amazon" in by_text:
                            buy_box_seller_type = 1
                        else:
                            buy_box_seller_type = 4  # 无
                    else:
                        if "Ships from and sold by" in by_text and "Amazon" not in by_text:
                            buy_box_seller_type = 3
                        elif "Ships from and sold by" in by_text and "Amazon" in by_text:
                            buy_box_seller_type = 1
                        else:
                            buy_box_seller_type = 4  # 无
                        break
                else:
                    buy_box_seller_type = 4
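        # Variations. First read the ASIN the page itself reports (currentAsin);
        # it is recorded only when it differs from the requested ASIN.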
        current_asin = None
        current_Asin_list = re.findall(r'currentAsin(.*?),', self.page_source)
        if current_Asin_list:
            currentAsin_list = re.findall(r'(?:[A-Z0-9]{10}|[0-9]{10})', current_Asin_list[0])
            if currentAsin_list:
                if currentAsin_list[0].strip() == self.asin:
                    pass
                else:
                    current_asin = currentAsin_list[0].strip()
        print('current_asin:', current_asin)
        variat_list = []
        try:
            dimensions = re.findall(r'"dimensions" : \[(.*?)\],', self.page_source)
            DisplayDatas_list = re.findall(r"dimensionValuesDisplayData(.*?)},", self.page_source)
            parentAsin_list = re.findall(r"parentAsin=(.*?)&", self.page_source)
            if parentAsin_list:
                parentAsin = parentAsin_list[0].strip()
            else:
                parentAsin_list = re.findall(r'parentAsin\":\"(.*?)\",', self.page_source)
                if parentAsin_list:
                    parentAsin = parentAsin_list[0].strip()
                else:
                    parentAsin = None

            if DisplayDatas_list:
                try:
                    DisplayDatas_list.remove('":{')
                except:
                    pass
                if len(DisplayDatas_list) > 1:
                    if self.asin in DisplayDatas_list[0]:
                        DisplayData = DisplayDatas_list[0]
                    else:
                        DisplayData = DisplayDatas_list[-1]
                else:
                    DisplayData = DisplayDatas_list[0]
                data = re.findall(r': (.*)', DisplayData + '}')
                dict_data = json.loads(data[0])
                dimension_names = [dimension.strip('"') for dimension in dimensions[0].split(',')]
                if len(dimension_names) != len(dict_data[list(dict_data.keys())[0]]):
                    raise ValueError(
                        f"Dimensions数量与数据不匹配，期望{len(dict_data[list(dict_data.keys())[0]])}个，实际{len(dimension_names)}个")
                for asin_var in dict_data:
                    if len(asin_var) != 10:
                        i_list = re.findall(r'(?:[A-Z0-9]{10}|[0-9]{10})', asin_var)
                        if i_list:
                            i = i_list[0]
                        else:
                            i = asin_var[-10:]
                    else:
                        i = asin_var

                    is_not_on_sale_list = self.response_s.xpath(
                        f"//li[@data-defaultasin='{i}']/@class|//li[@data-asin='{i}']/@data-initiallyunavailable|//li[@data-defaultasin='{i}']/@data-csa-c-content-id|//li[contains(@data-dp-url,'{i}')]/@data-csa-c-slot-id|//option[contains(@value,'{i}')]/@data-a-css-class")
                    is_not_on_sale_srt = ''.join(is_not_on_sale_list)
                    if "Available" in is_not_on_sale_srt or 'false' in is_not_on_sale_srt:
                        state = 1
                    else:
                        state = 2
                    variation_data = dict_data[asin_var]
                    variation_info = dict(zip(dimension_names, variation_data))
                    variat_list.append(i)
                    other_var = variation_info.get('number_of_items')
                    if other_var is None:
                        other_var = variation_info.get('team_name')
                    self.add_variation(i, variation_info.get('color_name'), variation_info.get('size_name'),
                                       variation_info.get('style_name'), state, parentAsin, other_var)
        except:
            pass

        # Exact monthly-sales figure (ASIN_XPATH['buy_sales_num_list']); when
        # present it is prepended to the monthly-sales text extracted below.



        for i in ASIN_XPATH['buy_sales_num_list']:
            buySales_num_list = self.response_s.xpath(i)
            if buySales_num_list:
                buySales_num = buySales_num_list[0].strip().replace(' ', '')
                break
            else:
                buySales_num = None


        # asin详情 月销售量
        for i in ASIN_XPATH['buy_sales_list']:
            buySales_list2 = self.response_s.xpath(i)
            if buySales_list2:
                buySales = buySales_list2[0].strip().replace(' ', '')
                if buySales_num:
                    asin_not_Sales = buySales_num + buySales
                else:
                    asin_not_Sales = buySales
                break
            else:
                asin_not_Sales = None
        print('asin_not not _Sales:',asin_not_Sales)
        buySales_list = self.response_s.xpath(
            f'//div[@data-csa-c-asin="{self.asin}"]//span[contains(@id,"bought")]//text()|//span[contains(@id,"bought")]//text()')
        print('buySales_list:::', buySales_list)
        if buySales_list:
            buy_Sales = ''.join(buySales_list)
            buySales = buy_Sales.strip().replace(' ', '')
        else:
            buySales = None
        if buySales:
            if self.site_name == 'us' or self.site_name == 'uk':
                if 'boughtinpast' in buySales:
                    pass
                else:
                    buySales = None
            elif self.site_name == 'de':
                if 'MalimletztenMonat' in buySales:
                    pass
                else:
                    buySales = None
        if buySales:
            if len(buySales) > 50:
                buySales = None

        asin_buySales_list = []
        if asin_not_Sales and buySales is None:
            asin_buy = self.asin
            asin_buySales = asin_not_Sales
        else:
            asin_buy = None
            asin_buySales = None
        if asin_buy and asin_buySales:
            asin_buySales_list.append([asin_buy, asin_buySales, self.date_info])

        # 跟卖
        for i in ASIN_XPATH['box_follow_list']:
            buyBox_num_list = self.response_s.xpath(i)
            if buyBox_num_list:
                buyBox_num_str = ''.join(buyBox_num_list)
                sellers_num_list = re.findall(r'\((\d+)\)', buyBox_num_str)
                if sellers_num_list:
                    sellers_num = int(sellers_num_list[0])
                else:
                    sellers_num = 1
                break
            else:
                sellers_num = 1

        for i in ASIN_XPATH['buyBox_url']:
            buyBox_url = self.response_s.xpath(i)
            if buyBox_url:
                if "http://www" not in buyBox_url[0] and td_1_text:
                    if 'Amazon.com' not in td_1_text[0]:
                        buyBox_url = self.site_url + buyBox_url[0].strip()
                        if 'seller=' in buyBox_url:
                            scraper_url_list = re.findall(r'seller=(.*?)&', buyBox_url)
                            if len(scraper_url_list) == 0:
                                scraper_url_list = re.findall(r'seller=(.*)', buyBox_url)
                            if scraper_url_list:
                                seller_id = scraper_url_list[0]
                            else:
                                seller_id = None
                        else:
                            seller_id = None
                else:
                    buyBox_url = None
                    seller_id = None

                break
            else:
                buyBox_url = None
                seller_id = None

        if rank:
            try:
                rank = rank.replace(',', '').replace('.', '')
                rank = int(rank)
            except:
                rank = None
        else:
            rank = None
        if rating:
            try:
                rating = rating.replace(',', '.')
                rating = round(float(rating), 2)
            except:
                rating = 0

        if total_comments:
            try:
                total_comments = total_comments.replace(',', '').replace('.', '')
                total_comments = int(total_comments)
            except:
                total_comments = 0
        if Package:
            Package = Package.replace("\u200e", '')

        if price:
            try:
                price = price.replace('\xa0', '').replace(",", '').strip()
                if (len(price) > 9) or ('Currently' in price):
                    price = None
                if price:
                    price = round(float(price), 2)
            except:
                price = -1
        if Weight:
            try:
                Weight = round(Weight, 3)
                if len(str(Weight)) > 11:
                    Weight = None
            except:
                Weight = 0

        if coupon_int:
            try:
                coupon_int = round(coupon_int, 2)
            except:
                coupon_int = 0
        if self.site_name not in ['de']:
            if Join_Prime_int:
                try:
                    Join_Prime_int = round(Join_Prime_int, 2)
                except:
                    Join_Prime_int = 0
        if Save_price_int:
            try:
                Save_price_int = round(Save_price_int, 2)
            except:
                Save_price_int = 0
        if category is None:
            category = "无"
        category = category.replace('\xa0', '')
        account_url = None
        account_name = None
        if buyBox_url is not None and seller_id is not None and td_1_text:
            buyBox_name = td_1_text[0]
            if 'Amazon.com' not in td_1_text[0]:
                lock = Lock()
                lock.acquire()
                account_name = buyBox_name.replace("%", "%%")
                account_name = account_name.strip()
                account_url = f'{self.site_url}/s?me={seller_id}'
                self.buyBox_list.append([seller_id, account_name, buyBox_url])
                lock.release()
        else:
            buyBox_name = None
        if buyBox_name is not None and seller_id is not None:
            lock = Lock()
            lock.acquire()
            account_name = buyBox_name.replace("%", "%%")
            account_name = account_name.strip()
            account_url = f'{self.site_url}/s?me={seller_id}'
            self.buyBoxname_asin_list.append([account_name, self.asin, seller_id])
            lock.release()
        if launch_time:
            launch_time = launch_time.replace('00:00:00', '').strip()
        if td_0_text:
            td_text = td_0_text[0]
            if 'Fulfilled by' in td_text:
                fulfilled_by = td_text
            else:
                fulfilled_by = None
        else:
            td_text = None
            fulfilled_by = None
        if td_1_text:
            td1_text = td_1_text[0].replace('Ships from and sold by ', '')
        else:
            td1_text = None
        seller_dict = {
            "seller_id": seller_id,
            "ship_from": td_text,
            "sold_by": td1_text,
            "fulfilled_by": fulfilled_by
        }

        cleaned_data = {k: (v.strip() if isinstance(v, str) and v.strip() else None) for k, v in seller_dict.items()}

        if all(value is None for value in cleaned_data.values()):
            seller_json = None
        else:
            seller_json = json.dumps(cleaned_data, ensure_ascii=False)
        review_json = self.get_review(self.response_s, self.site_name)
        item = {'asin': self.asin, 'week': self.week, 'month': self.month, 'title': title, 'img_url': image,
                'rating': rating,
                'total_comments': total_comments,
                'price': price, "rank": rank, 'category': category, 'launch_time': launch_time,
                'amazon_launch_time': amazon_launch_time,
                'volume': Package,
                'weight': Weight, "page_inventory": page_inventory,
                "buy_box_seller_type": buy_box_seller_type,
                "asin_vartion_list": len(self.asin_variation_list), 'title_len': title_len,
                'img_num': video_img_count_num, 'img_type': video_A_type, 'activity_type': deal_type,
                'one_two_val': coupon_int, 'eight_val': Save_price_int,
                'qa_num': QA_num, 'five_star': int(star5), 'four_star': int(star4), 'three_star': int(star3),
                'two_star': int(star2), 'one_star': int(star1), 'low_star': low_star, 'together_asin': together_asin,
                'brand': Brand, 'ac_name': ac_name, 'material': Material, 'node_id': node_id,
                'data_type': self.data_type_asin,
                'sp_num': sp_type, 'describe': ppd_data_five, 'date_info': self.date_info,
                'all_img_video_list': self.all_img_video_list, 'asin_variation_list': self.asin_variation_list,
                'buyBoxname_asin_list': self.buyBoxname_asin_list, 'buyBox_list': self.buyBox_list,
                'bs_category_asin_list_pg': self.bs_category_asin_list_pg, 'weight_str': weight_str,
                'package_quantity': package_quantity,
                'pattern_name': pattern_name, 'seller_id': seller_id, 'variat_num': len(variat_list),
                'best_sellers_rank': best_sellers_rank, 'best_sellers_herf': best_sellers_herf,
                'account_url': account_url, 'account_name': account_name, 'parentAsin': parentAsin,
                'sellers_num': sellers_num,
                'all_best_sellers_herf': all_best_sellers_herf, 'product_description': product_description,
                'buySales': buySales,
                'image_view': image_view, 'product_json': product_json, 'productdetail_json': productdetail_json,
                'review_ai_text': review_ai_text, 'review_label_json': review_label_json,
                'lob_asin_json': lob_asin_json, 'sp_initial_seen_asins_json': sp_seen_asins_json,
                'sp_4stars_initial_seen_asins_json': sp_4stars_seen_asins_json,
                'sp_delivery_initial_seen_asins_json': sp_delivery_seen_asins_json,
                'compare_similar_asin_json': compare_similar_asin_json,
                'customer_reviews_json': customer_reviews_json, 'together_asin_json': together_asin_json,
                'min_match_asin_json': min_match_asin_json, 'seller_json': seller_json, 'current_asin': current_asin,
                'div_id_list': div_id_list, 'bundles_this_asins_data_json': bundles_this_asins_data_json,
                'video_m3u8': video_m3u8, 'result_list_json': result_list_json,
                'bundle_asin_component_json': bundle_asin_component_json,
                "review_json_list": review_json, 'asin_buySales_list': asin_buySales_list}
        if self.site_name == 'us':
            item['three_four_val'] = Join_Prime_int
        elif self.site_name in ['uk', 'fr', 'it', 'es']:
            item['three_four_val'] = Promotion_Message_int
            item['five_six_val'] = Join_Prime_int
        elif self.site_name == 'de':
            item['three_four_val'] = Sparen_int
            item['five_six_val'] = Aktuelle_Angebote_int
        return item


# if __name__ == '__main__':
#     ParseAsinUs().xpath_html()
