import re
import json
import time
import copy
import logging
import unicodedata
from urllib.parse import urlparse
from scrapy.selector import Selector
from amazon_spider.utils.utils import time_ch
from amazon_spider.xpath_dict.amazon_detail import ASIN_XPATH
from amazon_spider.utils.time_disposal import get_keepa_time, keepa_time_disposal


class AmazonDetailExtractor:

    def __init__(self, site, url="https://www.amazon.com"):
        """Store the site/URL and build the weight-unit lookup tables.

        Args:
            site: Marketplace code; methods of this class compare it (or the
                ``site`` argument they receive) with values such as "us",
                "uk" and "de".
            url: Base URL, kept as ``self.url_``.
        """
        self.site = site
        self.url_ = url
        # Unit spelling as it appears in scraped text -> canonical unit label.
        # The labels are the keys of the two ratio tables below.
        self.weight_dict = {
            'Kilograms': "公斤",
            'Kilogramm': "千克",
            'kilogrammes': "公斤",
            'Kilogrammes': "公斤",
            'Kg': "千克",
            'kilogramos': "公斤",
            'Kilogramos': "公斤",
            'gramm': "克",
            'grammes': "克",
            'grams': "克",
            'kg': "千克",
            'g': "克",
            'Gramm': "克",
            'ounces': "盎司",
            'Ounces': "盎司",
            'gramos': "克",
            'grammi': "克",
            'Grams': "克",
            'Grammes': "克",
            'Libbre': "磅",
            'pounds': "磅",
            'Pounds': "磅",
            # "Livres" is the French spelling of pounds.  It used to map to
            # "本书" ("books"), a label missing from both ratio tables, so
            # such weights could never be converted.
            'Livres': "磅",
            'Milligrams': "毫克",
            'oz': "盎司",
        }
        # Canonical unit label -> multiplier that yields grams.
        self.unitary_ratio_g = {
            "克": 1,
            "磅": 453.59237,
            "公斤": 1000,
            "千克": 1000,
        }

        # Canonical unit label -> multiplier that yields pounds.
        self.unitary_ratio_pound = {
            "盎司": 0.062500,
            "磅": 1,
            "克": 0.0022046,
            "毫克": 0.00000220462262185,
            "公斤": 2.2046226,
            "斤": 1.1023113,
            "千克": 2.2046226,
        }
        # Object queried through ``.xpath()`` / ``.meta`` by the *_json
        # extractors; it is not assigned anywhere in the visible code, so it
        # is presumably set by the caller before those methods run.
        self.res = None

    def extract_(self, text, site):
        """Concatenate the distinct entries of *text* and drop NBSPs.

        Args:
            text: Iterable of strings (may be empty or None).
            site: Unused.

        Returns:
            The unique entries joined without a separator, in first-seen
            order, with every "\\xa0" removed; "" when *text* is empty.
        """
        if not text:
            return ""
        # dict.fromkeys de-duplicates while keeping insertion order; the
        # former set() made the order of the joined pieces arbitrary.
        return "".join(dict.fromkeys(text)).replace("\xa0", "")

    def extract_other_seller_name(self, text, site):
        """Join seller names with "|-|".

        NBSPs are removed from the joined string and its outer whitespace is
        trimmed.  Returns "" for empty input.  ``site`` is unused.
        """
        if not text:
            return ""
        joined = "|-|".join(text)
        return joined.replace("\xa0", "").strip()

    def extract_other_sellers_id(self, text, site):
        """Pull the ``seller=`` query value out of each link in *text*.

        Links without a ``seller=`` parameter contribute the literal string
        'None'.  The ids are joined with "|-|" and NBSPs are removed.
        Returns "" for empty input.  ``site`` is unused.
        """
        if not text:
            return ""
        seller_ids = [
            link.split("seller=")[-1].split("&")[0] if 'seller=' in link else 'None'
            for link in text
        ]
        return "|-|".join(seller_ids).replace("\xa0", "")

    def extract_other_seller_buy_boy_type(self, text, site):
        """Extract, per HTML fragment, the text that follows "by" in its delivery line.

        Each entry of *text* is parsed as HTML; the text nodes of the
        ``mbc-delivery`` span are collected and the first one containing
        "by" is used.  The part after the last "by" is kept, with "Amazon."
        normalised to "Amazon".  Fragments without such a line contribute "".

        Returns:
            The per-fragment values joined with "|-|", or "" for empty
            input.  ``site`` is unused.
        """
        from scrapy import Selector
        if not text:
            return ""
        delivery_xpath = ".//span[@class='a-size-small a-color-secondary mbc-delivery']/text()"
        labels = []
        for fragment in text:
            lines = Selector(text=fragment).xpath(delivery_xpath).extract()
            # NOTE: "\\n" is a literal backslash followed by "n", not a newline.
            by_lines = [line for line in lines if "by" in line.replace("\\n", "").strip()]
            if not by_lines:
                labels.append("")
                continue
            fulfiller = by_lines[0].split("by")[-1].strip().replace("Amazon.", "Amazon")
            labels.append(fulfiller.replace("\\n", "").strip() or "")
        return "|-|".join(labels)

    def extract_sp_num(self, text, site):
        """Summarise carousel sizes as "<detail>,<highly rated>,<prime theme>".

        Every entry of *text* is first parsed as JSON and its ``name`` /
        ``set_size`` fields are used.  When that fails, the entry is treated
        as raw text: the carousel is recognised by substring and the size is
        whatever follows the last "set_size" up to the next comma (with
        ``:``, ``"`` and ``&quot;`` removed).

        Args:
            text: Iterable of carousel descriptors (JSON or raw strings).
            site: Unused.

        Returns:
            Three comma-separated values, with "0" for carousels that were
            not seen; "" when *text* is empty or all three are "0".
        """
        if not text:
            return ""
        # Placeholder in the result template -> carousel names filling it.
        # Checked in this order; the first matching group wins.
        slots = (
            ("$", ("sp_detail_carousel", "sp_detail2_carousel")),
            ("^", ("sp_detail-prime_theme_for_non_prime_members_carousel",
                   "sp_detail2-prime_theme_for_non_prime_members_carousel")),
            ("%", ("sp_detail_thematic-highly_rated_carousel",
                   "sp_detail2_thematic-highly_rated_carousel")),
        )
        sp = "$,%,^"
        for raw in text:
            try:
                detail = json.loads(raw)
                name = detail.get("name")
                for placeholder, names in slots:
                    if name in names:
                        sp = sp.replace(placeholder, str(detail.get("set_size", "")))
                        break
            except Exception:
                # Not a JSON object: fall back to substring parsing.  Was a
                # bare ``except:``, which also swallowed KeyboardInterrupt
                # and SystemExit.
                for placeholder, names in slots:
                    if any(candidate in raw for candidate in names):
                        size = raw.split("set_size")[-1].split(",")[0]
                        size = size.replace(":", "").replace('"', "").replace("&quot;", "")
                        sp = sp.replace(placeholder, str(size))
                        break
        result = sp.replace("$", "0").replace("%", "0").replace("^", "0")
        return "" if result == "0,0,0" else result

    def extract_ac_name(self, text, site):
        """Return the first entry of *text*, or "" when it is empty.

        ``site`` is unused.
        """
        return text[0] if text else ""

    def extract_search_category(self, text, site):
        """Return the first entry of *text* that is not blank.

        The entry is returned as-is (not stripped).  Returns "" when *text*
        is empty or contains only blank entries.  ``site`` is unused.
        """
        if not text:
            return ""
        # A default for next() covers the all-blank case, which used to
        # raise IndexError when indexing an empty filtered list.
        return next((entry for entry in text if entry.strip()), "")

    def extract_img_url(self, text, site):
        """Return image URLs with their second-to-last dot segment removed.

        When more than one URL is given, those ending in ".gif" are skipped.
        For every remaining URL that does not end in "png", the segment
        between the last two dots is deleted (e.g. "a._AC_US40_.jpg" ->
        "a.jpg"); "png" URLs are kept unchanged.  ``site`` is unused.

        Raises:
            IndexError: if a non-png entry contains no "." at all.
        """
        candidates = [link for link in text if link[-4:] != ".gif"] if len(text) > 1 else text
        cleaned = []
        for link in candidates:
            parts = link.split(".")
            if parts[-1] != "png":
                del parts[-2]
            cleaned.append(".".join(parts))
        return cleaned

    def extract_img_url_min(self, text, site):
        """Return *text* without ".gif" entries.

        A list with zero or one entry is returned unchanged (a single ".gif"
        entry is therefore kept).  ``site`` is unused.
        """
        if len(text) <= 1:
            return text
        return [link for link in text if link[-4:] != ".gif"]

    def extract_title(self, text, site):
        """Join the title fragments with single spaces and remove NBSPs.

        ``site`` is unused.
        """
        joined = " ".join(text)
        return joined.replace("\xa0", "")

    def extract_price(self, text, site):
        """Normalise the first price string in *text* to a plain number.

        Handling, in order:
          * "option(s) from ...": the part after the last "$"; commas are
            dropped when the text also contains ".", otherwise they become
            decimal points.  The result is not validated.
          * "... from ..." containing ".": currency markers, "from" and
            commas are removed.
          * "... from ..." without ".": the part after the currency symbol
            of ``self.site`` with "," as decimal separator.  This branch
            returns a ``float`` (kept for backward compatibility); every
            other branch returns a ``str``.
          * anything else: currency markers removed; "," is a thousands
            separator when the text contains ".", a decimal separator
            otherwise.  Strings longer than 10 characters without "." only
            use their last space-separated token.

        Args:
            text: List whose first element is the scraped price text.
            site: Unused; the currency lookup uses ``self.site``.

        Returns:
            The numeric text (or float, see above), or "" when *text* is
            empty or the cleaned value is not a number.
        """
        if not text:
            return ""
        raw = text[0]

        def _drop(value, tokens):
            # Remove each token in the given order.
            for token in tokens:
                value = value.replace(token, "")
            return value

        def _numeric_or_blank(candidate):
            # float() only raises ValueError for a str argument; the former
            # bare ``except:`` also hid KeyboardInterrupt/SystemExit.
            try:
                float(candidate)
            except ValueError:
                return ""
            return candidate

        if 'options from' in raw or 'option from' in raw:
            amount = raw.split("$")[-1]
            if "," in raw and "." in raw:
                return amount.replace(",", "")
            return amount.replace(",", ".")

        if 'from' in raw:
            if "." in raw:
                cleaned = _drop(raw, ("$", "from", ",", "\xa0", "€", "£", "ab", "zł", "TL", "kr"))
                return _numeric_or_blank(cleaned.strip())  # e.g. B07JCXD9D2
            currency = {
                "us": "$",
                "uk": "£",
                "fr": "€",
                "de": "€",
                "es": "€",
                "it": "€",
            }.get(self.site)
            try:
                # An unknown site gives currency=None, i.e. split on whitespace.
                return float(raw.split(currency)[-1].strip().replace(",", "."))
            except ValueError:
                return ""

        if "." in raw:
            cleaned = _drop(raw, ("$", ",", "\xa0", "€", "£", "ab", "zł", "TL", "kr"))
        elif len(raw) > 10:
            last_token = raw.split(" ")[-1].replace("$", "").replace(",", ".")
            cleaned = _drop(last_token, ("\xa0", "€", "£", "ab", "zł", "TL", "kr"))
        else:
            without_currency = _drop(raw, ("zł", "TL", "kr", "$")).replace(",", ".")
            cleaned = _drop(without_currency, ("\xa0", "€", "£", "ab"))
        return _numeric_or_blank(cleaned.strip())

    def extract_rating(self, text, site):
        """Return the leading token of the first entry with "," turned into ".".

        Returns "" when *text* is empty or its first entry contains "$".
        ``site`` is unused.
        """
        if not text or "$" in text[0]:
            return ""
        leading_token = text[0].strip().split(" ")[0]
        return leading_token.replace(",", ".")

    def extract_describe(self, text, site):
        """Join the entries of *text* with "|-|"; "" for empty input.

        ``site`` is unused.
        """
        return "|-|".join(text) if text else ""

    def extract_product_description(self, text, site):
        """Join the non-blank, stripped entries of *text* with "|-|".

        The joined string is returned only when it is longer than 20
        characters; otherwise (or for empty input) "" is returned.
        ``site`` is unused.
        """
        if not text:
            return ""
        pieces = [part.strip() for part in text if part.strip()]
        joined = '|-|'.join(pieces)
        return joined if len(joined) > 20 else ""

    def extract_rank(self, text, site):
        """Return the first rank number in ``text[0]`` as a digit string.

        Each number that is followed by a link (``href="..."`` ... ``</a>``)
        is a candidate; candidates whose href contains ``/<digits>/`` (or
        ``//``) are skipped.  Spaces, dots and commas are removed from the
        number.  Returns "" when nothing qualifies or *text* is empty.
        ``site`` is unused.
        """
        # Sample inputs
        # us
        # <span>#12,931 in Books (<a href="/gp/bestsellers/books/ref=pd_zg_ts_books">See Top 100 in Books</a>)</span>
        # </span> #16,273 in Clothing, Shoes &amp; Jewelry (<a href="/gp/bestsellers/fashion/ref=pd_zg_ts_fashion">See

        # de
        # <span>Nr. 925.506 in Küche, Haushalt &amp; Wohnen (<a href="/gp/bestsellers/kitchen/ref=pd_zg_ts_kitchen">
        # <span>#22 591 i Skönhet (<a href="/gp/bestsellers/beauty/ref=pd_zg_ts_beauty">Visa Topp 100 i Skönhet</a>)
        if not text:
            return ""
        candidates = re.findall(r'(\d+(?:[ |,|.]*\d*)*).*?href="(.*?)".*?</a>', text[0])
        for number, href in candidates:
            if re.search(r'/\d*/', href):
                continue
            return number.replace(' ', '').replace('.', '').replace(',', '')
        return ""

    def extract_total_comments(self, text, site):
        """Extract the review/rating count from the first matching entry.

        An entry matches when it is all digits or contains one of the
        localised rating words below.  From the first match:
          * "Liczba ocen: N" -> the part after the last ":".
          * "N betyg"        -> the text without "betyg".
          * otherwise        -> the first space-separated token.
        Spaces, "." / "," separators and NBSPs are then removed.

        Args:
            text: Iterable of candidate strings.
            site: Unused.

        Returns:
            The count as a string, or "" when no entry matches.
        """
        markers = ("betyg", 'Liczba ocen', 'beoordelingen', "değerlendir", "calificacion",
                   "rating", "voti", "évaluat", "Sternebewertungen", "valoraci")
        candidates = [
            entry for entry in text
            if entry.isdigit() or any(marker in entry for marker in markers)
        ]
        if not candidates:
            return ""
        first = candidates[0].strip()
        if 'Liczba ocen' in first:
            count = first.split(":")[-1].replace(" ", "").replace(".", "")
        elif "betyg" in first:
            count = first.replace("betyg", "").replace(" ", "")
        else:
            count = first.split(" ")[0].replace(",", "").replace(".", "")
        # The old code only removed the literal 4-character text "\xa0"
        # (backslash, x, a, 0), so a real NBSP thousands separator survived
        # in the result.  Both forms are stripped now.
        return count.replace("\\xa0", "").replace("\xa0", "")

    def extract_page_inventory(self, text, site):
        """Classify *text* by how many distinct all-digit entries it holds.

        Returns:
            3 when there is none, 1 when there are more than 20, 2 otherwise.
            ``site`` is unused.
        """
        distinct_numbers = {entry for entry in text if entry.isdigit()}
        if not distinct_numbers:
            return 3
        return 1 if len(distinct_numbers) > 20 else 2

    def extract_category(self, text, site):
        """Concatenate the entries of *text* without a separator.

        Returns "" for empty input.  ``site`` is unused.
        """
        return "".join(text) if text else ""

    def extract_volume(self, text, site):
        """Clean the first entry of *text* and keep the part before any ";".

        The entry is rendered with ``repr`` so that characters Python
        escapes as ``\\uXXXX`` can be removed; single quotes and the label
        "Dimensions:" are dropped as well.  Returns "" for empty input.
        ``site`` is unused.
        """
        if not text:
            return ""
        escaped = re.sub(r"\\u.{4}", '', repr(text[0]))
        cleaned = escaped.replace("'", "").replace("Dimensions:", "").strip()
        return cleaned.split(";")[0] if ";" in cleaned else cleaned

    def extract_sp_initial_seen_asins_json(self, text, site):
        """Build a JSON list describing the ``initialSeenAsins`` of a carousel.

        The first entry of *text* is parsed as JSON.  For every ASIN in its
        ``initialSeenAsins`` the links in ``self.res`` whose href contains
        that ASIN are queried for title, image src, the text following the
        ``<i>`` element and the price text.

        Args:
            text: Iterable of JSON strings.
            site: Unused.

        Returns:
            ``json.dumps`` of a list of dicts with the keys seen_asins,
            seen_asins_title, seen_asins_src, seen_asins_total_comments and
            seen_asins_price; "" when *text* is empty or the entry has no
            ``initialSeenAsins``.

        Raises:
            json.JSONDecodeError: if the entry is not valid JSON.
        """
        if not text:
            return ""
        # NOTE(review): every path through the loop body returns, so only
        # the first entry is ever inspected - confirm this is intended.
        for raw in text:
            seen_asins = json.loads(raw).get("initialSeenAsins")
            if not seen_asins:
                return ""
            records = []
            for asin in seen_asins:
                anchor = f'//a[contains(@href,"{asin}")]'
                title = self.res.xpath(anchor + '/@title').get()
                sources = self.res.xpath(anchor + '/img/@src').getall()
                if not sources:
                    src = None
                elif len(sources[0]) > 400:
                    # A very long first src falls back to the last candidate.
                    src = sources[-1]
                else:
                    src = sources[0]
                total_comments = self.res.xpath(
                    anchor + '/i/following-sibling::span/text()').get()
                price = self.res.xpath(
                    anchor + '/span[contains(@class,"-price")]//text()').get()
                records.append({
                    'seen_asins': asin,
                    'seen_asins_title': title,
                    'seen_asins_src': src,
                    'seen_asins_total_comments': total_comments,
                    'seen_asins_price': price,
                })
            return json.dumps(records) if records else ""

    def extract_sp_4stars_initial_seen_asins_json(self, text, site):
        """Build the ``initialSeenAsins`` JSON for this carousel's descriptors.

        The parsing is the same as ``extract_sp_initial_seen_asins_json``;
        this method used to carry a verbatim copy of that body and now
        delegates to it so the variants cannot drift apart.

        Args:
            text: Iterable of JSON strings.
            site: Passed through unchanged.

        Returns:
            A JSON string listing the seen ASINs with title, image src,
            comment text and price, or "" when nothing is available.
        """
        return self.extract_sp_initial_seen_asins_json(text, site)

    def extract_sp_delivery_initial_seen_asins_json(self, text, site):
        """Build the ``initialSeenAsins`` JSON for this carousel's descriptors.

        The parsing is the same as ``extract_sp_initial_seen_asins_json``;
        this method used to carry a verbatim copy of that body and now
        delegates to it so the variants cannot drift apart.

        Args:
            text: Iterable of JSON strings.
            site: Passed through unchanged.

        Returns:
            A JSON string listing the seen ASINs with title, image src,
            comment text and price, or "" when nothing is available.
        """
        return self.extract_sp_initial_seen_asins_json(text, site)

    def extract_compare_similar_asin_json(self, text, site):
        """Describe the comparison items named by the element ids in *text*.

        For each id the ASIN is the second "-"-separated part; the image src
        and a title are looked up in ``self.res`` through the ``div`` whose
        id contains the full element id.  Ids that cannot be processed are
        skipped (best effort).

        Args:
            text: Iterable of element ids.
            site: Unused.

        Returns:
            ``json.dumps`` of a list of dicts with the keys compare_asin,
            compare_asin_src and compare_asin_title, or "" when nothing
            could be collected.
        """
        if not text:
            return ""
        compared = []
        for element_id in text:
            try:
                asin = element_id.split('-')[1]
                container = f'//div[contains(@id,"{element_id}")]'
                src = self.res.xpath(container + '//img/@src').get()
                title = self.res.xpath(
                    container + '/parent::div/following-sibling::div//span/text()').get()
            except Exception:
                # Still best effort, but no longer silent: the old
                # ``except: pass`` hid every failure, including
                # KeyboardInterrupt/SystemExit.
                logging.info("compare_similar_asin skipped %r", element_id, exc_info=True)
                continue
            compared.append({
                'compare_asin': asin,
                'compare_asin_src': src,
                'compare_asin_title': title,
            })
        return json.dumps(compared) if compared else ""

    def extract_customer_reviews_json(self, span_list, site):
        """Turn a flat span list into a JSON list of ``{attribute: rating}``.

        *span_list* is read in groups of three: the first element (stripped)
        is the attribute, the second is its rating and the third is ignored.
        A trailing group that lacks a rating is dropped instead of raising
        IndexError as it used to.

        Args:
            span_list: Flat list of strings.
            site: Unused.

        Returns:
            ``json.dumps`` of a list of single-key dicts, or "" when no
            complete attribute/rating pair exists.
        """
        if not span_list:
            return ""
        # Stopping at len - 1 guarantees that index + 1 is always valid.
        reviews = [
            {span_list[index].strip(): span_list[index + 1]}
            for index in range(0, len(span_list) - 1, 3)
        ]
        return json.dumps(reviews) if reviews else ""

    def extract_together_asin_json(self, text, site):
        """Describe the ASINs referenced by the product links in *text*.

        The ASIN is the text between "/dp/" and "/ref" in each link; links
        without that pattern are skipped.  Title and price are looked up in
        ``self.res`` through the anchors whose href contains the ASIN.

        Args:
            text: Iterable of link strings.
            site: Unused.

        Returns:
            ``json.dumps`` of a list of dicts with the keys together_asin,
            together_asin_title and together_asin_price, or "" when no link
            yields an ASIN.
        """
        if not text:
            return ""
        bundle = []
        for link in text:
            found = re.findall(r'/dp/(.*?)/ref', link)
            if not found:
                continue
            asin = found[0]
            anchor = f"//a[contains(@href,'{asin}')]"
            title = self.res.xpath(anchor + "//span/text()").get()
            price = self.res.xpath(
                anchor + "/parent::div/following-sibling::div//span[contains(@class,'price')]/span/text()").get()
            bundle.append({
                'together_asin': asin,
                'together_asin_title': title,
                'together_asin_price': price,
            })
        return json.dumps(bundle) if bundle else ""

    def extract_min_match_asin_json(self, min_match_list_asin_list, site):
        """Describe every listed ASIN except the page's own one.

        The page's own ASIN is read from ``self.res.meta['asin']``.  For the
        other ASINs, title, lazy-loaded image (``data-src``), price and the
        star-rating text are looked up in ``self.res`` through the anchors
        whose href contains the ASIN.

        Args:
            min_match_list_asin_list: Iterable of ASIN strings.
            site: Unused.

        Returns:
            ``json.dumps`` of a list of dicts with the keys min_match_asin,
            min_match_asin_title, min_match_asin_src, min_match_asin_price
            and min_match_asin_total_comment, or "" when the list is empty
            or only holds the page's own ASIN.
        """
        if not min_match_list_asin_list:
            return ""
        own_asin = self.res.meta.get('asin')
        matches = []
        for asin in min_match_list_asin_list:
            if asin == own_asin:
                continue
            anchor = f'//a[contains(@href,"{asin}")]'
            title = self.res.xpath(anchor + '//span/text()').get()
            src = self.res.xpath(anchor + '//img/@data-src').get()
            price = self.res.xpath(
                anchor + '/parent::div//div/span[contains(@class,"a-price")]/span/text()').get()
            # .get() already yields a single string (or None).  The previous
            # ``[0]`` on that string kept only its first character.
            total_comment = self.res.xpath(
                anchor + '/parent::div//div/i[contains(@class,"-star-small")]/span/text()').get()
            matches.append({
                'min_match_asin': asin,
                'min_match_asin_title': title,
                'min_match_asin_src': src,
                'min_match_asin_price': price,
                'min_match_asin_total_comment': total_comment or None,
            })
        return json.dumps(matches) if matches else ""

    # def extract_seller_json(self, text, site):
    #     if text:
    #         t = re.sub(r"\\u.{4}", '', text[0].__repr__()).replace("'", "").replace("Dimensions:", "")
    #         if ";" in t.strip():
    #             return t.strip().split(";")[0]
    #         else:
    #             return t.strip()
    #     else:
    #         return ""


    def weight_changes(self, text):
        """Convert a weight description to grams.

        Two input shapes are supported:
          * without spaces, e.g. "0.5kg" or "approx.5g/0.2oz" (bullet-point
            style): with "/" present the last part that names a known unit
            is used, then the first "<number><unit>" pair is parsed;
          * space separated, e.g. "1.5 Kilograms": the first token is the
            number and the second the unit.

        Args:
            text: Weight description.

        Returns:
            The weight in grams as ``str(float)``, or "" when the text
            cannot be parsed or the unit has no gram ratio.
        """
        parts = text.split(" ")
        if len(parts) == 1:
            # Longest spelling first so "Kilogrammes" beats "Kilogramm" etc.
            units = "|".join(
                re.escape(name) for name in sorted(self.weight_dict, key=len, reverse=True))
            if '/' in text:
                chunks = [chunk for chunk in text.split('/') if re.search(units, chunk)]
                if not chunks:
                    return ""
                text = chunks[-1]
            # The former pattern allowed one arbitrary character between the
            # number and the unit, which swallowed the "k" of "kg" (so
            # "0.5kg" was read as 0.5 g), and it only accepted a single
            # digit before the decimal point ("12.5" became "2.5").
            found = re.search(r"(\d+(?:\.\d*)?)\s*(" + units + r")", text)
            if not found:
                logging.info(f"未找到匹配项 {text}")
                return ""
            weight, unit = found.groups()
        else:
            weight, unit = parts[0], parts[1]
        ratio = self.unitary_ratio_g.get(self.weight_dict.get(unit, ""))
        if ratio is None:
            return ""
        try:
            return str(float(weight) * ratio)
        except ValueError:
            # e.g. "approx 5 g": the first token is not a number.
            return ""

    def weight_changes_us(self, text):
        """Convert a weight description to pounds.

        Two input shapes are supported:
          * without spaces, e.g. "4kg" or "approx.5g/0.2oz" (bullet-point
            style): with "/" present the last part that names a known unit
            is used, then the first "<number><unit>" pair is parsed;
          * space separated, e.g. "16 Ounces": the first token is the number
            and the second the unit.

        Args:
            text: Weight description.

        Returns:
            The weight in pounds as ``str(float)``, or "" when the text
            cannot be parsed or the unit is unknown.
        """
        parts = text.split(" ")
        if len(parts) == 1:
            # Longest spelling first so "Kilogrammes" beats "Kilogramm" etc.
            units = "|".join(
                re.escape(name) for name in sorted(self.weight_dict, key=len, reverse=True))
            if '/' in text:
                chunks = [chunk for chunk in text.split('/') if re.search(units, chunk)]
                if not chunks:
                    return ""
                text = chunks[-1]
            # The former pattern allowed one arbitrary character between the
            # number and the unit, which swallowed the "k" of "kg" (so "4kg"
            # was read as 4 g), and it only accepted a single digit before
            # the decimal point ("12.5" became "2.5").
            found = re.search(r"(\d+(?:\.\d*)?)\s*(" + units + r")", text)
            if not found:
                logging.info(f"未找到匹配项 {text}")
                return ""
            weight, unit = found.groups()
        else:
            weight, unit = parts[0], parts[1]
        # unitary_ratio_pound holds the same factors the if/elif chain used
        # to hard-code.
        ratio = self.unitary_ratio_pound.get(self.weight_dict.get(unit, ""))
        if ratio is None:
            return ""
        try:
            return str(float(weight) * ratio)
        except ValueError:
            # e.g. "approx 5 oz": the first token is not a number.
            return ""

    def unit_to_num(self, text, site):
        """Convert a weight description to a number in the site's base unit.

        The base unit is pounds for ``site == "us"`` and grams otherwise.
        With "/" in the text (e.g. 'approx.5g/0.2oz', '1.5g/pcs') the last
        part that names a known unit is used.  The first "<number><unit>"
        pair is then parsed; whitespace between the two is allowed.

        Args:
            text: Weight description.
            site: Marketplace code selecting the ratio table.

        Returns:
            The converted weight as ``str(float)``, or "" when nothing can
            be parsed or the unit has no ratio for this site.
        """
        # The tables are only read, so no copy is needed.
        unitary_ratio = self.unitary_ratio_pound if site == "us" else self.unitary_ratio_g
        # Longest spelling first so "Kilogrammes" beats "Kilogramm" etc.
        units = "|".join(
            re.escape(name) for name in sorted(self.weight_dict, key=len, reverse=True))
        if '/' in text:
            chunks = [chunk for chunk in text.split('/') if re.search(units, chunk)]
            if not chunks:
                return ""
            text = chunks[-1]
        # The former pattern allowed one arbitrary character between number
        # and unit, which swallowed the "k" of "kg", and only accepted one
        # digit before the decimal point ("12.5" became "2.5").
        found = re.search(r"(\d+(?:\.\d*)?)\s*(" + units + r")", text)
        if not found:
            logging.info(f"匹配为空  {text}")
            return ""
        weight = float(found.group(1))
        unit = found.group(2)
        # Both guards used to be inverted (they bailed out when the lookup
        # SUCCEEDED), so every recognised unit returned "".
        label = self.weight_dict.get(unit, "")
        if not label:
            logging.info(f"weight_dict中没有相关unit：{unit}", )
            return ""
        ratio = unitary_ratio.get(label)
        if ratio is None:
            logging.info(f"unitary_ratio 中没有相关unit：{unit}")
            return ""
        value_in_pounds = weight * ratio
        logging.info(f"{weight} {self.weight_dict[unit]} {unit} 是 {value_in_pounds} 磅")
        return str(value_in_pounds)

    def extract_weight(self, text, site):
        """Convert the first entry of *text* with ``unit_to_num``.

        Args:
            text: List whose first element is the scraped weight text.
            site: Unused; the conversion uses ``self.site``.

        Returns:
            Whatever ``unit_to_num`` returns, or "" when *text* is empty,
            its first entry is purely alphabetic, or the conversion fails.
        """
        if not text:
            return ""
        if text[0].isalpha():
            # No digits at all, so there is nothing to convert.
            return ""
        try:
            return self.unit_to_num(text[0], self.site)
        except Exception:
            # Best effort; formerly a bare ``except:`` that also swallowed
            # KeyboardInterrupt/SystemExit.
            return ""

    def extract_launch_time(self, text, site):
        """Convert the first date-like entry of *text* with ``time_ch``.

        Entries that are blank, purely alphabetic, or contain "/" or "("
        are ignored.  Returns "" when no entry remains or ``time_ch``
        yields a falsy value.
        """
        candidates = [
            entry for entry in text
            if entry.strip() and not entry.isalpha() and "/" not in entry and "(" not in entry
        ]
        if not candidates:
            return ""
        return time_ch(site, candidates[0]) or ""

    def extract_qa_num(self, text, site):
        """Return the leading integer of the first entry as a string.

        The first space-separated token is taken and "," and "+" are
        removed (e.g. "1,000+ answered" -> "1000").

        Args:
            text: List whose first element is the scraped text.
            site: Unused.

        Returns:
            The digits as a string, or "" when *text* is empty or the token
            is not an integer.
        """
        if not text:
            return ""
        try:
            count = text[0].strip().split(" ")[0].replace(",", "").replace("+", "")
            int(count)  # validation only; the string form is returned
        except Exception:
            # Formerly a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit.
            return ""
        return count

    def extract_star(self, text, site):
        """Return the token immediately before the first "%" of the first entry.

        Returns "0" when *text* is empty.  ``site`` is unused.
        """
        if not text:
            return "0"
        before_percent = text[0].strip().split("%")[0]
        return before_percent.split(" ")[-1]

    def extract_brand(self, text, site):
        """Extract the brand name from the byline text.

        With more than one entry, the second one is returned as-is.
        Otherwise the single entry is matched against the localised byline
        patterns below and the first non-empty captured group of the first
        match is used, with NBSPs, ":" and "-" removed.

        Returns:
            The brand, or "" when *text* is empty or nothing matches.
            ``site`` is unused.
        """
        if not text:
            return ""
        if len(text) > 1:
            return text[1]
        brand_list = re.findall(
            r'den(.*)Store|Marke:(.*)|the(.*)Store|boutique(.*)|:(.*)|de (.*)|Brand(.*)|Visita lo Store di (.*)',
            text[0])
        if not brand_list:
            logging.info(f"brand_list 索引报错 {brand_list}")
            return ""
        captured = [group.strip() for group in brand_list[0] if group.strip()]
        brand = captured[0] if captured else ""
        return brand.replace("\xa0", "").replace(':', '').replace('-', '').strip()

    def extract_node_id(self, text, site):
        """Return what follows the last "node=" in the last entry, stripped.

        Returns "" for empty input.  ``site`` is unused.
        """
        if not text:
            return ""
        after_node = text[-1].split("node=")[-1]
        return after_node.strip()

    def extract_together_asin(self, text, site):
        """Strip "[", "]" and double quotes from the first entry of *text*.

        Returns "" for empty input.  ``site`` is unused.
        """
        if not text:
            return ""
        cleaned = text[0]
        for token in ("[", "]", '"'):
            cleaned = cleaned.replace(token, "")
        return cleaned

    def extract_material(self, text, site):
        """Concatenate *text* and remove escaped characters and single quotes.

        The joined string is rendered with ``repr`` so that characters
        Python escapes as ``\\uXXXX`` can be removed together with single
        quotes.  Returns "" for empty input.  ``site`` is unused.
        """
        if not text:
            return ""
        escaped = repr("".join(text))
        return re.sub(r"\\u.{4}", '', escaped).replace("'", "")

    def extract_buy_box_seller_type(self, text, site):
        """Classify the buy-box entries into a numeric seller type.

        Label entries ("Ships from", "Sold by", "Customs & Duties",
        "policy") are ignored, duplicates are collapsed, and "Fulfilled by
        Amazon" / "Amazon Warehouse" are normalised to "Amazon".

        Returns:
            4 when *text* is empty; 3 when no remaining entry is "Amazon" or
            "Amazon.com"; 2 when one is and more than one entry remains;
            1 when the only remaining entry is Amazon.  ``site`` is unused.
        """
        if not text:
            return 4
        labels = ("Ships from", "Sold by", "Customs & Duties", "policy")
        sellers = [
            entry.replace("Fulfilled by Amazon", "Amazon").replace("Amazon Warehouse", "Amazon")
            for entry in set(text)
            if entry not in labels
        ]
        if "Amazon" not in sellers and "Amazon.com" not in sellers:
            return 3
        return 2 if len(sellers) > 1 else 1

    def extract_follow_sellers(self, text, site):
        """Return the first run of digits found in the first matched string.

        Args:
            text: List of matched strings; only the first entry is searched.
            site: Marketplace code (unused).

        Returns:
            The digits as a string, or ``""`` when there are none or the input
            is empty.
        """
        if not text:
            return ""
        # Raw pattern: "\d" in a plain string literal is an invalid escape
        # sequence and triggers a SyntaxWarning on recent Python versions.
        match = re.search(r"\d+", text[0])
        return match.group() if match else ""

    def extract_buy_sales(self, text, site):
        """Return the space-joined sales text when it carries the site's marker phrase.

        Args:
            text: List of matched strings.
            site: Marketplace code; ``us``/``uk`` and ``de`` each require a
                specific phrase, every other site accepts any text.

        Returns:
            The strings joined with single spaces, or ``""`` when the input is
            empty or the phrase required for ``site`` is missing.
        """
        if not text:
            return ""
        joined = " ".join(text)
        required_phrase = {
            'us': 'bought in past',
            'uk': 'bought in past',
            'de': 'Malimletzten Monat',
        }.get(site)
        if required_phrase is not None and required_phrase not in joined:
            return ""
        return joined

    def extract_video_url(self, text, site):
        """Return the last matched string when it looks like a video link.

        A string qualifies when it contains ``play-`` and is longer than 82
        characters, or contains ``video.`` and is longer than 99 characters.

        Args:
            text: List of matched strings; only the last entry is inspected.
            site: Marketplace code (unused).

        Returns:
            The qualifying string, otherwise ``""``.
        """
        if not text:
            return ""
        candidate = text[-1]
        length = len(candidate)
        is_player_link = 'play-' in candidate and length > 82
        is_video_link = 'video.' in candidate and length > 99
        return candidate if (is_player_link or is_video_link) else ""

    def extract_add_url(self, text, site):
        """Return the second matched A+ image source.

        Args:
            text: List of matched ``src`` strings.
            site: Marketplace code (unused).

        Returns:
            ``text[1]`` when it exists; ``""`` for empty input, and ``""``
            after printing a notice when only one entry is present.
        """
        if not text:
            return ""
        if len(text) > 1:
            return text[1]
        print("============== 获取A+ url 报错 ==============")
        return ""

    def extract_cat_id(self, text, site):
        """Return whatever follows the last ``node=`` in the final matched string.

        Args:
            text: List of matched strings; only the last entry is inspected.
            site: Marketplace code (unused).

        Returns:
            The unstripped text after ``node=`` (the whole string when
            ``node=`` is absent), or ``""`` for empty input.
        """
        if not text:
            return ""
        last_entry = text[-1]
        return last_entry.rpartition("node=")[2]

    def extract_online_time(self, text, site):
        """Return the current UTC timestamp when any text was matched.

        The matched text itself is not parsed; it only acts as a flag.

        Args:
            text: List of matched strings.
            site: Marketplace code (unused).

        Returns:
            ``"YYYY-MM-DD HH:MM:SS"`` (UTC, taken at call time) when ``text``
            is non-empty, otherwise ``''``. Any ordinary exception also
            yields ``''``.
        """
        try:
            if not text:
                return ''
            return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
        except Exception:
            # Best effort, but without swallowing KeyboardInterrupt/SystemExit
            # the way a bare ``except:`` would.
            return ''

    def extract_productdetail_json_rank(self, text, site):
        """Concatenate the matched rank strings.

        Args:
            text: List of matched strings.
            site: Marketplace code (unused).

        Returns:
            The strings joined without a separator, or ``""`` for empty input.
        """
        return ''.join(text) if text else ""

    def extract_image_view(self, text, site):
        """Report whether ``playVideoInImmersiveView`` is set to true.

        Args:
            text: List of matched script strings; only the first is inspected.
            site: Marketplace code (unused).

        Returns:
            ``"1"`` when the fragment between the last
            ``playVideoInImmersiveView`` and the next comma contains ``true``,
            otherwise ``"0"`` (also for empty input). The fragment is printed.
        """
        if not text:
            return "0"
        _, _, after_key = text[0].rpartition("playVideoInImmersiveView")
        msg = after_key.partition(",")[0].strip()
        print(msg)
        return "1" if "true" in msg else "0"

    def category(self, category):
        """Split a ``›``-separated category path into up to seven named levels.

        Args:
            category: Category path string such as ``"A›B›C"``.

        Returns:
            dict mapping ``root_category`` … ``seven_category`` to the matching
            path segment (``""`` for levels the path does not reach), or ``""``
            when ``category`` is empty.
        """
        if not category:
            return ""
        level_names = ['root_category', 'second_category', 'three_category', 'four_category', 'five_category',
                       'six_category', 'seven_category']
        segments = category.split("›")
        return {
            name: (segments[position] if position < len(segments) else "")
            for position, name in enumerate(level_names)
        }

    def site_coupe(self, response):
        """Collect the coupon / deal / promotion fields shown on a detail page.

        Args:
            response: Scrapy-style response exposing ``xpath()``.

        Returns:
            dict that always carries ``activity_type``: the distinct codes of
            the promotions detected, sorted and comma-joined (``""`` when none).
            Codes as produced below: ``1``/``2`` coupon (percentage / other),
            ``3``/``4`` Prime saving (percentage / other), ``5`` "Deal Price" or
            "With Deal", ``6`` "Top Deal", ``7`` "Save ... Buy" promotion or an
            Amazon Prime / promo-code message.
            Depending on what the page shows it also carries ``one_two_val``,
            ``is_coupon``, ``three_four_val``, ``five_six_val`` and
            ``eight_val``.
        """
        nbsp = "\xa0"
        promo_amount_xpath = (
            "//span[@class='a-color-success' and (contains(text(), '%') or contains(text(), '$') or contains(text(), '€') or contains(text(), '£'))]//text()")
        activity_type = []
        item = {}

        def first_numbers(raw):
            # Prefer a comma-decimal number ("12,50"); otherwise any digit run.
            cleaned = raw.replace(nbsp, "")
            return re.findall(r"\d+,\d+", cleaned) or re.findall(r"\d+", cleaned)

        def strip_currency(raw):
            value = raw.replace("$", "").replace("£", "").replace("€", "")
            # Commas are dropped only when the value also contains a ".".
            return value.replace(",", "") if "." in value else value

        def record_promo_amount(promo_amount):
            # Fills the coupon fields from the green "promo amount" text.
            numbers = first_numbers(promo_amount)
            item["one_two_val"] = numbers[0].replace(",", ".") if numbers else ""
            cleaned = promo_amount.replace(nbsp, "")
            marked = (re.findall(r"\d+(?:%|\$|£|€),\d+", cleaned)
                      or re.findall(r"\d+(?:%|\$|£|€)", cleaned)
                      or re.findall(r"(?:%|\$|£|€)\d", cleaned))
            # Guard on ``marked`` itself: text such as "5 €" has digits but no
            # digit adjacent to the symbol, which used to raise IndexError.
            item["is_coupon"] = marked[0].replace(",", ".") if marked else ""
            activity_type.append("1" if "%" in promo_amount else "2")

        # --- coupon -------------------------------------------------------
        coupon_label = response.xpath(
            "//label[contains(text(), 'Apply') or contains(text(), 'Applica')]//text()").get()
        if coupon_label:
            promo_amount = response.xpath(promo_amount_xpath).get("").strip()
            if promo_amount:
                record_promo_amount(promo_amount)
            else:
                # No promo text: fall back to the number in the label itself.
                label_numbers = first_numbers(coupon_label)
                if label_numbers:
                    item["one_two_val"] = label_numbers[0].replace(",", ".")
                    activity_type.append("1" if "%" in coupon_label else "2")
        elif response.xpath(
                "//i[contains(text(),'Coupon') or contains(text(),'Mehr sparen') or contains(text(),'cupón') or contains(text(),'Cupón')]").get() or response.xpath(
                "//i[contains(text(), 'Économisez')]").get():
            record_promo_amount(response.xpath(promo_amount_xpath).get("").strip())

        # --- deal price ---------------------------------------------------
        # "/text()" is required here: without it ``get()`` returns the whole
        # <span> element markup instead of the price.
        top_deal = response.xpath(
            '//td[contains(text(),"Top Deal:")]/following-sibling::td//span[@class="a-offscreen"]/text()').get()
        if top_deal:
            item["five_six_val"] = strip_currency(top_deal)
            activity_type.append("6")
        elif response.xpath('//td[contains(text(),"Deal Price:")]|//td[contains(text(),"With Deal:")]').get():
            deal_price = response.xpath(
                '//td[contains(text(),"Deal Price:") or contains(text(),"With Deal:")]/following-sibling::td//span[@class="a-offscreen"]/text()').get()
            item["five_six_val"] = strip_currency(deal_price) if deal_price else ""
            activity_type.append("5")

        # --- Prime --------------------------------------------------------
        if self.site != "us":
            badge = response.xpath(
                "//span[@id='dealBadgeSupportingText' and @class='a-size-small dealBadgeTextColor a-text-bold']/span/text()").get()
            if badge and badge.lower() in ["black friday", "black friday deal"]:
                site_prime = response.xpath(
                    "//span[@class='a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage']//text()").get("")
                # The saving may be absent even when the badge is present;
                # skip instead of failing on None.
                if site_prime:
                    saving = first_numbers(site_prime)
                    item["three_four_val"] = saving[0] if saving else ""
                    activity_type.append("3" if "%" in site_prime else "4")
        else:
            join_prime = response.xpath(
                '//span[contains(text(),"Join Prime")]/following-sibling::span/text()').get()
            if join_prime:
                item["three_four_val"] = strip_currency(join_prime)
                activity_type.append("3" if "%" in join_prime else "4")

        # --- "Save N ... Buy" promotion -------------------------------------
        eight = response.xpath(
            "//div[@id='promoPriceBlockMessage_feature_div' or @id='applicable_promotion_list_sec']//*[contains(text(), 'ave') and contains(text(), 'uy') and not(contains(text(), 'function'))]//text()").get()
        if eight:
            eig_val = re.findall(r"ave (\d*).*?", eight)
            item["eight_val"] = eig_val[0] if eig_val else ""
            activity_type.append("7")

        # --- Amazon Prime shipping / promo-code message ---------------------
        if response.xpath(
                "//div[@id='shippingMessageInsideBuyBox_feature_div' or @id='applicable_promotion_list_sec']//*[contains(text(), 'Amazon Prime')]//text()").get() or \
                response.xpath(
                    "//span[@class='promoPriceBlockMessage']//*[contains(text(), 'Sign in to redeem') or contains(text(), 'promo code')]//text()").getall():
            activity_type.append("7")

        # Sorted so the joined value does not depend on set iteration order.
        item["activity_type"] = ",".join(sorted(set(activity_type)))
        return item

    def extract_product_json(self, text, site):
        """Serialize the product attribute table (shown above the bullet points) to JSON.

        Args:
            text: List of HTML fragments; only the first one is parsed.
            site: Marketplace code (unused).

        Returns:
            JSON object string mapping each row's first non-blank ``<span>``
            text to its second one (rows with fewer than two are skipped), or
            ``""`` for empty input.
        """
        if not text:
            return ""
        attributes = {}
        selector = Selector(text=text[0], type="html")
        # Table rows first; otherwise the grid layout variant.
        rows = selector.xpath(".//tr") or selector.xpath(".//div[@class='a-fixed-left-grid-inner']")
        for row in rows:
            span_texts = [piece for piece in row.xpath(".//span//text()").getall() if piece.strip()]
            pair = span_texts[:2]
            if len(pair) != 2:
                continue
            label, value = pair
            attributes[label.strip()] = value.strip()
        return json.dumps(attributes)

    def extract_productdetail_json(self, text, site):
        """Collect label/value pairs from the product detail section at the bottom of the page.

        Args:
            text: List of HTML fragments.
            site: Marketplace code (unused).

        Returns:
            dict built from every ``<li>`` (or, when a fragment has none,
            ``<tr>``) that contains exactly two non-blank text nodes; ``""``
            when nothing was collected or the input is empty.
        """
        if not text:
            return ""
        details = {}
        for fragment in text:
            selector = Selector(text=fragment, type="html")
            entries = selector.xpath(".//li") or selector.xpath(".//tr")
            for entry in entries:
                parts = [piece for piece in entry.xpath(".//text()").getall() if piece.strip()]
                # Only unambiguous label/value rows are kept.
                if len(parts) != 2:
                    continue
                label, value = parts
                key = label.strip().replace('  ', '').replace("\\\\t", "").strip()
                details[key] = value.strip()
        return details if details else ""

    def extract_review_ai_text(self, text, site):
        """Serialize the review-summary aspects and their snippets to JSON.

        Entries of ``text`` containing ``<div`` are treated as HTML blocks, the
        rest as aspect labels. The n-th block is stored under the n-th label;
        when no labels were matched the block's own ``data-aspect`` attribute
        is used instead.

        Args:
            text: Mixed list of label strings and HTML fragments.
            site: Marketplace code (unused).

        Returns:
            JSON object string mapping label to
            ``"<span texts joined by &&&&>|-|<p texts joined by &&&&>"``,
            or ``''`` for empty input.
        """
        if not text:
            return ''
        labels = [entry for entry in text if "<div" not in entry]
        blocks = [entry for entry in text if "<div" in entry]
        comment_tag = {}
        for position, block in enumerate(blocks):
            selector = Selector(text=block, type="html")
            span_part = '&&&&'.join(selector.xpath(".//span/text()").getall())
            p_part = '&&&&'.join(selector.xpath(".//p//text()").getall())
            if labels:
                label = labels[position]
            else:
                label = selector.xpath('.//div[@data-aspect]/@data-aspect').get().strip()
            comment_tag[label] = span_part + '|-|' + p_part
        return json.dumps(comment_tag)

    def extract_lob_asin_json(self, text, site):
        """Serialize the ASIN part of every ``dp/...`` link to a JSON list.

        Args:
            text: List of link strings.
            site: Marketplace code (unused).

        Returns:
            JSON array of ``{"lob_asin": <text after "dp/">}`` objects, one per
            link containing ``dp/``; ``""`` when no link matched and ``''`` for
            empty input.
        """
        if not text:
            return ''
        found = []
        for link in text:
            match = re.search(r'dp\/(.*)', link)
            if match:
                found.append({'lob_asin': match.group(1)})
        return json.dumps(found) if found else ""

    def extract_returns(self, text, site):
        """Return the first matched string without non-breaking spaces.

        Args:
            text: List of matched strings.
            site: Marketplace code (unused).

        Returns:
            ``text[0]`` with every ``\\xa0`` removed, or ``""`` for empty input.
        """
        return text[0].replace("\xa0", "") if text else ""

    def run(self, response, url_all=False, img_min=False):
        """Parse a product detail response into a flat item dict.

        Every key of ``ASIN_XPATH["us"]`` is evaluated against ``response`` and
        handed to the matching ``extract_<key>`` method (``extract_`` when none
        exists). The item is then completed with weight/volume fallbacks, image
        flags, promotion fields from ``site_coupe``, category levels, seller
        details and bookkeeping values.

        Args:
            response: Scrapy-style response; ``response.meta`` is read for
                ``asin``, ``site``, ``data_type``, ``asin_type``, ``date_info``
                and ``account_id``.
            url_all: When true ``img_url`` holds every image URL joined by
                commas, otherwise only the first one.
            img_min: When true the ``img_url`` field is parsed with
                ``extract_img_url_min`` instead of ``extract_img_url``.

        Returns:
            dict with the extracted fields.
        """
        self.res = response
        item = {}
        volume = None
        weight = None
        buy_box = None
        star_dict = {
            "one_star": 1,
            "two_star": 2,
            "three_star": 3,
            "four_star": 4,
            "five_star": 5,
        }
        price_pattern = re.compile(r"\$|£|€|TL|zł|kr")
        for k, v in ASIN_XPATH["us"].items():
            ttt = time.time()
            msgs = [unicodedata.normalize('NFKC', i).strip().replace("\\\\n", "").replace('\\n', '').replace('\n', '').replace('\u200e', '').replace('\u200f', "") for i in
                    response.xpath("|".join(v)).getall() if i.strip()]
            print("解析", k, time.time() - ttt)
            if k == "volume":
                # Prefer the entries that contain ";" when there are any.
                with_semicolon = [i for i in msgs if ";" in i]
                msgs = with_semicolon or msgs
                volume = msgs
            if k == "price":
                # Drop entries that carry no currency marker.
                msgs = [i for i in msgs if price_pattern.search(i)]
            if k == "weight":
                weight = msgs
            if k == "buy_box_seller_type":
                buy_box = msgs
            if k in star_dict:
                # Star percentages are read from the raw page text with a
                # per-site sentence pattern, not from the XPath matches.
                site_re = {
                    "us": r'(\d+) percent of reviews have ' + f"{star_dict[k]} stars",
                    "fr": r'(\d+) pourcentage des avis ont ' + f"{star_dict[k]} étoiles",
                    "de": r'(\d+) Prozent der Bewertungen haben ' + f"{star_dict[k]} Sterne",
                    "es": r'Un (\d+) por ciento de las reseñas tienen ' + f"{star_dict[k]} estrellas",
                    "it": r'(\d+)% delle recensioni hanno ' + f"{star_dict[k]} stelle",
                    "uk": r'(\d+) percent of reviews have ' + f"{star_dict[k]} stars",
                    "ca": r'(\d+) percent of reviews have ' + f"{star_dict[k]} stars",
                    "mx": r'El (\d+)\xa0por ciento de las opiniones tienen ' + f"{star_dict[k]}\xa0estrellas",
                }
                star = re.findall(site_re.get(self.site, site_re.get("us")), response.text)
                item[k] = star[0] if star else "0"
            else:
                if k == "img_url" and img_min:
                    mhd = getattr(self, f'extract_img_url_min', self.extract_)
                else:
                    mhd = getattr(self, f'extract_{k}', self.extract_)
                item[k] = mhd(msgs, response.meta.get("site") or self.site)
        item["weight_str"] = weight[0] if weight else ""
        if not item.get("weight") and item.get("volume"):
            # Volume present but no weight: take the weight from the part after ";".
            if volume and ";" in volume[0]:
                try:
                    item["weight"] = self.unit_to_num(volume[0].split(";")[1].strip(), self.site)
                    item["weight_str"] = volume[0]
                except Exception:
                    item["weight"] = ""
                    item["weight_str"] = ""
        if not item.get("volume"):
            size = response.xpath('//th[text()=" Size "]/parent::tr/td/text()').get("")
            item["volume"] = re.sub(r"\\u.{4}", '', size.strip().__repr__()).replace("'", "").replace("\\xa0",
                                                                                               "").strip()
        if not item.get("volume"):
            # Last resort: look for a "dimension" line in the product description.
            # (Adding this rule to the shared XPath list would affect other ASINs.)
            product_description = response.xpath('//div[@id="productDescription"]//br/following-sibling::text()').getall()
            dimensions = [i.lower().replace("dimension:", "").strip() for i in product_description if "dimension" in i.lower()]
            item["volume"] = dimensions[0] if dimensions else ""
        if not item.get("weight_str"):
            # Last resort: look for a "weight" line in the product description.
            product_description = response.xpath('//div[@id="productDescription"]//br/following-sibling::text()').getall()
            weight_str = [i.lower().replace("weight:", "").strip() for i in product_description if "weight" in i.strip().lower()]
            if weight_str:
                item["weight"] = self.unit_to_num(weight_str[0], self.site)
                item["weight_str"] = weight_str[0]
            else:
                item["weight"] = ""
                item["weight_str"] = ""
        item["asin"] = response.meta.get("asin")
        item["title_len"] = len(item["title"])
        item["img_num"] = len(item["img_url"])
        item['low_star'] = sum([int(item["one_star"]), int(item["two_star"]), int(item["three_star"])])
        if url_all:
            item["img_url"] = ",".join(item["img_url"])
        else:
            # Keep only the first image.
            item["img_url"] = item["img_url"][0] if item["img_url"] else ""
        video_A_type_list = []
        if item.get("img_url"):
            video_A_type_list.append("1")
        if item["video_url"]:
            video_A_type_list.append("2")
        if item["add_url"]:
            video_A_type_list.append("3")
        item["img_type"] = ",".join(video_A_type_list)

        item.update(self.site_coupe(response))

        item["is_coupon"] = item.get("is_coupon", "")
        item["one_two_val"] = item.get("one_two_val", "")
        item["three_four_val"] = item.get("three_four_val", "")
        item["three_four_val"] = item["three_four_val"].replace(",", "") if "." in item["three_four_val"] else item[
            "three_four_val"]
        item["five_six_val"] = item.get("five_six_val", "")
        item["eight_val"] = str(float(item.get("eight_val"))) if item.get("eight_val") else ""
        item["asin_state"] = 1
        core_missing = not item['volume'] and not item['weight'] and not item['rank'] and not item['launch_time']
        if core_missing and not item['price'] and not item['rating'] and not item['total_comments']:
            # None of the fields has a value: state 9 (the row is still stored
            # in the detail table). This must be tested before the weaker
            # state-7 condition, which would otherwise always overwrite it.
            item["asin_state"] = 9
        elif core_missing:
            item["asin_state"] = 7
        # Keepa-style field aliases.
        item["salesChannel"] = f"{urlparse(response.url).hostname}".replace("www.", "")
        item["reviews"] = item.get("total_comments")
        item["current_rank"] = item.get("rank")
        item["brand_name"] = item.get("brand")
        c = self.category(item.get("category"))
        item.update(c)

        item["asin_type"] = response.meta.get("asin_type")
        item["bsr_date_info"] = response.meta.get("date_info") if response.meta.get("date_info") else ""
        item["date_info"] = time.strftime('%Y-%m-%d', time.localtime())
        item["site"] = response.meta.get("site")

        u = {
            "us": "Sold by",
            "uk": "Sold by",
            "fr": "Vendu par",
            "de": "Verkäufer",
            "es": "Vendido por",
            "it": "Venditore",
            "mx": "Vendido por",
            "ca": "Sold by",
        }
        ssite = response.meta.get("site")
        url1 = response.xpath(
            f'//div[@id="desktop_qualifiedBuyBox"]//div[@tabular-attribute-name="{u.get(ssite)}"]//span[contains(@class, "a-size-small") and contains(@class, "text") or ./a[@id="sellerProfileTriggerId"]]/a/@href').get()
        url2 = response.xpath(
            f'//div[@id="desktop_qualifiedBuyBox"]//div[@tabular-attribute-name="{u.get(ssite)}" and @class="tabular-buybox-text"]//span[contains(@class, "a-size-small") or ./a[@id="sellerProfileTriggerId"]]/a/@href').get()
        url3 = response.xpath(
            "//div[@id='desktop_qualifiedBuyBox']//div[contains(@class, 'offer-display-feature-text a-spacing-none')]//a/@href"
        ).getall()
        url3 = [i for i in url3 if "seller=" in i] if url3 else None
        url3 = url3[0] if url3 else None
        if not url1 and not url2 and not url3:
            item["seller_id"] = ""
        else:
            url = url1 or url2 or url3
            item["seller_id"] = url.split("seller=")[-1].split("&")[0]
        # The buy-box strings are missing when the XPath table has no
        # "buy_box_seller_type" entry; treat that like "nothing matched".
        buy_box = buy_box or []
        if response.meta.get("site") == "ca":
            # A conditional expression binds looser than ``or``, so each
            # buy_box fallback below is parenthesized explicitly.
            name_from_page = response.xpath(
                '//div[@id="desktop_qualifiedBuyBox"]//div[@id="merchant-info"]/a[contains(@href, "seller=")]//text()').get(
                "")
            account_name = name_from_page or (buy_box[1] if len(buy_box) > 1 else "")
            item["account_name"] = account_name
            buy_boy_type1 = response.xpath(
                '//div[@id="desktop_qualifiedBuyBox"]//div[@id="merchant-info"]/a/span[contains(text(), "Fulfilled by")]/text()').get(
                "")
            buy_boy_type2 = response.xpath(
                '//div[@id="desktop_qualifiedBuyBox"]//div[@id="merchant-info"]//text()').get("")
            buy_boy_type2 = buy_boy_type2.split("and")[0].replace("Ships from",
                                                                  "").strip() if "Ships from" in buy_boy_type2 else ""

            buy_boy_type = buy_boy_type1 or buy_boy_type2 or (buy_box[0] if len(buy_box) >= 2 else "")

            item["buy_boy_type"] = buy_boy_type.replace("Fulfilled by Amazon", "Amazon").replace("Amazon Warehouse", "Amazon")
            item["seller_name"] = account_name
            url = response.xpath(
                '//div[@id="desktop_qualifiedBuyBox"]//div[@id="merchant-info"]/a[contains(@href, "seller=")]/@href').get(
                "") or response.xpath('//div[@id="desktop_qualifiedBuyBox"]//a[contains(@href, "seller=")]/@href').get(
                "")
            # Keep a seller id found above; only fall back to the merchant-info link.
            item["seller_id"] = item.get("seller_id") or (url.split("seller=")[-1].split("&")[0] if url else "")
        else:
            if len(buy_box) == 2:
                seller = buy_box[1]
            elif buy_box:
                seller = buy_box[0]
            else:
                seller = ""
            item["account_name"] = seller
            item["buy_boy_type"] = buy_box[0] if buy_box else ""
            item["seller_name"] = seller

        item['weight_str'] = "" if "Dimensions for" in item["weight_str"] else item["weight_str"]
        item['volume'] = item["volume"].split(":")[-1] if "Dimensions & Weight:" in item["volume"] else item["volume"]

        item["account_id"] = response.meta.get("account_id") or item.get("seller_id") or ""
        item["updated_at"] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        # Example shapes of seller_json:
        # Amazon retail:
        # {"seller_id": null, "ship_from": "Amazon.com", "sold_by": "Amazon.com", "fulfilled_by": null}
        #
        # {"seller_id": null, "ship_from": "Fulfilled by Amazon", "sold_by": "Amazon Warehouse",
        #  "fulfilled_by": "Fulfilled by Amazon"}
        #
        # FBA:
        # {"seller_id": "AK3FA7ZJRNUJ7", "ship_from": "Amazon", "sold_by": "MEYSPRING", "fulfilled_by": null}
        #
        # FBM:
        # {"seller_id": "A25PV2D1WZYZ8F", "ship_from": "Limouyin-US", "sold_by": "Limouyin-US", "fulfilled_by": null}
        #
        # Out of stock:
        # {"seller_id": null, "ship_from": null, "sold_by": null, "fulfilled_by": null}
        ship_from = item['buy_boy_type']
        if item['page_inventory'] == 3:
            seller_dict = {"seller_id": None, "ship_from": None, "sold_by": None, "fulfilled_by": None}
        elif item['buy_box_seller_type'] == 1 and ship_from == 'Fulfilled by Amazon':
            seller_dict = {
                "seller_id": item['seller_id'] or None,
                "ship_from": item['buy_boy_type'] or None,
                "sold_by": item['seller_name'] or None,
                "fulfilled_by": "Fulfilled by Amazon"
            }
        else:
            seller_dict = {
                "seller_id": item['seller_id'] or None,
                "ship_from": item['buy_boy_type'] or None,
                "sold_by": item['seller_name'] or None,
                "fulfilled_by": None
            }
        item['seller_json'] = json.dumps(seller_dict, ensure_ascii=False)
        if item.get('productdetail_json'):
            item['productdetail_json']['productdetail_json_rank'] = item.get('productdetail_json_rank')
            item['productdetail_json'] = json.dumps(item['productdetail_json'])

        item['result_list_json'] = self.result_list_data(response)
        return item

    def variat_msgs(self, response):
        """Yield one dict per variation (child ASIN) described in the page's inline script.

        This is a generator. It yields nothing when the page has no variation
        script or no ``variationValues`` block.

        Args:
            response: Scrapy-style response; ``response.meta["asin"]`` is the
                parent-ASIN fallback when the ``dataToReturn`` script is absent.

        Yields:
            dict with ``asin``, ``color``, ``size``, ``style``, ``column_2``
            (value of the first dimension other than color/size/style),
            ``parent_asin`` and ``state`` (``"2"`` when the variation's first
            non-empty color/size/style value equals one of the
            ``swatchSelect`` entries, otherwise ``"1"``).
        """
        variat_json = response.xpath(
            "//script[contains(text(), 'parentAsin=') and contains(text(), 'variationValues')]").get()
        if not variat_json:
            return
        raw_values = re.findall(r'"variationValues" :(.*?}),', variat_json)
        raw_asin_values = re.findall(r'asinVariationValues" :(.*?}}),', variat_json)
        variation_values = json.loads(raw_values[0].strip()) if raw_values else ""
        asin_variation_values = json.loads(raw_asin_values[0].strip()) if raw_asin_values else ""
        if not variation_values:
            return

        asin_msgs = response.xpath(
            "//script[(contains(text(), 'parentAsin=') and contains(text(), 'dataToReturn') and contains(text(), 'dimensionToAsinMap'))]/text()").get(
            "")
        if asin_msgs:
            asin_msgs = asin_msgs.split("dataToReturn =")[-1].split("return dataToReturn")[0].replace(";",
                                                                                                      '').strip()
            parent_asin = re.findall('"parentAsin" : "(.*?)",', asin_msgs)
            parent_asin = parent_asin[0] if parent_asin else ""
        else:
            parent_asin = response.meta.get("asin")

        # Loop-invariant: evaluated once instead of once per variation.
        swatches = [entry.lower().strip() for entry in response.xpath("//li[@class='swatchSelect']").getall()]
        known_dimensions = ['color_name', 'size_name', 'style_name', 'ASIN']

        def build_item(asin, color, size, style, column_2):
            entry = {
                "asin": asin,
                "color": color,
                "size": size,
                "style": style,
                "column_2": column_2,
                "parent_asin": parent_asin,
            }
            model = color or size or style
            entry["state"] = "2" if swatches and model.lower().strip() in swatches else "1"
            return entry

        if asin_variation_values:
            # Each entry maps a dimension name to an index into variationValues.
            for variation in asin_variation_values.values():
                color = variation_values.get("color_name")[int(variation.get("color_name"))] if variation.get("color_name") else ""
                size = variation_values.get("size_name")[int(variation.get("size_name"))] if variation.get("size_name") else ""
                style = variation_values.get("style_name")[int(variation.get("style_name"))] if variation.get("style_name") else ""
                other = [name for name in variation.keys() if name not in known_dimensions]
                column_2 = variation_values.get(other[0])[int(variation.get(other[0]))] if other else ""
                yield build_item(variation["ASIN"], color, size, style, column_2)
        else:
            display_matches = re.findall(r'dimensionValuesDisplayData" :(.*?}),', variat_json)
            # Default to an empty mapping: the previous "" fallback raised
            # AttributeError on ``.items()`` when the block was missing.
            display_data = json.loads(display_matches[0].strip()) if display_matches else {}
            other = [name for name in variation_values.keys() if name not in known_dimensions]
            for asin, labels in display_data.items():
                color = [label for label in labels if label in variation_values.get("color_name", [])]
                size = [label for label in labels if label in variation_values.get("size_name", [])]
                style = [label for label in labels if label in variation_values.get("style_name", [])]
                column_2 = [label for label in labels if label in variation_values.get(other[0])] if other else ""
                yield build_item(
                    asin,
                    color[0] if color else "",
                    size[0] if size else "",
                    style[0] if style else "",
                    column_2[0] if column_2 else "",
                )

    def asin_seller_account_syn(self, response):
        """Extract the buy-box seller's display name and absolute profile URL.

        Args:
            response: Scrapy-style response exposing ``xpath()``.

        Returns:
            ``{'account_name': ..., 'url': ...}`` where ``url`` is the seller
            link prefixed with ``self.url_``, or ``None`` when the buy box
            carries no seller link.
        """
        # Localized label of the "sold by" row for the current marketplace.
        u = {
            "us": "Sold by",
            "uk": "Sold by",
            "fr": "Vendu par",
            "de": "Verkäufer",
            "es": "Vendido por",
            "it": "Venditore",
        }
        account_name = response.xpath(
            f'//div[@id="desktop_qualifiedBuyBox"]//div[@tabular-attribute-name="{u.get(self.site)}"]//span[contains(@class, "a-size-small") and contains(@class, "text") or ./a[@id="sellerProfileTriggerId"]]//text()').get()
        url1 = response.xpath(
            f'//div[@id="desktop_qualifiedBuyBox"]//div[@tabular-attribute-name="{u.get(self.site)}"]//span[contains(@class, "a-size-small") and contains(@class, "text") or ./a[@id="sellerProfileTriggerId"]]/a/@href').get()
        url2 = response.xpath(
            f'//div[@id="desktop_qualifiedBuyBox"]//div[@tabular-attribute-name="{u.get(self.site)}" and @class="tabular-buybox-text"]//span[contains(@class, "a-size-small") or ./a[@id="sellerProfileTriggerId"]]/a/@href').get()

        seller_href = url1 or url2
        if not seller_href:
            return None
        # Pick the link first, then prefix it: ``self.url_ + url1 or url2``
        # raised TypeError when only url2 was found and never prefixed url2.
        return {
            'account_name': account_name,
            'url': self.url_ + seller_href,
        }

    def asin_image(self, response):
        """Build the list of media records found on the product page.

        Gallery URLs come from the union of the ``img_url`` XPaths registered
        under ``ASIN_XPATH['us']``; any URL containing ``.gif`` is discarded.
        Each remaining URL becomes a record whose ``img_order_by`` is its
        1-based position in the filtered list. One extra record
        (``data_type`` 3) is added for the second ``src`` found under
        ``div#aplus``, when available.

        Args:
            response: Object exposing ``xpath(...).getall()`` and a ``meta``
                mapping that carries ``"asin"``.

        Returns:
            A list of dicts with keys ``asin``, ``img_url``, ``img_order_by``
            and ``data_type``.
        """
        gallery_query = "|".join(ASIN_XPATH.get('us').get('img_url'))
        gallery_urls = [src for src in response.xpath(gallery_query).getall() if ".gif" not in src]

        records = []
        for position, src in enumerate(gallery_urls, start=1):
            # Long URLs carrying 'play-' / 'video.' get data_type 2
            # (presumably video thumbnails — confirm); everything else is 1.
            is_type_two = ('play-' in src and len(src) > 82) or ('video.' in src and len(src) > 99)
            records.append({
                "asin": response.meta.get("asin"),
                "img_url": src,
                "img_order_by": position,
                "data_type": 2 if is_type_two else 1,
            })

        aplus_sources = response.xpath('//div[@id="aplus"]//@src').getall()
        if not aplus_sources:
            return records

        # Best effort: only the second A+ src is recorded; a shorter list just
        # logs the message below and leaves the gallery records untouched.
        try:
            second_src = aplus_sources[1]
            records.append({
                "asin": response.meta.get("asin"),
                "img_url": second_src,
                "img_order_by": 1,
                "data_type": 3,
            })
        except Exception:
            print("============== 获取A+ url 报错 ==============")
        return records

    def asin_bs_category_asin_detail(self, response):
        u = {
            "us": "Sellers Rank",
            "uk": "Best Sellers Rank",
            "fr": "ventes d'Amazon",
            "de": "Bestseller-Rang",
            "es": "vendidos de Amazon",
            "it": "Bestseller di Amazon",

        }
        best_sellers_rank = response.xpath(f"""//*[contains(text(), "{u.get(self.site)}")]/parent::*//text()|//*[contains(text(), "{u.get(self.site)}")]/parent::*//text()""").getall()
        rank = " ".join([i for i in best_sellers_rank]).split(f"""{u.get(self.site)}""")[-1].replace(":", "").strip()
        last_herf_list1 = response.xpath(f"""//*[contains(text(), "{u.get(self.site)}")]/parent::*//a/@href""").getall()
        last_herf_list2 = response.xpath(f"""//*[contains(text(), "{u.get(self.site)}")]/parent::*/td/span/span//a/@href""").getall()

        cate_name1 = response.xpath(f"""//*[contains(text(), "{u.get(self.site)}")]/parent::*//a/text()""").getall()
        cate_name2 = response.xpath(f"""//*[contains(text(), "{u.get(self.site)}")]/parent::*/td/span/span//a/text()""").getall()
        if last_herf_list1:
            last_herf = last_herf_list1
        elif last_herf_list2:
            last_herf = last_herf_list2
        else:
            return None
        if not rank:
            return None
        bs_item = {
            "asin": response.meta.get("asin"),
            "week": response.meta.get("week"),
            "best_sellers_rank": rank,
            "last_herf": last_herf,
            "cate_name": cate_name1 or cate_name2
        }
        # pprint(bs_item)
        return bs_item

    def result_list_data(self, response):
        # 分别抓取所有 carousel 的 data-options 和它们的标题 h2
        data_options_list = response.xpath(
            "//div[@data-marketplaceid='ATVPDKIKX0DER']/@data-a-carousel-options").getall()
        h2_list = response.xpath("//div[@data-marketplaceid='ATVPDKIKX0DER']//h2/text()").getall()
        result = {}
        result_sp = {}
        result_list = []
        # Customers also search us_B0D4QGW5RX.html
        data_sp_list = response.xpath(
            "//div[@class='a-column a-span8']/h2[contains(@class,'carousel-heading')]/text()").getall()
        for sp_h2 in data_sp_list:
            print(sp_h2)
            if sp_h2 != 'Videos':
                data_sp = response.xpath(
                    f"""//div[@class='a-column a-span8']/h2[contains(text(),"{sp_h2}")]/parent::div/parent::div/parent::div/parent::div/@data-a-carousel-options""").getall()
                if data_sp:
                    decoded_sp = html_module.unescape(data_sp[0])
                    decoded_sp = json.loads(decoded_sp)
                    if decoded_sp.get('ajax'):
                        inner_sp_h2_list = decoded_sp.get('ajax', {}).get('id_list', [])
                        sp_h2_asin_list = [item.split('|')[0] for item in inner_sp_h2_list]
                        if sp_h2_asin_list:
                            result_sp[sp_h2] = sp_h2_asin_list
        if result_sp:
            result_list.append(result_sp)
        if h2_list and data_options_list:
            count = min(len(data_options_list), len(h2_list))
            for i in range(count):
                raw_json_str = data_options_list[i]
                title = h2_list[i].strip()
                # 解码 → 解析 → 提取 id 列表
                outer = json.loads(raw_json_str)
                inner_list = outer.get('ajax', {}).get('id_list', [])
                asin_list = [json.loads(item)['id'] for item in inner_list]
                result[title] = asin_list
            if result:
                result_list.append(result)
        h2_str_list = response.xpath(
            '//h2[contains(@class,"a-spacing-medium")]/text()|//div[@class="a-column a-span8"]/h2[contains(@class,"carousel-heading")]/text()').getall()
        if h2_str_list:
            for h2_str in h2_str_list:
                if h2_str != 'Videos':
                    data_asin_list = response.xpath(
                        f"""//h2[contains(text(),"{h2_str}")]/parent::div/parent::div//@data-asin|//h2[contains(text(),"{h2_str}")]/parent::div/parent::div/parent::div//@data-asin""").getall()
                    print('h2_str_list::', h2_str, data_asin_list)
                    if data_asin_list:
                        result[h2_str] = data_asin_list
                        result_list.append(result)
        print('result_list 广告流量ASIN:', result_list)
        if result_list:
            result_list_json = json.dumps(result_list, ensure_ascii=False)
        else:
            result_list_json = ''
        return result_list_json