import sys
import os


sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
import pandas as pd
from utils.db_connect import BaseUtils
from amazon_params.params import DB_REQUESTS_ASIN_PARAMS
import time
from difflib import SequenceMatcher
from threading_spider.db_connectivity import connect_db

# Banner printed when this module is executed or imported. The message reads:
# "query the self_asin_detail table to update the erp_asin anomalies".
print('查询 self_asin_detail 表信息来更新erp_asin 的异常')


class Save_asin_self(BaseUtils):
    """Compare crawled ASIN detail pages with ERP data and store anomaly codes.

    For one site, ``run()`` loads the ERP rows from ``{site}_erp_asin_syn``,
    loads today's crawled rows from ``{site}_self_asin_detail``, derives one
    error code per checked field and appends the result to ``{site}_erp_asin``.

    In every ``*_error`` column the value ``1`` means "no anomaly"; the other
    codes are described on the helper that produces them.
    """

    # Separator used to pack one ERP row into a single string in ``item_asin``.
    _FIELD_SEP = '|+|'

    # Minimum ``SequenceMatcher`` ratio for page text to count as matching the
    # ERP text.
    # NOTE(review): the title check used to be ``Compare_str(...) > 10``, a
    # leftover from an older character-difference implementation that could
    # never be true for a ratio in [0, 1]. It now shares this threshold with
    # the bullet/description checks - confirm 0.85 is appropriate for titles.
    _MIN_SIMILARITY = 0.85

    # (marker in the space-stripped page category, marker in the search
    # category) pairs that are accepted as a valid combination.
    _CATEGORY_SEARCH_PAIRS = (
        ('Clothing,Shoes&Jewelry›', 'All'),
        ('Health&Household›', 'Health, Household & Baby Care'),
        ('Patio,Lawn&Garden›', 'Home & Kitchen'),
        ('Patio,Lawn&Garden›', 'Garden & Outdoor'),
        ('BabyProducts›', 'Baby'),
        ('Arts,Crafts&Sewing›', 'Home & Kitchen'),
        ('Tools&HomeImprovement›', 'Home & Kitchen'),
    )

    def __init__(self, site_name='us'):
        """Store the site code and today's date (``YYYY-MM-DD``, local time)."""
        super().__init__()
        self.site_name = site_name  # site code, used as table-name prefix
        self.time_strftime = time.strftime("%Y-%m-%d", time.localtime())

    def check_contain_chinese(self, check_str):
        """Return True if ``check_str`` has a character in U+4E00..U+9FA5.

        Prints a marker line when such a character is found; returns False
        otherwise.
        """
        for c in check_str:
            if '\u4e00' <= c <= '\u9fa5':
                print('--是中文，--')
                return True
        return False

    @staticmethod
    def _sql_in_list(values):
        """Render ``values`` as a SQL ``IN`` list, e.g. ``('A', 'B')``.

        Non-string and empty entries are dropped and duplicates collapsed.
        Returns ``''`` when nothing is left, so callers can skip the query
        instead of sending the invalid ``IN ()``. Unlike formatting a Python
        tuple, a single value does not produce a trailing comma.
        """
        unique = sorted({v for v in values if isinstance(v, str) and v})
        if not unique:
            return ''
        quoted = ', '.join("'" + v.replace("'", "''") + "'" for v in unique)
        return f'({quoted})'

    def _has_negative_word(self, text):
        """Return True if ``text`` contains a word from ``self.text_list``.

        A word only counts when it appears as `` word,``, `` word `` or
        ``, word `` (case-insensitive), i.e. delimited by spaces/commas.
        """
        lowered = text.lower()
        for word in self.text_list:
            patterns = (f" {word},", f" {word} ", f", {word} ")
            if any(pattern.lower() in lowered for pattern in patterns):
                return True
        return False

    def init_db_names(self):
        """Load reference data from the database, then run the comparison.

        Sets ``engine``, ``db_erp_asin``, ``db_self_asin_detail``,
        ``text_list`` (negative words), ``asin_list`` (all ERP ASINs) and
        ``item_asin`` (ASIN -> ERP row packed with ``|+|``), and finally calls
        ``select_self_asin_detail()``.
        """
        self.engine = self.mysql_connect()
        self.db_erp_asin = self.site_name + DB_REQUESTS_ASIN_PARAMS['db_us_erp_asin'][2:]
        self.db_self_asin_detail = self.site_name + DB_REQUESTS_ASIN_PARAMS['db_self_asin_detail'][2:]

        sql_read = "SELECT text_name FROM censored_thesaurus WHERE data_type='负面词汇'"
        print(sql_read)
        df = pd.read_sql(sql_read, con=self.engine)
        self.text_list = list(df.text_name)
        print('负面词汇:', self.text_list)

        asin_sql = f"SELECT asin,sku,erp_seller,{self.site_name}_upload_info,title,`describe` as describe_str ,selling,is_variation,fulFillable from {self.site_name}_erp_asin_syn WHERE created_at>='2023-05-11';"
        print('asin_sql::', asin_sql)
        df_asin = pd.read_sql(asin_sql, con=self.engine)
        self.asin_list = list(df_asin.asin)
        print(len(self.asin_list))

        # Replace NULLs so the string concatenation below does not turn the
        # whole packed row into NaN. Plain assignment is used because
        # ``df[col].fillna(..., inplace=True)`` acts on a temporary under
        # pandas copy-on-write and may leave the frame unchanged.
        upload_col = f'{self.site_name}_upload_info'
        null_defaults = {
            upload_col: 'N/A',
            'title': '',
            'fulFillable': '0',
            'is_variation': '2',
            'describe_str': '',
            'selling': '',
        }
        for column, default in null_defaults.items():
            df_asin[column] = df_asin[column].fillna(default)

        # Field order here fixes the indices used when the row is unpacked in
        # select_self_asin_detail(). The upload-info column is derived from
        # the site name, so any site with the matching schema is supported.
        sep = self._FIELD_SEP
        packed_rows = (
            df_asin.asin + sep + df_asin.sku + sep + df_asin.erp_seller + sep
            + df_asin[upload_col].astype(str) + sep
            + df_asin.title + sep + df_asin.describe_str + sep
            + df_asin.selling + sep
            + df_asin.fulFillable.astype(str) + sep
            + df_asin.is_variation.astype(str)
        )
        self.item_asin = {}
        for packed in packed_rows:
            # Rows with a NULL asin/sku/erp_seller concatenate to NaN (float)
            # and are skipped.
            if isinstance(packed, str):
                self.item_asin[packed.split(sep, 1)[0]] = packed
        self.select_self_asin_detail()

    def _title_error(self, title, syn_title):
        """Classify the page title.

        Returns 3 if it contains Chinese, 2 if missing or shorter than 35
        characters, 5 if it contains a negative word, 6 if the ERP title is
        longer than 5 characters and the two are less similar than
        ``_MIN_SIMILARITY``, else 1.
        """
        if title and self.check_contain_chinese(title):
            return 3
        if title is None or len(title) < 35:
            return 2
        if self._has_negative_word(title):
            return 5
        if len(syn_title) > 5:
            print(syn_title, 333333333333333333333333333, title)
            if self.Compare_str(title, syn_title) < self._MIN_SIMILARITY:
                return 6  # page title differs from the ERP title
        return 1

    def _selling_error(self, describe, syn_describe):
        """Classify the page bullet points (``|-|``-separated string).

        Returns 2 if missing or the stripped bullets total fewer than 35
        characters, 3 if any bullet contains Chinese, 5 if a negative word is
        present, 6 if the ERP bullets are longer than 10 characters and the
        two are less similar than ``_MIN_SIMILARITY``, else 1.
        """
        if not describe:
            return 2
        bullets = describe.split('|-|')
        stripped = [bullet.strip() for bullet in bullets]
        if any(b and self.check_contain_chinese(b) for b in stripped):
            return 3
        if len(''.join(stripped)) < 35:
            return 2
        # The negative-word scan uses the unstripped bullets so the
        # space-delimited patterns can still match at bullet edges.
        if self._has_negative_word(''.join(bullets)):
            return 5
        if len(syn_describe) > 10:
            if self.Compare_str(describe, syn_describe) < self._MIN_SIMILARITY:
                return 6  # page bullets differ from the ERP bullets
        return 1

    def _search_category_error(self, category, search_category):
        """Return 1 if the search category is consistent with the page category, else 2.

        When either value is missing the result depends only on whether a
        search category exists (1) or not (2).
        """
        if not (category and search_category):
            return 1 if search_category else 2

        category_str = category.replace(' ', '')
        print('category::', category_str)
        known_pair = any(
            cat_marker in category_str and search_marker in search_category
            for cat_marker, search_marker in self._CATEGORY_SEARCH_PAIRS
        )
        if known_pair or 'ALL' in search_category or 'Health&Household›' in category_str:
            return 1

        # Fallback 1: the page category starts with the space-stripped search
        # category.
        nav_search_label = ''.join(search_category.split(' '))
        print('nav_search_label', nav_search_label, category)
        if category.startswith(nav_search_label):
            return 1

        # Fallback 2: the page category starts with the first word of the
        # search category.
        first_word = search_category.split(' ')[0]
        print(first_word, '2222222222222222222', category)
        return 1 if category.startswith(first_word) else 2

    def _category_error(self, category, category_upload_info):
        """Classify the page category against the ERP upload category.

        Returns 3 if the page category is missing, 2 if it contains Chinese,
        4 if the ERP value is longer than 5 characters and differs from the
        page value after removing spaces (ERP ``>`` is mapped to ``›``),
        else 1.
        """
        if not category:
            return 3
        if self.check_contain_chinese(category):
            return 2
        if len(category_upload_info) <= 5:
            # ERP category too short to compare (this covers the 'N/A' filler).
            return 1
        category_erp = category_upload_info.replace(' ', '').replace('>', '›')
        print('category_erp::', category_erp)
        category = category.replace(' ', '')
        print("category_asin", category)
        return 4 if category != category_erp else 1

    def select_self_asin_detail(self):
        """Write page-level and field-level anomaly rows to ``{site}_erp_asin``.

        1. ASINs that have ``state=4`` and were updated today in
           ``{site}_self_all_syn`` or ``{site}_self_real_spider`` are written
           with ``page_error=2`` and excluded from further checks.
        2. Every remaining ASIN that has a detail row created today gets one
           row with the per-field error codes.

        Requires ``init_db_names()`` to have populated ``engine``,
        ``asin_list``, ``item_asin`` and ``text_list``.
        """
        sep = self._FIELD_SEP
        target_table = f'{self.site_name}_erp_asin'

        in_list = self._sql_in_list(self.asin_list)
        if not in_list:
            return  # nothing to check; avoids sending "IN ()" to the server

        # Step 1: collect the state-4 ASINs from both tables into one set.
        # (``list.extend`` returns None, so its result must not be used as
        # the merged list.)
        error_asins = set()
        for table in (f'{self.site_name}_self_all_syn',
                      f'{self.site_name}_self_real_spider'):
            state_sql = (
                f'SELECT asin from {table} WHERE asin in {in_list} '
                f'and state=4 and updated_at>="{self.time_strftime}"'
            )
            error_asins.update(pd.read_sql(state_sql, con=self.engine).asin)

        if error_asins:
            print("asin_error_list::", sorted(error_asins))
            page_error_rows = []
            for asin in sorted(error_asins):
                packed = self.item_asin.get(asin)
                if packed:
                    erp_fields = packed.split(sep)
                    page_error_rows.append([asin, erp_fields[1], erp_fields[2], 2])
            # Drop every occurrence of the failed ASINs from the work list.
            self.asin_list = [a for a in self.asin_list if a not in error_asins]
            if page_error_rows:
                df = pd.DataFrame(data=page_error_rows,
                                  columns=['asin', "sku", 'erp_seller', 'page_error'])
                df.to_sql(target_table, con=self.engine, if_exists="append", index=False)

        # Step 2: field-level checks for the remaining ASINs.
        in_list = self._sql_in_list(self.asin_list)
        if not in_list:
            return
        asin__detail_sql = f"SELECT asin,title,img_num,`describe`,category,page_inventory,search_category,product_description,img_type from {self.site_name}_self_asin_detail WHERE site='{self.site_name}' and created_at>='{self.time_strftime}' and asin in {in_list};"
        df_asin_detail = pd.read_sql(asin__detail_sql, con=self.engine)

        sava_data = []
        for row in df_asin_detail.values.tolist():
            (asin, title, img, describe, category, page_inventory,
             search_category, product_description, img_type_str) = row

            packed = self.item_asin.get(asin)
            if not packed:
                continue  # no usable ERP row for this ASIN
            erp_fields = packed.split(sep)
            sku, erp_seller, category_upload_info = erp_fields[1:4]
            # ERP title, ERP bullet points, ERP product description.
            syn_title, syn_describe, syn_selling = erp_fields[4:7]
            fulFillable, is_variation = erp_fields[7:9]

            # Only the last comma-separated img_type entry is used.
            img_type = img_type_str.split(',')[-1] if img_type_str else '-1'

            title_error = self._title_error(title, syn_title)

            # More than 4 images is OK; missing or <= 4 is an anomaly.
            if img and not img <= 4:
                img_error = 1
            else:
                img_error = 2

            selling_error = self._selling_error(describe, syn_describe)

            print('search_category:', search_category)
            search_ccategory_error = self._search_category_error(category, search_category)
            ccategory_error = self._category_error(category, category_upload_info)

            buy_now_error = 1 if page_inventory else 2

            # Bottom product description vs ERP description; skipped when the
            # ERP text is short or the last img_type entry is '3'.
            describe_error = 1
            if product_description and len(syn_selling) > 10 and img_type != '3':
                if self.Compare_str(product_description, syn_selling) < self._MIN_SIMILARITY:
                    describe_error = 2  # page description differs from the ERP one

            # A variation ASIN with fulFillable == 0 is exempt from all
            # copy-text checks.
            if int(float(is_variation)) == 1 and int(float(fulFillable)) == 0:
                describe_error = 1  # bottom description
                title_error = 1  # title
                selling_error = 1  # bullet points

            sava_data.append([
                asin, title_error, img_error, selling_error,
                search_ccategory_error, ccategory_error, buy_now_error,
                sku, erp_seller, describe_error,
            ])

        print(sava_data)
        if sava_data:
            df = pd.DataFrame(data=sava_data,
                              columns=['asin', "title_error", 'img_error', 'selling_error', 'search_ccategory_error',
                                       'ccategory_error', 'buy_now_error', 'sku', 'erp_seller', 'describe_error'])
            df.to_sql(target_table, con=self.engine, if_exists="append", index=False)

    def Compare_str(self, str1, str2):
        """Return the ``SequenceMatcher`` similarity ratio of two strings.

        The ``|-|`` bullet separator is removed from both inputs first. The
        comparison is case-sensitive. The ratio is printed and returned as a
        float in [0, 1], where 1.0 means identical.
        """
        str1 = str1.replace('|-|', '')
        str2 = str2.replace('|-|', '')
        similarity_ratio = SequenceMatcher(None, str1, str2).ratio()
        print(similarity_ratio)
        return similarity_ratio

    def run(self):
        """Run the full check for this site, then send a completion message.

        The message is sent through ``connect_db(None).send_mg`` only after
        ``init_db_names()`` returns without raising.
        """
        self.init_db_names()
        account = 'pengyanbing'
        time_strftime = time.strftime('%Y %m %d %H:%M:%S', time.localtime(time.time()))
        title = f'{self.site_name} 站点 每日 erp asin'
        content = f'{time_strftime} 更新 erp asin 异常分类更新。正常'
        connect_db(None).send_mg(account, title, content)


if __name__ == '__main__':
    # Process the US site first, then the UK site, one fresh instance each.
    for site in ('us', 'uk'):
        Save_asin_self(site_name=site).run()
