import pandas as pd
import re
from datetime import datetime
import psycopg2
import redis
from collections import Counter
import json
from sqlalchemy import create_engine
import requests
import traceback
import sys
import numpy as np
from itertools import chain


# 定义自定义 DataFrame 类
class MyDataFrame(pd.DataFrame):
    def __init__(self, site_name='us', date_info='2024-04', num='5000'):
        """Create an empty frame and remember what to check.

        Args:
            site_name: Site code; selects the database and prefixes table names.
            date_info: Month string (e.g. ``2024-04``) used to build the table name.
            num: Row limit for the sampling query; stored as ``limit_num``.
        """
        super().__init__()
        self.site_name, self.date_info, self.limit_num = site_name, date_info, num

    def connect_to_database(self):
        """Build a SQLAlchemy engine for this site's PostgreSQL database.

        The database is ``selection`` for the ``us`` site and
        ``selection_<site_name>`` for every other site.

        Returns:
            The engine object returned by ``create_engine``.

        Raises:
            RuntimeError: If the engine still cannot be created after a few
                attempts (chained to the last underlying error).
        """
        import time  # local import: only needed for the retry back-off

        # NOTE(review): host and credentials are hard-coded in source; consider
        # moving them to configuration.
        host = "192.168.10.223"
        user = "postgres"
        password = "F9kL2sXe81rZq"
        db = 'selection' if self.site_name == 'us' else f'selection_{self.site_name}'

        # The previous ``while True: ... continue`` retried forever without a
        # pause, which hangs the process when the failure is deterministic
        # (bad argument, missing driver). Retry a bounded number of times.
        max_attempts = 5
        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                return create_engine(
                    f"postgresql+psycopg2://{user}:{password}@{host}:5432/{db}",
                    encoding='utf-8', connect_args={"connect_timeout": 10})
            except Exception as e:
                last_error = e
                print("pg_connect 14 t11111111111111111111111:", e, f"\n{traceback.format_exc()}")
                if attempt < max_attempts:
                    time.sleep(1)
        raise RuntimeError(
            f"could not create database engine for '{db}' after {max_attempts} attempts"
        ) from last_error

    # 定义检查 ASIN 的函数
    def check_asin(self, asin):
        row = self.df.loc[self.df.asin == asin].asin
        row = list(row)
        if len(asin) != 10:
            # return f"Error_asin_01: 长度异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01长度异常', 'check_rules': '长度异常', 'row_list': row}
        elif not bool(re.match(r'^[a-zA-Z0-9]+$', asin)):
            # return f"Error_asin_02: 包含非数字或者字母异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01: Error_asin_02包含非数字或者字母异常',
                    'check_rules': '包含非数字或者字母异常', 'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': '包含非数字或者字母异常', 'row_list': None}

    # 定义检查 img_url 的函数
    def check_img_url(self, img_url):
        row = self.df.loc[self.df.img_url == img_url].asin
        row = list(row)
        if img_url is None or img_url.strip() == "":
            return {'error': '异常', 'error_detail': 'Error_asin_01: Error_asin_01:img_url为空异常',
                    'check_rules': 'img_url为空异常', 'row_list': row}
            # return "Error_asin_01:img_url为空异常"
        elif img_url.strip().lower() in ["null", "none", "nan", '0']:
            # return "Error_asin_01:img_url为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:img_url为空异常', 'check_rules': 'img_url为空异常',
                    'row_list': row}
        elif not img_url.startswith("https://"):
            # return "Error_asin_02:img_url异常(没有https)"
            return {'error': '异常', 'error_detail': 'Error_asin_02:img_url异常(没有https)',
                    'check_rules': 'img_url异常(没有https)', 'row_list': row}
        elif not re.search(r'\.(jpg|png|gif)$', img_url.lower()):
            # return "Error_asin_02:img_url异常(结尾不是.jpg、.png、.gif)"
            return {'error': '异常', 'error_detail': 'Error_asin_02:img_url异常(结尾不是.jpg、.png、.gif)',
                    'check_rules': 'img_url异常(结尾不是.jpg、.png、.gif)', 'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': 'img_url异常(结尾不是.jpg、.png、.gif)',
                    'row_list': None}

    # 定义检查 title 的函数
    def check_title(self, title):
        row = self.df.loc[self.df.title == title].asin
        row = list(row)
        if title is None or title.strip() == "":
            # return "Error_asin_03:为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_03:为空异常', 'check_rules': 'title为空异常', 'row_list': row}
        elif title.strip().lower() in ["null", "none", "nan", '0']:
            # return "Error_asin_01:img_url为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:img_url为空异常', 'check_rules': 'title为空异常',
                    'row_list': row}
        elif not isinstance(title, str):
            # return "Error_asin_02:异常,必须是字符串"
            return {'error': '异常', 'error_detail': 'Error_asin_02:异常,必须是字符串', 'check_rules': '必须是字符串', 'row_list': row}
        elif not (2 < len(title.strip()) < 400):
            # return "Error_asin_01:长度异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:长度异常', 'check_rules': '长度异常 长度校验400', 'row_list': row}
        elif title.startswith("https://") or re.search(r'\.(jpg|png|gif)$', title.lower()):
            # return "Error_asin_02:出现https开头 .jpg等结尾异常"
            return {'error': '异常', 'error_detail': 'Error_asin_02:出现https开头 .jpg等结尾异常',
                    'check_rules': 'title 出现https开头 .jpg等结尾异常', 'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': 'title 出现https开头 .jpg等结尾异常', 'row_list': None}

    # 定义检查 title_len 的函数
    def check_title_len(self, title_len):
        row = self.df.loc[self.df.title_len == title_len].asin
        row = list(row)
        if title_len is None or title_len == "":
            # return "Error_asin_03:为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_03:为空异常', 'check_rules': 'img_url为空异常', 'row_list': row}
        elif title_len in ["null", "none", "nan", '0']:
            # return "Error_asin_01:img_url为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:img_url为空异常', 'check_rules': 'img_url为空异常',
                    'row_list': row}
        elif not (2 < title_len < 400):
            # return "Error_asin_01:长度异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:长度异常', 'check_rules': '长度异常', 'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': '长度异常', 'row_list': None}

    # 定义检查 price 的函数
    def check_price(self, price):
        row = self.df.loc[self.df.price == price].asin
        row = list(row)
        price = int(price)
        if (not isinstance(price, (int, float))) or (price < 0) or price in ["null", "none", None, "", '0']:
            # return "Error_asin_01:数值异常，只能是非负数"
            return {'error': '异常', 'error_detail': 'Error_asin_01:数值异常，只能是非负数', 'check_rules': '数值异常，只能是非负数',
                    'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': '数值异常，只能是非负数', 'row_list': None}

    # 定义检查 rating 的函数
    def check_rating(self, rating):
        row = self.df.loc[self.df.rating == rating].asin
        row = list(row)
        if pd.isna(rating) or rating in ["null", "none", None, "", '0']:
            return {'error': '正常', 'error_detail': None, 'check_rules': '数值异常，只能是0~5', 'row_list': None}
        elif not 0 <= int(rating) <= 5:
            # return "Error_asin_01:数值异常，只能是0~5"
            return {'error': '异常', 'error_detail': 'Error_asin_01:数值异常，只能是0~5', 'check_rules': '数值异常，只能是0~5',
                    'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': '数值异常，只能是0~5', 'row_list': None}

    # 定义检查 total_comments 的函数
    def check_total_comments(self, total_comments):
        row = self.df.loc[self.df.total_comments == total_comments].asin
        row = list(row)
        if int(total_comments) < 0:
            # return "Error_asin_01:数值异常，不能为负数"
            return {'error': '异常', 'error_detail': 'Error_asin_01:数值异常，不能为负数', 'check_rules': '数值异常，不能为负数',
                    'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': '数值异常，不能为负数', 'row_list': None}

    # 定义检查 buy_box_seller_type 的函数
    def check_buy_box_seller_type(self, buy_box_seller_type):
        row = self.df.loc[self.df.buy_box_seller_type == buy_box_seller_type].asin
        row = list(row)
        if buy_box_seller_type not in [1, 2, 3, 4]:
            # return "Error_asin_01:数值异常，不属于1，2，3，4"
            return {'error': '异常', 'error_detail': 'Error_asin_01:数值异常，不属于1，2，3，4', 'check_rules': '数值异常，不属于1，2，3，4',
                    'row_list': row}
        elif buy_box_seller_type is None or buy_box_seller_type == "":
            # return "Error_asin_01:buy_box_seller_type为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:buy_box_seller_type为空异常',
                    'check_rules': 'buy_box_seller_type为空异常', 'row_list': row}
        elif buy_box_seller_type in ["null", "none", "nan", '0']:
            # return "Error_asin_01:buy_box_seller_type为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:buy_box_seller_type为空异常',
                    'check_rules': 'buy_box_seller_type为空异常', 'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': 'buy_box_seller_type为空异常', 'row_list': None}

    # 定义检查 page_inventory 的函数
    def check_page_inventory(self, page_inventory):
        row = self.df.loc[self.df.page_inventory == page_inventory].asin
        row = list(row)
        if page_inventory not in [1, 2, 3]:
            # return "Error_asin_01:数值异常，不属于1，2，3"
            return {'error': '异常', 'error_detail': 'Error_asin_01:数值异常，不属于1，2，3', 'check_rules': '数值异常，不属于1，2，3',
                    'row_list': row}
        elif page_inventory is None or page_inventory == "":
            # return "Error_asin_01:page_inventory为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:page_inventory为空异常',
                    'check_rules': 'page_inventory为空异常', 'row_list': row}
        elif page_inventory in ["null", "none", "nan", '0']:
            # return "Error_asin_01:page_inventory为空异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:page_inventory为空异常',
                    'check_rules': 'page_inventory为空异常', 'row_list': row}
        else:
            return {'error': '正常', 'error_detail': None, 'check_rules': 'page_inventory为空异常', 'row_list': None}

    # 定义检查 category 的函数
    def check_category(self, category):
        row = self.df.loc[self.df.category == category].asin
        row = list(row)
        if category == '无' or '›' in category:
            return {'error': '正常', 'error_detail': None, 'check_rules': '是无也不包含 > ', 'row_list': None}
        else:
            # return "Error_asin_01:不是无也不包含'>' 异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:不是无也不包含 > 异常', 'check_rules': '是无也不包含 > ',
                    'row_list': row}

    # 定义检查 volume 的函数
    def check_volume(self, volume):
        row = self.df.loc[self.df.volume == volume].asin
        row = list(row)
        if volume in ["null", "none", "nan", None, "", '0']:
            return {'error': '正常', 'error_detail': None, 'check_rules': '没有x出现或者出现了-', 'row_list': None}
        elif 'x' in volume and '-' not in volume:
            return {'error': '正常', 'error_detail': None, 'check_rules': '没有x出现或者出现了-', 'row_list': None}
        else:
            # return "Error_asin_01:异常,没有'x'出现或者出现了'-'"
            return {'error': '异常', 'error_detail': 'Error_asin_01:异常,没有x出现或者出现了-', 'check_rules': '没有x出现或者出现了',
                    'row_list': row}

    # 定义检查 weight 的函数
    def check_weight(self, weight):
        row = self.df.loc[self.df.weight == weight].asin
        row = list(row)
        if pd.isna(weight) or weight in ["null", "none", None, "", '0'] or (
                isinstance(weight, (int, float)) and weight >= 0):
            return {'error': '正常', 'error_detail': None, 'check_rules': '不是正数', 'row_list': None}
        else:
            # return "Error_asin_01:异常,不是正数"
            return {'error': '异常', 'error_detail': 'Error_asin_01:异常,不是正数', 'check_rules': '不是正数', 'row_list': row}

    # 定义检查 rank 的函数
    def check_rank(self, rank):
        self.df['rank'].fillna('0', inplace=True)  # 将 NaN 值填充为零或其他合适的值
        row = self.df.loc[self.df['rank'] == rank].asin
        # row = self.df.loc[self.df.rank == rank].asin
        row = list(row)
        if pd.isna(rank) or rank in ["null", "none", None, "", '0'] or (isinstance(rank, (int, float)) and rank > 0):
            return {'error': '正常', 'error_detail': None, 'check_rules': '不是非负数', 'row_list': None}
        else:
            # return "Error_asin_01:异常，不是非负数"
            return {'error': '异常', 'error_detail': 'Error_asin_01:异常，不是非负数', 'check_rules': '不是非负数', 'row_list': row}

    # 定义检查 launch_time 的函数
    def check_launch_time(self, launch_time):
        row = self.df.loc[self.df.launch_time == launch_time].asin
        row = list(row)
        # 检查 launch_time 是否是日期对象，如果是，将其转换为字符串
        if hasattr(launch_time, 'strftime'):
            launch_time_str = launch_time.strftime('%Y-%m-%d')
        elif pd.isna(launch_time) or launch_time in ["null", "none", None, "", '0']:
            return {'error': '正常', 'error_detail': None, 'check_rules': '是否是日期对象，如果是，将其转换为字符串', 'row_list': None}
        else:
            launch_time_str = launch_time

        try:
            # 尝试将输入的时间字符串解析为日期对象
            datetime.strptime(launch_time_str, '%Y-%m-%d')
        except ValueError:
            # return "Error_asin_01:格式异常"
            return {'error': '异常', 'error_detail': 'Error_asin_01:格式异常', 'check_rules': '是否是日期对象，如果是，将其转换为字符串',
                    'row_list': row}

        # 将输入的时间字符串解析为日期对象
        launch_date = datetime.strptime(launch_time_str, '%Y-%m-%d')
        # 定义时间区间的起始日期和结束日期
        start_date = datetime(1500, 1, 1)
        end_date = datetime(2099, 12, 31)
        # 检查输入的日期是否在指定的区间内
        if start_date <= launch_date <= end_date:
            return {'error': '正常', 'error_detail': None, 'check_rules': '输入的日期是否在指定的区间内 1500-2099 年', 'row_list': None}
        else:
            # return "Error_asin_02:时间区间异常"
            return {'error': '异常', 'error_detail': 'Error_asin_02:时间区间异常', 'check_rules': '输入的日期是否在指定的区间内 1500-2099 年',
                    'row_list': row}

    # 定义检查 img_num 的函数
    def check_img_num(self, img_num):
        row = self.df.loc[self.df.img_num == img_num].asin
        row = list(row)
        if (isinstance(img_num, int) and 0 <= img_num <= 10):
            return {'error': '正常', 'error_detail': None, 'check_rules': '不属于[0,10]这个区间，或者不是非负整数', 'row_list': None}
        else:
            # return "Error_asin_01:不属于[0,10]这个区间，或者不是非负整数"
            return {'error': '异常', 'error_detail': 'Error_asin_01:不属于[0,10]这个区间，或者不是非负整数',
                    'check_rules': '不属于[0,10]这个区间，或者不是非负整数', 'row_list': row}

    # 定义检查 img_type 的函数
    def check_img_type(self, img_type):
        row = self.df.loc[self.df.img_type == img_type].asin
        row = list(row)
        if pd.isna(img_type) or img_type in ["null", "none", None, ""]:
            return {'error': '正常', 'error_detail': None, 'check_rules': 'img_list 包含 1、2、3 这三个数字，且长度不超过 3',
                    'row_list': None}
        # 将字符串类型的 img_type 转换为列表
        img_list = img_type.split(',')
        # 如果 img_list 包含 1、2、3 这三个数字，且长度不超过 3，则返回正常
        if set(img_list).issubset({'1', '2', '3'}) and len(img_list) <= 3:
            return {'error': '正常', 'error_detail': None, 'check_rules': 'img_list 包含 1、2、3 这三个数字，且长度不超过 3',
                    'row_list': None}
        else:
            # return "Error_asin_01:异常，不符合要求"
            return {'error': '异常', 'error_detail': 'Error_asin_01:异常，不符合要求',
                    'check_rules': 'img_list 包含 1、2、3 这三个数字，且长度不超过 3', 'row_list': row}

    # 定义检查 brand 的函数
    def check_brand(self, brand):
        row = self.df.loc[self.df.brand == brand].asin
        row = list(row)
        # 如果 brand 是空值或者非法值，则返回正常
        if pd.isna(brand) or brand in [None, "", '0']:
            return {'error': '正常', 'error_detail': None, 'check_rules': '不包含 null none', 'row_list': None}
        # 如果 brand 全部由数字组成，则返回异常
        if brand in ["null", "none", ]:
            # return "Error_asin_01:异常，品牌字符串 null none"
            return {'error': '异常', 'error_detail': 'Error_asin_01:异常，品牌字符串 null none', 'check_rules': '不包含 null none',
                    'row_list': row}
        # 否则返回正常
        return {'error': '正常', 'error_detail': None, 'check_rules': '不包含 null none', 'row_list': None}

    # 定义检查 node_id 的函数
    def check_node_id(self, node_id):
        row = self.df.loc[self.df.node_id == node_id].asin
        row = list(row)
        if pd.isna(node_id) or node_id in ["null", "none", None, "", '0']:
            return {'error': '正常', 'error_detail': None, 'check_rules': '必须是正整数,长度不能超过 20', 'row_list': None}

        # 如果 node_id 不是正整数，则返回异常
        if not str(node_id).isdigit() or int(node_id) <= 0:
            # return "Error_asin_01:异常，必须是正整数"
            return {'error': '异常', 'error_detail': 'Error_asin_01:异常，必须是正整数', 'check_rules': '必须是正整数,长度不能超过 20',
                    'row_list': row}

        # 如果 node_id 的长度超过 20，则返回异常
        if len(str(node_id)) > 20:
            # return "Error_asin_02:异常，长度不能超过 20"
            return {'error': '异常', 'error_detail': 'Error_asin_02:异常，长度不能超过 20', 'check_rules': '必须是正整数,长度不能超过 20',
                    'row_list': row}
        # 否则返回正常
        return {'error': '正常', 'error_detail': None, 'check_rules': '必须是正整数,长度不能超过 20', 'row_list': None}

    # 定义检查 buy_sales 的函数
    def check_buy_sales(self, buy_sales):
        row = self.df.loc[self.df.buy_sales == buy_sales].asin
        row = list(row)
        # 如果 buy_sales 是空值或者非法值，则返回正常
        if pd.isna(buy_sales) or buy_sales in ["null", "none", None, "", '0']:
            return {'error': '正常', 'error_detail': None, 'check_rules': '购买销量必须带有数字,购买销量必须包含 boughtinpastmonth',
                    'row_list': None}
        # 如果 buy_sales 中只出现 boughtinpastmonth，而没有数字，则返回异常
        if "boughtinpastmonth" in buy_sales.lower():
            if any(char.isdigit() for char in buy_sales):
                return {'error': '正常', 'error_detail': None, 'check_rules': '购买销量必须带有数字,购买销量必须包含 boughtinpastmonth',
                        'row_list': None}
            else:
                # return "Error_asin_01:异常，购买销量必须带有数字"
                return {'error': '异常', 'error_detail': 'Error_asin_01:异常，购买销量必须带有数字',
                        'check_rules': '购买销量必须带有数字,购买销量必须包含 boughtinpastmonth', 'row_list': row}
        else:
            # return "Error_asin_02:异常，购买销量必须包含 boughtinpastmonth"
            return {'error': '异常', 'error_detail': 'Error_asin_02:异常，购买销量必须包含 boughtinpastmonth',
                    'check_rules': '购买销量必须带有数字,购买销量必须包含 boughtinpastmonth', 'row_list': row}

    def redis_db(self):
        """Build and return a ``redis.Redis`` client for the hard-coded server (db 14)."""
        return redis.Redis(
            host="113.100.143.162",
            port=6379,
            password="yswg2023",
            db=14,
        )

    def column_describe(self):
        """Return the mapping from column name to its Chinese description.

        Returns:
            A new dict on every call, in the listed order.
        """
        described_columns = (
            ('activity_type', '促销类型'),
            ('one_two_val', 'coupon额度 activity_type 1、2对应的值'),
            ('three_four_val', 'Prime Exclusive Discounts额度 activity_type 3 4对应的值'),
            ('eight_val', '9：降低幅度的值 '),
            ('ac_name', 'Amazon Choice后面的词，不包含品牌名称'),
            ('package_quantity', '包装数量'),
            ('pattern_name', '模式名称'),
            ('buy_sales', 'asin详情 月销售量'),
            ('review_ai_text', '亚马逊AI 根据客户评论文本生成分析'),
            ('review_label_json', 'AI 根据客户评论文本生成标签。以及对应评论'),
            ('lob_asin_json', 'Make it a bundle商品的asin'),
            ('weight', '重量'),
            ('material', '材料或者材质'),
            ('launch_time', '上架时间'),
            ('together_asin', '一起购买的asin'),
            ('parentAsin', ''),
            ('product_description', '底部的 产品描述'),
            ('product_json', '产品属性。五点描述上方展示的'),
            ('price', '价格'),
            ('rating', '星级'),
            ('total_comments', '评论数'),
            ('volume', '体积'),
            ('brand', '品牌名称'),
            ('weight_str', '完整重量未转换'),
            ('describe', '头部 五点描述'),
            ('rank', '排名'),
            ('node_id', 'node_id'),
            ('product_detail_json', 'asin详情底部的产品属性'),
            ('img_num', '图片链接长度'),
            ('img_url', '图片url'),
            ('asin', 'asin'),
            ('title_len', '标题长度'),
            ('title', '标题'),
            ('page_inventory', '库存'),
            ('buy_box_seller_type', "配送类型"),
            ('img_type', '是否有A+ 视频 图片1 视频2 A+3'),
            ('category', '头部分类'),
        )
        return dict(described_columns)

    def get_redis_data(self):
        redis14 = self.redis_db()
        list_data_list = []
        # new_date = datetime.now().strftime("%Y-%m-%d")
        new_date_list = ['2024-05-06', '2024-05-07']
        for new_date in new_date_list:
            start_index = 0  # 起始索引
            end_index = -1  # 结束索引，-1 表示获取整个列表
            list_data = redis14.lrange(new_date, start_index, end_index)
            list_data_list.extend(list_data)
        if list_data_list:
            # 使用 Counter 统计元素出现次数
            element_counts = Counter(list_data_list)
            # 输出统计结果
            count_list_data = []
            # [1:验证码。2：请求失败。3：请求成功。4：发出总请求数]
            elements_to_check = [1, 2, 3, 4]  # 需要检查的元素列表
            for element in elements_to_check:
                list_data_dict = {}
                element_str = str(element).encode()  # 将整数转为字节字符串
                count = element_counts.get(element_str, 0)  # 获取元素的计数，若不存在则默认为 0
                print(f"{element}: {count}")
                list_data_dict[element] = count
                count_list_data.append(list_data_dict)
            print('1:验证码。2：请求失败。3：请求成功。4：发出总请求数::', count_list_data)
            # new_date_hour = str(new_date) + ':0-23'
            # print(new_date_hour)
            # list_hour_data = redis14.lrange(new_date_hour, start_index, end_index)
            # # 使用 Counter 统计元素出现次数
            # element_counts_hour = Counter(list_hour_data)
            # hour_data_dict = {}
            # for element, counts in element_counts_hour.items():
            #     hour_data_dict[str(new_date) + ':' + element.decode('utf-8')] = counts
            # hour_data_json = json.dumps(hour_data_dict)
            # print('每小时抓取总数：：', hour_data_json)
            # total_count = sum(hour_data_dict.values())
            # print('当天抓取总数：', total_count)
            # # 统计字段空值次数。空值率
            # # log_time = time.strftime('%Y-%m-%d', time.localtime(time.time()))
            # log_time = '2024-05-07'
            # asin_detail_column_is_none_count = f'{log_time}_asin_detail_column_is_none_count'
            # redis_values = redis14.lrange(asin_detail_column_is_none_count, start_index, end_index)
            # # 使用 map() 函数将字节串转换为 Python 字符串
            # asin_detail_data_list = list(map(lambda x: x.decode('utf-8'), redis_values))
            # # 使用 Counter 统计元素出现次数
            # element_count = Counter(asin_detail_data_list)
            # asin_detail_column_dict = {}
            # column_chinese_describe = self.column_describe()
            # for element, count in element_count.items():
            #     asin_detail_column_dict[element] = count
            # asin_column_dict = {}
            # for k, v in asin_detail_column_dict.items():
            #     asin_column_dict[k] = v
            #     asin_column_dict[k + '_describe'] = column_chinese_describe.get(k)
            # asin_column_none_count_dict = {
            #     '月销': str(int(asin_column_dict['buy_sales'] / total_count * 100)) + '%',
            #     '排名': str(int(asin_column_dict['rank'] / total_count * 100)) + '%',
            #     '体积': str(int(asin_column_dict['volume'] / total_count * 100)) + '%',
            #     '重量': str(int(asin_column_dict['weight_str'] / total_count * 100)) + '%',
            #     '价格': str(int(asin_column_dict['price'] / total_count * 100)) + '%',
            #     '上架时间': str(int(asin_column_dict['launch_time'] / total_count * 100)) + '%',
            #     '评论数': str(int(asin_column_dict['total_comments'] / total_count * 100)) + '%',
            #     '星级': str(int(asin_column_dict['rating'] / total_count * 100)) + '%',
            #     f'{new_date}抓取总数': total_count
            # }
            # asin_column_json = json.dumps(asin_column_none_count_dict, ensure_ascii=False)
            # # print(asin_column_json)
            return count_list_data

    def send_mes(self, account, title, content):
        """POST a message to the ``sendMessage`` endpoint.

        Args:
            account: Recipient account(s).
            title: Message title.
            content: Message body.
        """
        payload = dict(account=account, title=title, content=content)
        requests.post(url='http://47.112.96.71:8082/selection/sendMessage', data=payload, timeout=15)

    def count_columns_none(self):
        # 指定要检查的列
        columns_to_check = list(self.df.columns)

        # 计算空值数量和空值率
        null_counts = self.df[columns_to_check].isna().sum()
        null_rates = self.df[columns_to_check].isna().mean() * 100

        # 创建字典以存储结果
        self.null_info = {col: {'空值数量': null_counts[col], '空值率 (%)': null_rates[col]} for col in columns_to_check}

    def run(self):
        """Sample rows from the month table and run every column validator.

        Reads up to ``limit_num`` rows from
        ``<site_name>_asin_detail_month_<date_info>``, applies the matching
        ``check_<column>`` method to every value of each checked column and
        summarises the outcomes.

        Side effects: sets ``self.connection``, ``self.df``,
        ``self.asin_detail_check_error_list`` (one
        ``[row_list, check_rules, column]`` entry per result that carries
        rows) and, through ``count_columns_none``, ``self.null_info``.

        Returns:
            A JSON string mapping each column to ``normal_count``,
            ``exception_count``, ``error_details`` and ``check_rules``;
            ``None`` when no connection is available or the query fails.
        """
        # Local import: errors raised through a SQLAlchemy engine arrive
        # wrapped in SQLAlchemy exception types, which ``psycopg2.Error``
        # alone does not catch.
        from sqlalchemy.exc import SQLAlchemyError

        table_name = f"{self.site_name}_asin_detail_month_{self.date_info.replace('-', '_')}"
        self.connection = self.connect_to_database()
        if not self.connection:
            return None
        try:
            # int() keeps a non-numeric limit out of the SQL text.
            query = f"SELECT * FROM {table_name} limit {int(self.limit_num)};"
            print(query)
            data = pd.read_sql_query(query, self.connection)
            self.df = data
            print("成功获取数据")
            columns_list = ['buy_sales', 'node_id', 'brand', 'img_type', 'img_num', 'launch_time', 'rank', 'weight',
                            'volume', 'category', 'page_inventory', 'buy_box_seller_type', 'total_comments',
                            'rating',
                            'price', 'title_len', 'title', 'img_url', 'asin']
            data_dict = {}
            self.asin_detail_check_error_list = []
            for column in columns_list:
                if column in data.columns and data[column].dtype == 'int64':
                    data[column] = data[column].astype(int)
                # Resolve the validator by name, e.g. 'price' -> self.check_price.
                check_method = getattr(self, f'check_{column}')
                # Replace missing values with the '0' placeholder the
                # validators expect. Assign the result back: an inplace
                # fillna on ``data[column]`` is a chained assignment and is
                # not guaranteed to write through to ``data``.
                data[column] = data[column].fillna('0')

                result = data[column].apply(check_method)

                normal_count = 0
                exception_count = 0
                error_details = {}
                check_rules = None  # stays None for an empty sample
                for item in result:
                    if item['row_list']:
                        self.asin_detail_check_error_list.append(
                            [item['row_list'], item.get('check_rules'), column])
                    # The rule text of the last checked value is reported for the column.
                    check_rules = item.get('check_rules')
                    status = item.get('error')
                    if status == '异常':
                        exception_count += 1
                        error_detail = item.get('error_detail')
                        error_details[error_detail] = error_details.get(error_detail, 0) + 1
                    elif status == '正常':
                        normal_count += 1

                data_dict[column] = {
                    'normal_count': str(normal_count),
                    'exception_count': str(exception_count),
                    'error_details': error_details,
                    'check_rules': check_rules
                }

            json_data = json.dumps(data_dict, ensure_ascii=False)
            self.count_columns_none()
            print(self.asin_detail_check_error_list)
            return json_data
        except (psycopg2.Error, SQLAlchemyError) as e:
            print("执行查询时出错:", e)
            return None

    # 定义一个函数来处理单个子列表
    # original_list = [[['B0C448HX7V'], '没有x出现或者出现了', 'volume'],[['B0C448HX7V','vvvvvv'], '没有x出现或者出现了', 'volume']]
    def process_sublist(self, sublist):
        """Expand ``[[asin, ...], rule, column]`` into one row per ASIN.

        Args:
            sublist: A list whose first element is a list of ASINs and whose
                remaining elements are shared by every ASIN.

        Returns:
            A 2-D object ndarray with one ``[asin, *shared]`` row per ASIN.
        """
        asin_column = np.array(sublist[0], dtype=object)[:, np.newaxis]
        shared_values = np.array(sublist[1:], dtype=object)
        # Repeat the shared values once per ASIN so they can sit beside the ASIN column.
        repeated = np.tile(shared_values, (len(asin_column), 1))
        return np.concatenate((asin_column, repeated), axis=1)

    def count_data(self, data_list, column_data):
        """Persist the per-column check summary and the per-ASIN error rows.

        Existing rows for ``self.date_info`` are deleted from
        ``<site_name>_asin_detail_check_2024`` and the whole
        ``<site_name>_asin_detail_check_error_2024`` table is emptied before
        the new rows are appended.

        Relies on ``self.connection``, ``self.null_info`` and
        ``self.asin_detail_check_error_list`` having been set by ``run``.

        Args:
            data_list: Request counters as returned by ``get_redis_data``
                (``[{1: n}, {2: n}, {3: n}, {4: n}]``); ``None`` or missing
                codes count as 0.
            column_data: JSON string returned by ``run``.
        """
        # Local import; sqlalchemy is already a dependency of this file.
        from sqlalchemy import text

        column_chinese_describe = self.column_describe()
        column_data = json.loads(column_data)

        # Flatten the list of single-entry dicts so the codes can be looked
        # up by key; this also tolerates an empty redis result.
        request_counts = {}
        for entry in data_list or []:
            request_counts.update(entry)
        requests_normal = request_counts.get(3, 0)
        requests_all = request_counts.get(4, 0)
        # Guard against a zero total, which previously raised ZeroDivisionError.
        normal_percent = int((requests_normal / requests_all) * 100) if requests_all else 0
        requests_normal_rate = str(normal_percent) + '%'

        asin_detail_check_list = []
        for column, summary in column_data.items():
            asin_detail_check_list.append({
                'field_en_name': column,
                'field_ch_name': column_chinese_describe.get(column),
                'check_rules': summary['check_rules'],
                'check_total_quantity': self.limit_num,
                'check_normal_quantity': summary['normal_count'],
                'check_error_quantity': summary.get('exception_count'),
                'check_error_detail': json.dumps(summary.get('error_details'), ensure_ascii=False),
                'check_null_quantity': self.null_info[column]['空值数量'],
                'check_null_rate': self.null_info[column]['空值率 (%)'],
                'requests_normal_quantity': requests_normal,
                'requests_all_quantity': requests_all,
                'requests_normal_rate': requests_normal_rate,
                'date_info': self.date_info,
            })
        keys = asin_detail_check_list[0].keys()
        print(keys)
        cols_list = list(keys)
        print(cols_list)

        with self.connection.begin() as conn:
            # date_info comes from the command line, so pass it as a bound
            # parameter instead of formatting it into the statement.
            conn.execute(
                text(f"delete from {self.site_name}_asin_detail_check_2024 where date_info = :date_info"),
                {"date_info": self.date_info},
            )
            conn.execute(text(f"delete from {self.site_name}_asin_detail_check_error_2024"))

        df_data = pd.DataFrame(data=asin_detail_check_list, columns=cols_list)
        df_data.to_sql(name=f'{self.site_name}_asin_detail_check_2024', con=self.connection, if_exists='append',
                       index=False)

        if self.asin_detail_check_error_list:
            # One [asin, error_type, column_name] row per ASIN of every entry.
            flattened_list = [
                row
                for sublist in self.asin_detail_check_error_list
                for row in self.process_sublist(sublist).tolist()
            ]
            print(flattened_list)
            df_error_data = pd.DataFrame(data=flattened_list, columns=['asin', 'error_type', 'column_name'])
            df_error_data.drop_duplicates(['asin', 'column_name'], inplace=True)
            df_error_data.to_sql(name=f'{self.site_name}_asin_detail_check_error_2024', con=self.connection,
                                 if_exists='append', index=False)

if __name__ == '__main__':
    # Usage: <script> <site_name> <date_info> <num>
    # When any of the three arguments is missing, fall back to the class
    # defaults. Only IndexError is caught: a bare ``except`` also hid real
    # failures (including KeyboardInterrupt) behind the default run.
    try:
        site_name, date_info, num = sys.argv[1], sys.argv[2], sys.argv[3]
    except IndexError:
        dataframe = MyDataFrame()
    else:
        dataframe = MyDataFrame(site_name=site_name, date_info=date_info, num=num)
    data_list = dataframe.get_redis_data()
    column_data = dataframe.run()
    dataframe.count_data(data_list, column_data)
