import time
import json
import logging
import platform
import pandas as pd
from tools.seed_oa import send_mg
from scrapy.http.headers import Headers
from db.mysql_db import get_country_engine
from db.pg_db import get_pg_country_engine

from db.redis_db import hgetall, expire, hget, hset, hdel, delete

# Re-initialise the C library's timezone rules on non-Windows hosts;
# on Windows only a marker line is printed.
if platform.system() != "Windows":
    time.tzset()
else:
    print("windows")


# Root logger: INFO and above, each record rendered as
# "<time> <logger name> <level> <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(name)s %(levelname)s %(message)s',
)

# Ordered list of ASIN-detail column names.
# NOTE(review): not referenced anywhere in the visible part of this file.
col = [
    "asin", "img_url", "title", "title_len", "price",
    "rating", "total_comments", "buy_box_seller_type", "page_inventory", "category",
    "volume", "weight", "rank", "launch_time", "video_url",
    "add_url", "material", "img_num", "img_type", "qa_num",
    "brand", "ac_name", "node_id", "sp_num", "mpn",
    "online_time", "describe", "one_star", "two_star", "three_star",
    "four_star", "five_star", "low_star", "asin_type", "is_coupon",
    "search_category", "weight_str", "date_info", "site", "account_name",
    "other_seller_name", "bsr_date_info", "account_id",
]



# Ordered list of field names, starting with the synthetic "items_count".
# NOTE(review): not referenced anywhere in the visible part of this file.
del_keys = [
    "items_count", "asin", "img_url", "title", "title_len",
    "price", "rating", "total_comments", "buy_box_seller_type", "page_inventory",
    "category", "volume", "weight", "rank", "launch_time",
    "material", "img_num", "img_type", "brand", "node_id",
    "sp_num", "describe", "one_star", "two_star", "three_star",
    "four_star", "five_star", "low_star", "asin_type", "is_coupon",
    "search_category", "weight_str", "date_info", "site", "account_name",
    "add_url", "video_url", "mpn", "qa_num", "online_time",
    "other_seller_name", "ac_name", "account_id", "bsr_date_info",
]

# Columns that sum_details() leaves out of the per-field rate computation.
filter_field = [
    "id",
    "asin",
    "items_count",
    "bsr_date_info",
    "date_info",
    "is_coupon",
    "asin_type",
    "img_type",
    "page_inventory",
    "buy_box_seller_type",
    "site",
    "title_len",
    "img_num",
    "created_at",
    "updated_at",
]


# Read the clock exactly once. Every bound below is derived from this single
# reading so the hourly window, the daily window, `t` and `Y` always describe
# the same instant, even when the script starts right on an hour/day boundary.
_now = time.time()
_now_local = time.localtime(_now)

# Hourly window: from the top of the hour one hour ago to the top of the
# current hour, formatted "YYYY-MM-DD HH:00:00".
# ("statr" is a misspelling of "start"; the names are kept because the query
# helpers in this file read them.)
statr_time_h = time.strftime("%Y-%m-%d %H", time.localtime(_now - 1 * 60 * 60)) + ":00:00"

end_time_h = time.strftime("%Y-%m-%d %H", _now_local) + ":00:00"

# Daily window: the date 24 hours ago and the current date, "YYYY-MM-DD".
statr_time_d = time.strftime("%Y-%m-%d", time.localtime(_now - 24 * 60 * 60))

end_time_d = time.strftime("%Y-%m-%d", _now_local)

# Current hour as "YYYY-MM-DD-HH".
t = time.strftime("%Y-%m-%d-%H", _now_local)

# Current four-digit year as a string.
Y = end_time_d.split("-")[0]

def get_asin_detail(site):
    """Load rows of ``{site}_self_asin_detail`` updated inside the hourly window.

    The window is delimited by the module-level ``statr_time_h`` and
    ``end_time_h`` strings (both bounds inclusive) and the query is executed
    on the module-level ``mysql_engine``.

    Args:
        site: Site code used as the table-name prefix.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_sql``.
    """
    table = f"{site}_self_asin_detail"
    window = f"updated_at >='{statr_time_h}' and updated_at <='{end_time_h}'"
    query = f"SELECT * FROM {table} WHERE {window};"
    frame = pd.read_sql(query, con=mysql_engine)
    # Logged only after the query has completed.
    logging.info(f"sql：{query}")
    return frame


def get_asin_detail_2023(site):
    """Load rows of the per-year table ``{site}_self_asin_detail_{Y}``.

    Despite the function name, the year suffix is the module-level ``Y``, not
    a fixed 2023. Rows are limited to the hourly window given by the
    module-level ``statr_time_h`` / ``end_time_h`` (both bounds inclusive),
    and the query is executed on the module-level ``pg_engine``.

    Args:
        site: Site code used as the table-name prefix.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_sql``.
    """
    table = f"{site}_self_asin_detail_{Y}"
    window = f"updated_at >='{statr_time_h}' and updated_at <='{end_time_h}'"
    query = f"SELECT * FROM {table} WHERE {window};"
    frame = pd.read_sql(query, con=pg_engine)
    # Logged only after the query has completed.
    logging.info(f"sql：{query}")
    return frame


def day_real_sum(site):
    """Return the ASIN-detail rows of the current hourly window for *site*.

    Every site reads from ``get_asin_detail``. For ``"us"`` the rows returned
    by ``get_asin_detail_2023`` are additionally stacked underneath.

    Args:
        site: Site code, e.g. ``"us"`` or ``"uk"``.

    Returns:
        A ``pandas.DataFrame``; for ``"us"`` the concatenation of both
        sources with their original indexes preserved.
    """
    asin_detail = get_asin_detail(site)
    if site != "us":
        return asin_detail

    asin_detail_2023 = get_asin_detail_2023(site)
    # DataFrame.append() was removed in pandas 2.0; pd.concat() with default
    # arguments yields the same stacked frame.
    return pd.concat([asin_detail, asin_detail_2023])


# def week_detail(site):
#     sql1 = f"SELECT * FROM {site}_asin_detail_{Y}_31 WHERE created_at >='{statr_time_d}' and created_at <='{end_time_d}';"
#     df_truncate = pd.read_sql(sql1, con=mysql_engine)
#     logging.info(f"sql：{sql1}")
#     return df_truncate
#
#
# def seller_account_feedback_2023(site):
#     sql1 = f"SELECT * FROM {site}_seller_account_feedback_{Y} WHERE created_at >='{statr_time_d}' and created_at <='{end_time_d}';"
#     df_truncate = pd.read_sql(sql1, con=pg_engine)
#     logging.info(f"sql：{sql1}")
#     return df_truncate
#
#
# def seller_asin_product_2023(site):
#     sql1 = f"SELECT * FROM {site}_seller_asin_product_2023 WHERE created_at >='{statr_time_d}' and created_at <='{end_time_d}';"
#     df_truncate = pd.read_sql(sql1, con=pg_engine)
#     logging.info(f"sql：{sql1}")
#     return df_truncate


def get_asin_seller(site):
    """Load all ``self_asin_seller`` rows whose ``site`` column equals *site*.

    The query always runs on the engine returned by
    ``get_country_engine("us")``, whatever *site* is; a new engine is
    requested on every call.

    Args:
        site: Site code to filter on.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_sql``.
    """
    query = f"SELECT *  from self_asin_seller where site='{site}';"
    engine = get_country_engine("us")
    frame = pd.read_sql(query, con=engine)
    # Logged only after the query has completed.
    logging.info(f"sql：{query}")
    return frame


def sum_details(key, datas):
    """Compute the per-field fill rate of a DataFrame.

    Rows are de-duplicated on a set of columns selected by *key*. For every
    column not listed in the module-level ``filter_field``, the share of
    non-null values (rounded to two decimals) is then computed.

    Args:
        key: Metric name; selects the columns used for de-duplication. An
            unrecognised key passes an empty column list to
            ``drop_duplicates``.
        datas: ``pandas.DataFrame`` to analyse. It is not modified.

    Returns:
        A tuple ``(rates, details_count)``. ``rates`` is a list of
        single-entry dicts ``{column: rate}`` in column order, followed by a
        final ``{"items_count": details_count}`` entry. ``details_count`` is
        the number of rows left after de-duplication. When no rows remain
        every rate is ``0``.
    """
    dedup_columns = {
        "asin_detail:acquisition_rate:hour": ["asin", "site"],
        "asin_detail:seller:day": ["asin", "site"],
        "asin_detail:acquisition_rate:week": ["asin"],
        "feedback:acquisition_rate:month": ["seller_id"],
        "product:acquisition_rate:month": ["asin", "seller_id"],
    }.get(key, [])

    # De-duplicate into a new frame instead of mutating the caller's object.
    unique_rows = datas.drop_duplicates(subset=dedup_columns)
    details_count = unique_rows.shape[0]

    rates = []
    for field, null_count in unique_rows.isna().sum().items():
        if field in filter_field:
            continue
        if details_count == 0:
            # No rows: report 0 rather than evaluating 0 / 0.
            rate = 0
        else:
            rate = round((details_count - null_count) / details_count, 2)
        rates.append({field: rate})
    rates.append({"items_count": details_count})
    return rates, details_count


# Ordered list of field names, ending with the synthetic "items_count".
# NOTE(review): not referenced anywhere in the visible part of this file.
is_err = [
    "img_url", "title", "price", "rating", "total_comments",
    "category", "volume", "weight", "rank", "launch_time",
    "video_url", "add_url", "material", "created_at", "updated_at",
    "qa_num", "brand", "ac_name", "node_id", "sp_num",
    "mpn", "online_time", "describe", "one_star", "two_star",
    "three_star", "four_star", "five_star", "low_star", "search_category",
    "weight_str", "account_name", "other_seller_name", "items_count",
]

HOURLY_RATE_KEY = "asin_detail:acquisition_rate:hour"
SELLER_DAILY_KEY = "asin_detail:seller:day"


def _zero_rate_fields(rates):
    """Return the names in *rates* whose value is 0 or NaN.

    *rates* is the list of single-entry ``{field: value}`` dicts returned as
    the first element of ``sum_details``.
    """
    flagged = []
    for entry in rates:
        name, value = next(iter(entry.items()))
        if value == 0 or pd.isna(value):
            flagged.append(name)
    return flagged


def _alert_zero_rate_fields(site, rates):
    """Log the zero-rate fields of *rates* and send an alert if there are any."""
    fiele_error = _zero_rate_fields(rates)
    logging.info(f"未获取到数据的字段 为{fiele_error}")
    if fiele_error:
        send_mg("hezhe", f"【{site}站点{t}字段获取率为0字段】", f"字段{json.dumps(fiele_error)}")


def _read_hash(hash_key):
    """Fetch the stored hash via ``hgetall`` and decode it with scrapy's ``Headers``."""
    return Headers(hgetall(hash_key), encoding='utf-8').to_unicode_dict()


def _alert_below_average(site, hash_key):
    """Alert on fields that sit at least 0.5 below their mean over the stored history.

    Each hash field is one snapshot (a JSON list of single-entry dicts). For
    every column, the number of snapshots whose value is at least 0.5 below
    that column's mean is counted, and columns with a non-zero count are
    reported.
    """
    snapshots = []
    for stamp, payload in _read_hash(hash_key).items():
        row = {}
        for entry in json.loads(payload):
            row.update(entry)
        row['data_info'] = stamp
        snapshots.append(row)
    if not snapshots:
        return

    # Use the snapshot label as index so it stays out of the arithmetic, and
    # restrict to numeric columns so mean()/subtraction never see strings.
    history = pd.DataFrame(snapshots).set_index('data_info')
    numeric = history.select_dtypes(include="number")
    # NOTE(review): "items_count" is a row count, not a rate, yet it is
    # compared against the same 0.5 threshold -- confirm this is intended.
    below_mean = ((numeric.mean() - numeric) >= 0.5).sum()
    # Convert numpy integers to int: json.dumps() cannot serialise numpy int64.
    error_dict_d = {name: int(count) for name, count in below_mean.items() if count > 0}
    if error_dict_d:
        send_mg("hezhe", f"【{site}站点{t}字段获取率低于平均值0.5异常字段】", f"字段{json.dumps(error_dict_d)}")


def _trim_hash(hash_key, del_num):
    """Delete the oldest fields of *hash_key*, keeping the *del_num* newest.

    Field names are timestamps whose lexical order is chronological, so a
    plain sort orders them oldest first.
    """
    k_time = sorted(_read_hash(hash_key))
    if len(k_time) > del_num:
        d_time_k = k_time[0:len(k_time) - del_num]
    else:
        d_time_k = []
    print("需要删除的key", d_time_k)
    for stale_field in d_time_k:
        hdel(hash_key, stale_field)
    logging.info(f"删除超过{del_num}天的 {hash_key} key")


# for site in ["uk"]:
for site in ["us", "uk", "fr", "de", "it", "es"]:
    logging.info(f"站点：{site}")
    # Per-site engines; get_asin_detail() and get_asin_detail_2023() read
    # these two module-level names.
    pg_engine = get_pg_country_engine(site)

    mysql_engine = get_country_engine(site)
    # Detail rows of the current hourly window.
    day_real = day_real_sum(site)

    rate_dict = {
        HOURLY_RATE_KEY: sum_details(HOURLY_RATE_KEY, day_real),
        SELLER_DAILY_KEY: sum_details(SELLER_DAILY_KEY, get_asin_seller(site)),
        # "feedback:acquisition_rate:month": sum_details("feedback:acquisition_rate:month", seller_account_feedback_2023(site)),
        # "product:acquisition_rate:month": sum_details("product:acquisition_rate:month", seller_asin_product_2023(site)),
    }
    for metric, result in rate_dict.items():
        rates, row_count = result
        # Built once per metric; the original rebuilt it from loop variables
        # that an inner loop overwrote, so trimming hit the wrong key.
        hash_key = f'{site}:{metric}'
        del_num = 10
        if metric == HOURLY_RATE_KEY:
            # The hourly snapshot is stored on every run, even with no rows.
            hset(hash_key, t, json.dumps(rates))
            del_num = 72
            logging.info(f"1小时内{metric}所有数据字段获取率{result}")
            if row_count:
                _alert_zero_rate_fields(site, rates)
        elif metric == SELLER_DAILY_KEY:
            # The daily snapshot is stored only on the run whose hour is "00".
            if row_count and t[-2:] == "00":
                hset(hash_key, t, json.dumps(rates))
                logging.info(f"1天内{metric}所有数据字段获取率{result}")
                _alert_zero_rate_fields(site, rates)
                _alert_below_average(site, hash_key)
        else:
            # Generic branch for any other metric (none is enabled right now):
            # stored under the current date, logged, no alert sent.
            print(row_count)
            if row_count:
                fiele_error = _zero_rate_fields(rates)
                logging.info(f"未获取到数据的字段 为{fiele_error}")
                hset(hash_key, end_time_d, json.dumps(rates))
                logging.info(f"1天内{metric}所有数据字段获取率{result}")

        _trim_hash(hash_key, del_num)





