import pandas as pd
import sys, os
import logging
import time
from tools.seed_oa import send_mg
sys.path.append(os.path.dirname(sys.path[0]))
from db.mysql_db import get_country_engine, sql_insert, sql_insert_many, sql_connect, sql_delete

# Configure the root logger once at import time: INFO level, with timestamp,
# logger name, level name and message on every line.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(name)s %(levelname)s %(message)s',
)


class AsinFilter:
    """
    Build the daily crawl queues for one Amazon site.

    ``run()`` performs, in order:

    1. delete rows from ``{site}_self_real_spider`` (see
       ``delete_self_real_spider`` for the exact predicate);
    2. load ASINs from the ``get_*`` sources, concatenate them, and merge the
       rows that share ``(asin, site)`` into one row whose ``data_type`` is
       the comma-joined set of the source data types;
    3. split the merged rows by ``data_type`` and upsert them into
       ``{site}_self_all_syn``, ``{site}_self_real_spider`` and
       ``{site}_self_asin_spider``.
    """

    # Site code -> Amazon marketplace domain.
    SITES = {
        "us": "Amazon.com",
        "uk": "Amazon.co.uk",
        "de": "Amazon.de",
        "es": "Amazon.es",
        "it": "Amazon.it",
        "fr": "Amazon.fr",
        "mx": "Amazon.com.mx",
        "ca": "Amazon.ca",
        "nl": "Amazon.nl",
        "be": "Amazon.com.be",
        "se": "Amazon.se",
        "pl": "Amazon.pl",
        "tr": "Amazon.com.tr",
        "au": "Amazon.com.au",
    }

    # Lower bound on bsr_day_asin.created_at used by get_bsr_day_asin().
    BSR_CREATED_SINCE = '2024-05-28'

    # Column order of the merged frame; it is also the positional order of the
    # placeholders in the `_self_all_syn` / `_self_real_spider` INSERTs
    # (`is_variat` is written to the `is_variation` column there).
    _SYN_COLUMNS = ('asin', 'site', 'data_type', 'state', 'is_variat', 'priority', 'date_info')

    # Positional order of the placeholders in the `_self_asin_spider` INSERT.
    _SPIDER_COLUMNS = ('asin', 'data_type', 'state', 'is_variat')

    # Source loaders called by sum_datas(), in precedence order: when several
    # sources contain the same (asin, site), the first loader listed here
    # supplies state / is_variat / date_info for the merged row.
    # A new get_* source must be added to this tuple to be picked up.
    _LOADERS = (
        'get_bsr_day_asin',
        'get_self_asin',
        'get_other_self_asin',
        'get_erp_asin_syn',
        'get_all_syn_st',
    )

    def __init__(self, site):
        """
        :param site: two-letter site code (e.g. ``"us"``); it is interpolated
            into table names, so it must come from trusted code.
        """
        self.site = site
        self.conn = None
        self.engine = get_country_engine(self.site)
        # Per-instance copy so callers that mutate `sites` do not affect others.
        self.sites = dict(self.SITES)

    def _read(self, sql):
        """Run ``sql`` on this site's engine and return the result as a DataFrame."""
        return pd.read_sql(sql, con=self.engine)

    def get_bsr_day_asin(self):
        """
        Load distinct (asin, date_info) from ``bsr_day_asin`` for this site.

        Original note: BSR list once per day, new-release list once per day.
        Rows are tagged data_type=4, priority=4, is_variat=2, state=1.
        """
        logging.info(f"get_bsr_day_asin site={self.site}")
        # The date filter compares created_at directly. The previous
        # `TO_DAYS(created_at) >= '2024-05-28'` compared an integer day number
        # with a string (coerced to 2024), which is true for every row.
        sql = (
            f"SELECT distinct asin, date_info from bsr_day_asin "
            f"WHERE created_at>='{self.BSR_CREATED_SINCE}' and site_name='{self.site}';"
        )
        df_bsr_asin = self._read(sql)
        df_bsr_asin["is_variat"] = 2
        df_bsr_asin["data_type"] = 4
        df_bsr_asin["state"] = 1
        df_bsr_asin["priority"] = 4
        df_bsr_asin["site"] = self.site
        return df_bsr_asin

    def get_self_asin(self):
        """
        Load distinct (asin, data_type, is_variat) from ``{site}_self_asin``.

        Original note: six major sites, internal ASINs. data_type is kept as
        stored in the table; rows are tagged priority=3, state=1.
        """
        df_asin = self._read(
            f"SELECT DISTINCT asin, data_type, is_variat from {self.site}_self_asin;"
        )
        df_asin["state"] = 1
        df_asin["priority"] = 3
        df_asin["date_info"] = None
        df_asin["site"] = self.site
        return df_asin

    def get_other_self_asin(self):
        """
        Load rows of ``other_self_asin`` whose data_type contains ``7``.

        The ``site`` column comes from the table itself (sum_datas() later
        normalises it with site_transition()). Rows are tagged priority=3,
        state=1.
        """
        # `%%` is kept exactly as in the original query text.
        df_asin = self._read(
            "SELECT DISTINCT asin, site, data_type, is_variat from other_self_asin where data_type like '%%7%%';"
        )
        df_asin["state"] = 1
        df_asin["priority"] = 3
        df_asin["date_info"] = None
        return df_asin

    def get_erp_asin_syn(self):
        """
        Load today's rows (by ``created_at``) from ``{site}_erp_asin_syn``.

        Rows are tagged data_type='3', priority=3, state=1.
        """
        df_asin = self._read(
            f"SELECT DISTINCT asin, is_variation as is_variat from {self.site}_erp_asin_syn WHERE TO_DAYS(created_at)=TO_DAYS(now());"
        )
        df_asin["state"] = 1
        df_asin["priority"] = 3
        df_asin["date_info"] = None
        df_asin["site"] = self.site
        df_asin["data_type"] = '3'
        return df_asin

    def get_all_syn_st(self):
        """
        Load rows with data_type=11 and state=1 from ``{site}_all_syn_st``.

        Rows are tagged data_type='11', priority=3, state=1. sum_datas() only
        calls this loader on the first day of the month.
        """
        df_asin = self._read(
            f"SELECT asin, is_variat from {self.site}_all_syn_st WHERE data_type=11 and state=1;"
        )
        df_asin["state"] = 1
        df_asin["priority"] = 3
        df_asin["date_info"] = None
        df_asin["site"] = self.site
        df_asin["data_type"] = '11'
        return df_asin

    def data_type(self, x):
        """
        Merge the rows of ``x`` that share the same ``site``.

        For every site with more than one row, each of those rows receives the
        comma-joined (sorted) set of the group's data_type values, the minimum
        priority, and the first row's date_info (``"1"`` is mapped to None)
        and is_variat. Single-row sites are passed through unchanged. The
        number of rows is not reduced.

        Not called by sum_datas(); kept for callers that group by asin and
        apply this per group.

        :param x: DataFrame with at least the columns asin, data_type, state,
            is_variat, site, priority, date_info; data_type must hold strings.
        :return: DataFrame with those columns first, followed by any others.
        """
        base_columns = ['asin', 'data_type', 'state', 'is_variat', 'site', 'priority', 'date_info']
        parts = []
        for site in x["site"].unique():
            # Work on a copy so the caller's frame is never modified.
            y = x[x["site"] == site].copy()
            if y.empty:
                # A NaN site never equals itself; nothing to merge.
                continue
            if len(y) != 1:
                # sorted() keeps the joined string stable between runs.
                y["data_type"] = ",".join(sorted(set(y["data_type"])))
                y["priority"] = min(y["priority"])
                date_info = y["date_info"].iloc[0]
                y["date_info"] = None if date_info == "1" else date_info
                y["is_variat"] = y["is_variat"].iloc[0]
            parts.append(y)
        if not parts:
            return pd.DataFrame([], columns=base_columns)
        # DataFrame.append was removed in pandas 2.0; concat once instead.
        merged = pd.concat(parts)
        extra = [c for c in merged.columns if c not in base_columns]
        return merged.reindex(columns=base_columns + extra)

    def site_transition(self, x):
        """
        Normalise a site value to its two-letter code.

        :param x: a two-character site code (returned unchanged) or a
            marketplace domain found in ``self.sites`` (e.g. ``"Amazon.com"``).
        :return: the site code.
        :raises ValueError: if ``x`` is not two characters long and is not a
            known marketplace domain.
        """
        if len(x) == 2:
            return x
        for code, domain in self.sites.items():
            if domain == x:
                return code
        raise ValueError(f"{x!r} is not a known Amazon marketplace domain")

    def _merge_frames(self, frames):
        """
        Concatenate loader frames and collapse rows sharing (asin, site).

        :param frames: list of non-empty DataFrames produced by the loaders,
            in precedence order.
        :return: DataFrame with the columns of ``_SYN_COLUMNS``, one row per
            (asin, site): data_type is the sorted comma-joined set of the
            group's values (as strings), priority the minimum, and state /
            is_variat / date_info come from the group's first row.
        """
        columns = list(self._SYN_COLUMNS)
        if not frames:
            return pd.DataFrame([], columns=columns)

        datas = pd.concat(frames, ignore_index=True).reindex(columns=columns)
        datas['data_type'] = datas['data_type'].astype(str)
        datas['site'] = datas['site'].apply(self.site_transition)

        grouped = datas.groupby(['asin', 'site']).agg(lambda x: list(x)).reset_index()
        if grouped.empty:
            # Every row had a null key and was dropped by groupby.
            return pd.DataFrame([], columns=columns)

        grouped['data_type'] = grouped['data_type'].apply(lambda v: ",".join(sorted(set(v))))
        grouped['priority'] = grouped['priority'].apply(min)
        for column in ('state', 'is_variat', 'date_info'):
            grouped[column] = grouped[column].apply(lambda v: v[0])
        return grouped[columns]

    def sum_datas(self):
        """
        Call the applicable loaders and return the merged, de-duplicated frame.

        ``get_bsr_day_asin`` and ``get_other_self_asin`` only run for the
        ``us`` site; ``get_all_syn_st`` only runs on the first day of the
        month (local time).

        :return: DataFrame as described in ``_merge_frames``.
        """
        is_first_of_month = time.localtime().tm_mday == 1
        frames = []
        for name in self._LOADERS:
            if self.site != "us" and name == "get_bsr_day_asin":
                logging.info("非us站点 不需要调用get_bsr_asin方法")
                continue
            if self.site != "us" and name == "get_other_self_asin":
                logging.info("非us 不需要调用get_other_self_asin方法")
                continue
            if name == 'get_all_syn_st' and not is_first_of_month:
                logging.info("非每月1号 不需要调用get_all_syn_st方法")
                continue
            frame = getattr(self, name)()
            logging.info(f"{name} 获取数量为：{frame.shape}")
            # `.shape` is a tuple and therefore always truthy; test emptiness.
            if frame.empty:
                logging.info(f"{name} 没有数据")
                continue
            frames.append(frame)
        return self._merge_frames(frames)

    def _insert_rows(self, inset_sql, dates):
        """Upsert ``dates`` (a list of row lists) with ``inset_sql``; no-op when empty."""
        logging.info(f"需要插入数据库数量为{len(dates)}----")
        if not dates:
            return
        if len(dates) == 1:
            sql_insert(inset_sql, dates[0])
        else:
            sql_insert_many(inset_sql, dates)

    def _syn_insert_sql(self, table):
        """Return the 7-column upsert statement for ``{site}_{table}``."""
        return (
            f"insert into `{self.site}_{table}` (`asin`, `site`, `data_type`, `state`, `is_variation`, `priority`, `date_info`) "
            "values (%s, %s, %s, %s, %s, %s, %s) "
            "ON DUPLICATE KEY UPDATE `asin` = values(`asin`), `site` = values(`site`), `state` = values(`state`), "
            "`date_info` = values(`date_info`), `data_type` = values(`data_type`);"
        )

    def insert_asin_all_syn(self, dates):
        """
        Upsert rows into ``{site}_self_all_syn``.

        :param dates: list of rows ordered as ``_SYN_COLUMNS``.
        """
        self._insert_rows(self._syn_insert_sql("self_all_syn"), dates)

    def insert_real_spider(self, dates):
        """
        Upsert rows into ``{site}_self_real_spider``.

        :param dates: list of rows ordered as ``_SYN_COLUMNS``.
        """
        self._insert_rows(self._syn_insert_sql("self_real_spider"), dates)

    def insert_self_asin_spider(self, dates):
        """
        Upsert rows into ``{site}_self_asin_spider``.

        :param dates: list of rows ordered as ``_SPIDER_COLUMNS``.
        """
        inset_sql = f"insert into `{self.site}_self_asin_spider` (`asin`, `data_type`, `state`, `is_variat`) values (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `asin` = values(`asin`), `state` = values(`state`), `data_type` = values(`data_type`);"
        self._insert_rows(inset_sql, dates)

    def delete_self_real_spider(self):
        """
        Delete rows from ``{site}_self_real_spider`` using the predicate below.

        Original note: clean up finished non-weekly / non-monthly data.
        """
        # NOTE(review): `not like '%8%' OR not like '%9%'` is true for every
        # row that lacks either digit, so a combined value such as '3,8' is
        # deleted while exactly '8' or '9' is kept. If the intent is to keep
        # every row containing 8 or 9, the OR should be AND -- confirm with
        # the data owner before changing; the statement is left as-is.
        dele_sql = f"delete from {self.site}_self_real_spider WHERE (data_type not like '%8%' or data_type not like '%9%') and data_type not in ('8', '9');"
        sql_delete(dele_sql)
        logging.info("清理 _self_real_spider 表,爬取完成的非周和月数据")

    @staticmethod
    def _to_rows(frame, columns):
        """
        Turn ``frame`` into a list of row lists ordered as ``columns``.

        Priority is forced to 2, nulls become ``''`` and is_variat 0 becomes 2.
        """
        prepared = frame.assign(priority=2).fillna('')
        prepared.loc[prepared['is_variat'] == 0, 'is_variat'] = 2
        # Select columns by name so the rows always match the INSERT order.
        return prepared[list(columns)].values.tolist()

    def _split_rows(self, datas):
        """
        Split the merged frame by data_type into the three insert batches.

        :param datas: frame returned by sum_datas().
        :return: ``(all_syn_rows, real_rows, self_rows)`` where
            real_rows are rows whose data_type contains ``'7'``;
            self_rows are rows whose data_type is ``''`` or ``'None'``;
            all_syn_rows are all remaining rows.
        """
        # NOTE(review): substring match, so a type such as '17' would also
        # count as containing '7' -- confirm no such type exists.
        has_seven = datas['data_type'].str.contains('7')
        untyped = datas['data_type'].isin(['', 'None'])

        all_syn_rows = self._to_rows(datas[~has_seven & ~untyped], self._SYN_COLUMNS)
        real_rows = self._to_rows(datas[has_seven], self._SYN_COLUMNS)
        self_rows = self._to_rows(datas[untyped], self._SPIDER_COLUMNS)
        return all_syn_rows, real_rows, self_rows

    def run(self):
        """
        Execute the full daily job for this site.

        Connects the SQL helpers to the site, clears ``_self_real_spider``,
        merges all sources and upserts the three target tables. Exceptions
        from the database helpers propagate to the caller.
        """
        sql_connect(self.site)
        self.delete_self_real_spider()
        datas = self.sum_datas()
        logging.info(f"处理站点字段 {datas.shape}")
        all_syn_rows, real_rows, self_rows = self._split_rows(datas)
        self.insert_asin_all_syn(all_syn_rows)
        self.insert_real_spider(real_rows)
        self.insert_self_asin_spider(self_rows)


if __name__ == '__main__':
    # Run the daily job for each site in turn; a failure on one site is
    # reported through send_mg and does not stop the remaining sites.
    for site in ["us", "uk", "fr", "de", "it", "es"]:
        time.sleep(4)
        logging.info(f"开始运行----{site}")
        try:
            asin_filter = AsinFilter(site)
            asin_filter.run()
        except Exception:
            # logging.exception records the full traceback; the previous
            # print(e) only showed the message and lost where it failed.
            logging.exception("AsinFilter failed for site %s", site)
            account = 'hezhe'
            title = f'{site} 站点 22点 每日爬取表 数据插入失败'
            content = f"{site} 站点 22点 每日爬取表 数据插入失败 时间：{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}"
            send_mg(account, title, content)
