import logging
import time

import pandas as pd
import sys, os
from sqlalchemy import create_engine
sys.path.append(os.path.dirname(sys.path[0]))
from db.mysql_db import sql_connect, sql_update, sql_update_many, sql_delete, get_country_engine


# Configure the root logger: each record shows timestamp, logger name, level
# and message; records below INFO are suppressed.
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s %(message)s',
                    level=logging.INFO)


def run(site):
    """Reset parent ASINs whose stored comment count disagrees with the expected one.

    Counts the rows stored per ``parent_asin`` in ``{site}_asin_comment`` and
    compares them with ``comment_num`` from ``{site}_asin_comment_num``. For
    every parent ASIN whose two counts differ by 5 to 40 (inclusive, in either
    direction) the stored comments are deleted and ``state`` in
    ``{site}_self_variat`` is set to ``"1"`` (per the original author's note,
    so that the comments are crawled again).

    Args:
        site: Marketplace code used as the table-name prefix (e.g. ``"us"``).
            It is interpolated into SQL identifiers, which cannot be bound as
            query parameters, so it must come from trusted code only.
    """
    sql_connect(site)
    engine = get_country_engine(site)

    df_asin_comment = pd.read_sql(
        f"select parent_asin, content from {site}_asin_comment;", con=engine)
    df_asin_comment_num = pd.read_sql(
        f"SELECT parent_asin, comment_num from {site}_asin_comment_num;", con=engine)

    # Non-null stored comments per parent ASIN. as_index=False keeps
    # parent_asin as an ordinary column, so the merge below does not depend on
    # pandas resolving an index-level name as a join key.
    stored_counts = df_asin_comment.groupby(
        "parent_asin", as_index=False)["content"].count()

    # Left join: parents without an expected count get NaN, which never
    # satisfies the range test below and is therefore skipped.
    seeds = stored_counts.merge(df_asin_comment_num, on="parent_asin", how="left")

    # A gap of 5..40 in either direction marks the parent for a reset.
    gap = (seeds["comment_num"] - seeds["content"]).abs()
    mismatched = seeds.loc[gap.between(5, 40), "parent_asin"]

    # Duplicate rows in the expected-count table would otherwise repeat the
    # same delete/update for one parent ASIN.
    asin = mismatched.drop_duplicates().tolist()
    if not asin:
        logging.info("%s: no parent_asin needs its comments reset", site)
        return

    logging.info("%s: resetting comments for %d parent_asin(s)", site, len(asin))

    dele_sql = f"delete from {site}_asin_comment where `parent_asin`= (%s);"
    for parent_asin in asin:
        sql_delete(dele_sql, [parent_asin])

    sql_up = f"UPDATE `{site}_self_variat` set `state`=(%s)  where `parent_asin`=(%s);"
    updates = [("1", parent_asin) for parent_asin in asin]
    if len(updates) == 1:
        sql_update(sql_up, updates[0])
    else:
        sql_update_many(sql_up, updates)


if __name__ == '__main__':
    # Only the US marketplace is enabled; the other sites the script was
    # written for (uk, fr, de, it, es) are currently switched off.
    enabled_sites = ["us"]
    for site_code in enabled_sites:
        logging.info("开始运行----")
        run(site_code)
        # Pause before moving on to the next site.
        time.sleep(10)


# Crontab entries for the related jobs (schedule: minute 0 of every second hour):
#0 */2 * * *  cd /mnt/hezhe/amazon_spider/amazon_spider/amazon_spider/applications/amazon_comment && /opt/module/anaconda3/envs/pyspark/bin/python job.py > day_comment_job.log 2>&1 &


#0 */2 * * * cd /mnt/hezhe/utils && /opt/module/anaconda3/envs/pyspark/bin/python comment_dele.py > comment_delete.log 2>&1 &