import os
import sys
import re

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from pyspark.sql import functions as F
from utils.templates import Templates
from utils.db_util import DBUtil
from utils.spark_util import SparkUtil
from datetime import datetime
from dateutil.relativedelta import relativedelta
from pyspark.sql.types import IntegerType, StructType, StructField, StringType


class DwsUserCollectStoreAsinDetail(Templates):
    def __init__(self, site_name="us", date_type="week", date_info="2022-1", run_type='real_time',
                 seller_id_tuple=tuple()):
        """Create the Spark session, derive the reporting periods and register the UDFs.

        Args:
            site_name: Site code used in the ``site_name`` filters of the queries.
            date_type: Value written to the ``date_type`` output column.
            date_info: Value written to the ``date_info`` output column. It is also
                matched against ``dim_date_20_to_30.date`` and parsed as
                ``%Y-%m-%d`` by ``udf_launch_time``.
            run_type: ``'real_time'`` restricts the run to ``seller_id_tuple`` and
                targets the temporary output; any other value targets
                ``dws_user_collect_store_asin_detail``.
            seller_id_tuple: Seller ids to process in real-time mode, either a
                comma-separated string (as passed on the command line) or an
                iterable of ids.
        """
        super().__init__()
        self.site_name = site_name
        self.date_type = date_type
        self.date_info = date_info
        self.run_type = run_type
        self.seller_id_tuple = seller_id_tuple
        if self.run_type == 'real_time':
            self.db_save = "user_collect_store_asin_detail_tmp"
            # Accept both "A,B,C" strings and real tuples/lists; calling str() on a
            # tuple and splitting it would yield fragments such as "('A'".
            if isinstance(seller_id_tuple, str):
                raw_seller_ids = seller_id_tuple.split(',')
            else:
                raw_seller_ids = [str(seller_id) for seller_id in seller_id_tuple]
            seller_ids = [seller_id.strip() for seller_id in raw_seller_ids]
            # Keep one (empty) id when nothing was supplied so the generated
            # "in (...)" clause stays syntactically valid.
            self.seller_id_tuple = seller_ids or ['']
        else:
            self.db_save = "dws_user_collect_store_asin_detail"
        self.spark = self.create_spark_object(
            app_name=f"{self.db_save}: {self.site_name},{self.date_type}, {self.date_info}")
        self.partitions_by = ['site_name', 'date_type', 'date_info']
        self.reset_partitions(100)
        self.year = str(self.date_info).split("-")[0]
        self.year_week_tuple = self.get_year_week_tuple()
        self.month = self.get_month_from_date_info()
        self.previous_month = self.get_previous_month()
        self.previous_12month_tuple = self.get_previous_12month_tuple()
        self.month_week_tuple = self.get_month_week_tuple()

        # Placeholder DataFrames for the inputs loaded in read_data(); DataFrames are
        # immutable, so a single placeholder can be shared by every attribute.
        placeholder_df = self.spark.sql("select 1+1;")
        self.df_store_location_info = placeholder_df
        self.df_store_relational_asin_info = placeholder_df
        self.df_store_relational_asin_detail = placeholder_df
        self.df_top3_asin_week_report = placeholder_df
        self.df_top3_asin_month_report = placeholder_df
        self.df_top3_asin_year_report = placeholder_df
        self.df_asin_keep_date = placeholder_df
        self.df_asin_ao = placeholder_df
        self.df_asin_variant_radio = placeholder_df
        self.df_asin_previous_rank = placeholder_df
        self.df_asin_bsr_orders = placeholder_df
        self.df_asin_previous_bsr_orders = placeholder_df
        self.df_asin_category_name_info = placeholder_df
        # Placeholder for the DataFrame assembled by the handle_* steps.
        self.df_store_asin_detail = placeholder_df
        # Register the UDFs used by the handle_* steps.
        self.u_launch_time = self.spark.udf.register('u_launch_time', self.udf_launch_time, IntegerType())
        self.u_judge_is_raise_asin = self.spark.udf.register('u_judge_is_raise_asin', self.udf_judge_is_raise_asin,
                                                             IntegerType())
        self.u_judge_is_sales_surge_asin = self.spark.udf.register('u_judge_is_sales_surge_asin',
                                                                   self.udf_judge_is_sales_surge_asin, IntegerType())
        # Struct returned by udf_parse_asin_category_and_rank.
        schema = StructType([
            StructField('category_first_id', StringType(), True),
            StructField('category_id', StringType(), True),
            StructField('asin_bsr_rank', IntegerType(), True)
        ])
        self.u_parse_asin_category_and_rank = self.spark.udf.register('u_parse_asin_category_and_rank',
                                                                      self.udf_parse_asin_category_and_rank, schema)

    def get_month_from_date_info(self):
        """Pick the reporting month for ``self.date_info`` and store it in ``self.month``.

        The candidate is the month preceding the one that contains
        ``self.date_info``. If ``dwd_asin_measure`` has no monthly row for that
        candidate, the month before the candidate is used instead.

        Returns:
            The selected ``year_month`` value.
        """
        self.df_date = self.spark.sql("select * from dim_date_20_to_30 ;")
        calendar = self.df_date.toPandas()

        def month_before(year_month):
            # The calendar row just before the first day of the month belongs
            # to the previous month.
            first_day = calendar.loc[(calendar.year_month == f'{year_month}') & (calendar.day == 1)]
            prior_day_id = int(list(first_day.id)[0]) - 1
            prior_day = calendar.loc[calendar.id == prior_day_id]
            return list(prior_day.year_month)[0]

        run_day = calendar.loc[calendar.date == f'{self.date_info}']
        run_month = list(run_day.year_month)[0]
        candidate = month_before(run_month)
        probe = self.spark.sql(
            f"select * from dwd_asin_measure where site_name='{self.site_name}' and date_type = 'month' and date_info = '{candidate}' limit 1").cache()
        has_measure_data = probe.count() == 1
        self.month = candidate if has_measure_data else month_before(candidate)
        print("month:", str(self.month))
        return self.month

    def get_previous_month(self):
        """Return the ``year_month`` that precedes ``self.month`` in ``dim_date_20_to_30``."""
        self.df_date = self.spark.sql("select * from dim_date_20_to_30 ;")
        calendar = self.df_date.toPandas()
        is_first_day_of_month = (calendar.year_month == f'{self.month}') & (calendar.day == 1)
        first_day_ids = list(calendar.loc[is_first_day_of_month].id)
        # The row just before the first day of the month falls in the previous month.
        prior_day_id = int(first_day_ids[0]) - 1
        prior_day_months = list(calendar.loc[calendar.id == prior_day_id].year_month)
        return prior_day_months[0]

    def get_previous_12month_tuple(self):
        """Return the 12 ``YYYY-MM`` labels ending with ``self.month``, oldest first."""
        self.df_date = self.spark.sql("select * from dim_date_20_to_30 ;")
        calendar = self.df_date.toPandas()
        first_day_rows = calendar.loc[(calendar.year_month == f'{self.month}') & (calendar.day == 1)]
        anchor = datetime.strptime(list(first_day_rows.date)[0], '%Y-%m-%d')
        month_labels = []
        # Walk from 11 months back up to the anchor month itself.
        for months_back in reversed(range(12)):
            month_labels.append((anchor - relativedelta(months=months_back)).strftime('%Y-%m'))
        return tuple(month_labels)

    def get_month_week_tuple(self):
        """Return the ``year_week`` values of the ``week_day=1`` rows that fall in ``self.month``."""
        self.df_date = self.spark.sql("select * from dim_date_20_to_30 where week_day=1 ;")
        week_start_days = self.df_date.toPandas()
        in_month = week_start_days.loc[week_start_days.year_month == f'{self.month}']
        # An empty selection yields an empty tuple.
        return tuple(in_month.year_week)

    @staticmethod
    def udf_launch_time(launch_time, date_info):
        if launch_time is None:
            return 0
        date_format = "%Y-%m-%d"
        launch_date_format = datetime.strptime(str(launch_time), date_format)
        cur_date_format = datetime.strptime(str(date_info), date_format)
        print(cur_date_format)
        offset = abs((cur_date_format - launch_date_format).days)
        if offset <= 180:
            return 1
        else:
            return 2

    @staticmethod
    def udf_judge_is_raise_asin(is_appear_asin, asin_bsr_rank, previous_asin_bsr_rank):
        if is_appear_asin == 1:
            if previous_asin_bsr_rank is None and asin_bsr_rank is not None:
                return 1
            elif previous_asin_bsr_rank is not None and previous_asin_bsr_rank != 0 and asin_bsr_rank is not None:
                if (asin_bsr_rank - previous_asin_bsr_rank) / previous_asin_bsr_rank <= -0.5:
                    return 1
                else:
                    return 0
            else:
                return 0
        else:
            return 0

    @staticmethod
    def udf_judge_is_sales_surge_asin(asin_bsr_orders, previous_asin_bsr_orders):
        if asin_bsr_orders is not None and previous_asin_bsr_orders is None:
            return 1
        elif asin_bsr_orders is not None and previous_asin_bsr_orders is not None and previous_asin_bsr_orders != 0:
            if (asin_bsr_orders - previous_asin_bsr_orders) / previous_asin_bsr_orders >= 0.5:
                return 1
            else:
                return 0
        else:
            return 0

    @staticmethod
    def udf_parse_asin_category_and_rank(best_sellers_rank, all_best_sellers_href):
        if best_sellers_rank is not None and all_best_sellers_href is not None:
            bsr_rank_info_list = str(best_sellers_rank).split("&&&&")
            bsr_href_info_list = str(all_best_sellers_href).split("&&&&")
            asin_bsr_rank_pattern = r'((?:\d{1,3})(?:,\d{3})*)(?: +)in'
            asin_bsr_category_pattern = r'bestsellers/(.*)/ref'
            # 解析分类id
            if len(bsr_href_info_list) == 1:
                result = re.findall(asin_bsr_category_pattern, bsr_href_info_list[0])
                if result:
                    if '/' in result[0]:
                        category_first_id, category_id = str(result[0]).split("/")[0], str(result[0]).split("/")[1]
                    else:
                        category_first_id, category_id = str(result[0]).split("/")[0], None
                else:
                    category_first_id, category_id = None, None
            elif len(bsr_href_info_list) > 1:
                category_first_id = re.findall(asin_bsr_category_pattern, bsr_href_info_list[0])[0] if re.findall(
                    asin_bsr_category_pattern, bsr_href_info_list[0]) else None
                category_id = re.findall(asin_bsr_category_pattern, bsr_href_info_list[-1])[0] if re.findall(
                    asin_bsr_category_pattern, bsr_href_info_list[-1]) else None
                if '/' in category_first_id:
                    category_first_id = category_first_id.split("/")[0]
                if '/' in category_id:
                    category_id = category_id.split("/")[-1]
            else:
                category_first_id, category_id = None, None
            # 解析排名
            if len(bsr_rank_info_list) == 1:
                if category_first_id:
                    asin_bsr_rank = re.findall(asin_bsr_rank_pattern, bsr_rank_info_list[0])[0] if re.findall(
                        asin_bsr_rank_pattern, bsr_rank_info_list[0]) else None
                    asin_bsr_rank = asin_bsr_rank.replace(",", "")
                else:
                    asin_bsr_rank = None
            elif len(bsr_rank_info_list) > 1:
                if category_first_id:
                    asin_bsr_rank = re.findall(asin_bsr_rank_pattern, bsr_rank_info_list[0])[0] if re.findall(
                        asin_bsr_rank_pattern, bsr_rank_info_list[0]) else None
                    asin_bsr_rank = int(asin_bsr_rank.replace(",", ""))
                else:
                    asin_bsr_rank = None
            else:
                asin_bsr_rank = None
            return category_first_id, category_id, asin_bsr_rank

    @staticmethod
    def _sql_in_list(values):
        """Render ``values`` as a quoted, parenthesised list for a SQL ``in`` clause.

        Formatting a Python tuple directly yields ``('x',)`` for one element and
        ``()`` for none, neither of which is valid SQL. An empty input renders as
        ``(NULL)``, which matches no row.
        """
        quoted = [f"'{value}'" for value in values]
        if not quoted:
            return "(NULL)"
        return f"({', '.join(quoted)})"

    @staticmethod
    def _keep_latest_per_key(df, key_cols, order_col):
        """Keep, per ``key_cols`` group, the row with the greatest ``order_col`` (nulls last).

        ``orderBy`` followed by ``drop_duplicates`` does not guarantee which row
        of a group survives, so the newest row is selected explicitly with
        ``row_number``. ``order_col`` is removed from the result.
        """
        from pyspark.sql import Window

        newest_first = Window.partitionBy(*key_cols).orderBy(F.col(order_col).desc_nulls_last())
        return df.withColumn("_row_rank", F.row_number().over(newest_first)) \
            .filter(F.col("_row_rank") == 1) \
            .drop("_row_rank", order_col)

    def read_data(self):
        """Load every input DataFrame used by the handle_* steps.

        In real-time mode the store-level queries are restricted to
        ``self.seller_id_tuple``. Store/ASIN relations and ASIN details are read
        over JDBC from PostgreSQL; the remaining inputs come from Spark SQL.
        """
        print("1.读取店铺所在地信息表")
        if self.run_type == 'real_time':
            sql1 = f"""
                select seller_id, country_name as store_location, date_info from ods_seller_account_feedback 
                   where site_name='{self.site_name}' and date_type='month' and seller_id
            """
            sql = sql1 + f" in {self._sql_in_list(self.seller_id_tuple)}"
        else:
            sql = f"""
                       select seller_id, country_name as store_location, date_info from ods_seller_account_feedback 
                       where site_name='{self.site_name}' and date_type='month' and length(seller_id) > 2
                   """
        print("sql=", sql)
        # Keep only the most recent location row per seller.
        self.df_store_location_info = self._keep_latest_per_key(
            self.spark.sql(sqlQuery=sql), key_cols=['seller_id'], order_col='date_info').cache()

        print("2.读取店铺下前20页asin信息表")
        con_info = DBUtil.get_connection_info('postgresql', 'us')
        if self.run_type == 'real_time':
            sql1 = f"""
                       select a.account_id as seller_id, a.asin as asin, a.created_at as store_crawl_time,
                       a.account_name as store_name, b.results_of_num as store_asin_total_num 
                       from us_user_seller_collections  a 
                       left join us_user_seller_collections b
                       on a.account_id = b.account_id
                       and b.page=1 and b.page_rank=1
                       and b.account_id is not null 
                       where a.page <=20 and a.account_id 
                   """
            sql = sql1 + f" in {self._sql_in_list(self.seller_id_tuple)}"
        else:
            sql = f"""
                       select a.account_id as seller_id, a.asin as asin, a.created_at as store_crawl_time,
                       a.account_name as store_name, b.results_of_num as store_asin_total_num 
                       from us_user_seller_collections  a 
                       left join us_user_seller_collections b
                       on a.account_id = b.account_id
                       and b.page=1 and b.page_rank=1
                       and b.account_id is not null 
                       where a.page <=20 and a.account_id  is not null
                    """
        print("sql=", sql)
        self.df_store_relational_asin_info = SparkUtil.read_jdbc_query(
            session=self.spark,
            url=con_info['url'],
            pwd=con_info['pwd'],
            username=con_info['username'],
            query=sql
        ).cache()
        self.df_store_relational_asin_info = self.df_store_relational_asin_info.drop_duplicates(['seller_id', 'asin'])

        print("3.读取相关asin详情信息表")
        con_info = DBUtil.get_connection_info('postgresql', 'us')
        sql = f"""
                 select asin, price as asin_price, rating as asin_rating, total_comments as asin_total_comments, 
                 launch_time, parent_asin, best_sellers_rank, all_best_sellers_href 
                 from us_self_asin_detail_base where account_id is not null
             """
        print("sql=", sql)
        self.df_store_relational_asin_detail = SparkUtil.read_jdbc_query(
            session=self.spark,
            url=con_info['url'],
            pwd=con_info['pwd'],
            username=con_info['username'],
            query=sql
        ).cache()

        print("4.读取keep_date获取上架时间")
        sql = f"""
                   select asin, launch_time as new_launch_time, updated_at from ods_asin_keep_date where state=3 and site_name='{self.site_name}'
               """
        print("sql=", sql)
        # Keep only the most recently updated launch-time row per ASIN.
        self.df_asin_keep_date = self._keep_latest_per_key(
            self.spark.sql(sqlQuery=sql), key_cols=['asin'], order_col='updated_at').cache()

        print("5.读取月报告+对应周报告+年报告数据")
        sql = f"""
                   select asin1, asin2, asin3 from ods_brand_analytics where site_name='{self.site_name}' and date_type='month' and date_info='{self.month}'
               """
        print("month_sql", sql)
        self.df_top3_asin_month_report = self.spark.sql(sqlQuery=sql)
        sql = f"""
                   select asin1, asin2, asin3 from ods_brand_analytics where site_name='{self.site_name}' and date_type='week' and date_info in {self._sql_in_list(self.month_week_tuple)}
               """
        print("week_sql", sql)
        self.df_top3_asin_week_report = self.spark.sql(sqlQuery=sql)
        sql = f"""
                   select asin1, asin2, asin3, date_info from ods_brand_analytics where site_name='{self.site_name}' and date_type='month' and date_info in {self._sql_in_list(self.previous_12month_tuple)}
               """
        print("12month_sql", sql)
        self.df_top3_asin_year_report = self.spark.sql(sqlQuery=sql)

        print("6.读取asin的ao值信息")
        sql = f"""
                   select asin, asin_ao_val, date_info from dwd_asin_measure where site_name='{self.site_name}' and date_type='month'
               """
        print("sql=", sql)
        # Keep only the most recent monthly AO value per ASIN.
        self.df_asin_ao = self._keep_latest_per_key(
            self.spark.sql(sqlQuery=sql), key_cols=['asin'], order_col='date_info').cache()

        print("7.获取asin上个月的bsr排名信息")
        sql = f"""
                    select asin, asin_bs_cate_1_rank as previous_asin_bsr_rank, asin_bs_cate_1_id as category_first_id
                   from dim_asin_bs_info where site_name='{self.site_name}' and date_type='month' and date_info='{self.previous_month}'
               """
        print("sql=", sql)
        self.df_asin_previous_rank = self.spark.sql(sqlQuery=sql).cache()

        print("8.获取asin的bsr销量信息")
        sql = f"""
                   select asin, asin_bsr_orders from dwd_asin_measure where site_name='{self.site_name}' and date_type='month' and date_info='{self.month}'
               """
        print("sql=", sql)
        self.df_asin_bsr_orders = self.spark.sql(sqlQuery=sql).cache()

        print("9.获取asin上个月的bsr销量信息")
        sql = f"""
                   select asin, asin_bsr_orders as previous_asin_bsr_orders from dwd_asin_measure where site_name='{self.site_name}' and date_type='month' and date_info='{self.previous_month}'
               """
        print("sql=", sql)
        self.df_asin_previous_bsr_orders = self.spark.sql(sqlQuery=sql).cache()

        print("10.获取分类名称")
        sql = f"""
                  select category_id, en_name from dim_bsr_category_tree where site_name='{self.site_name}' and category_id is not null group by category_id, en_name
              """
        print('sql=', sql)
        self.df_asin_category_name_info = self.spark.sql(sqlQuery=sql).cache()

    def handle_store_asin_category(self):
        """Replace the raw best-seller columns with parsed category ids and BSR rank."""
        detail = self.df_store_relational_asin_detail
        parsed = self.u_parse_asin_category_and_rank(detail.best_sellers_rank, detail.all_best_sellers_href)
        detail = detail.withColumn("category_info", parsed)
        # Promote each struct field to a top-level column of the same name.
        for field_name in ("category_first_id", "category_id", "asin_bsr_rank"):
            detail = detail.withColumn(field_name, detail.category_info.getField(field_name))
        self.df_store_relational_asin_detail = detail.drop(
            "category_info", "all_best_sellers_href", "best_sellers_rank")

    def handle_store_asin_detail(self):
        """Left-join every per-ASIN and per-seller input onto the store/ASIN relation."""
        # (DataFrame, join keys) pairs, applied in this order.
        lookups = (
            (self.df_store_relational_asin_detail, ['asin']),
            (self.df_asin_bsr_orders, ['asin']),
            (self.df_asin_keep_date, ['asin']),
            (self.df_asin_ao, ['asin']),
            (self.df_asin_previous_bsr_orders, ['asin']),
            (self.df_asin_previous_rank, ['asin', 'category_first_id']),
            (self.df_store_location_info, ['seller_id']),
        )
        joined = self.df_store_relational_asin_info
        for lookup_df, join_keys in lookups:
            joined = joined.join(lookup_df, on=join_keys, how='left')
        self.df_store_asin_detail = joined

    def handle_store_asin_category_name(self):
        """Attach ``category_first_name`` and ``category_name`` looked up by category id."""
        names = self.df_asin_category_name_info
        # The same name table is used for both levels; rename its columns up
        # front so each join delivers the final column name directly.
        first_level_names = names.withColumnRenamed('category_id', 'category_first_id') \
            .withColumnRenamed('en_name', 'category_first_name')
        leaf_names = names.withColumnRenamed('en_name', 'category_name')
        with_first_level = self.df_store_asin_detail.join(
            first_level_names, on=['category_first_id'], how='left')
        self.df_store_asin_detail = with_first_level.join(leaf_names, on=['category_id'], how='left')

    def handle_store_asin_launch_time(self):
        """Derive ``asin_launch_time`` and classify each ASIN into ``asin_type``."""
        launch_col = F.col("launch_time")
        # Fall back to the keep_date launch time when the scraped one is
        # missing or the literal string 'null'.
        launch_missing = launch_col.isNull() | (launch_col == 'null')
        resolved_launch = F.when(launch_missing, F.col("new_launch_time")).otherwise(launch_col)
        detail = self.df_store_asin_detail.withColumn("asin_launch_time", resolved_launch)
        detail = detail.drop("launch_time", "new_launch_time")
        # Classify by launch time (new / old / unknown).
        asin_type = self.u_launch_time(F.col("asin_launch_time"), F.lit(self.date_info))
        self.df_store_asin_detail = detail.withColumn("asin_type", asin_type)

    def handle_asin_ao(self):
        """Add ``is_standard_ao``: 1 when ``asin_ao_val`` is at most 0.5, otherwise 0."""
        within_threshold = F.col("asin_ao_val") <= 0.5
        standard_ao_flag = F.when(within_threshold, F.lit(1)).otherwise(F.lit(0))
        self.df_store_asin_detail = self.df_store_asin_detail.withColumn("is_standard_ao", standard_ao_flag)

    def handle_asin_type(self):
        """Add the rising / popular / high-quality / sales-surge flags to the detail frame."""
        top_columns = ("asin1", "asin2", "asin3")

        def distinct_top_asins(report_df):
            # Stack the three top-ASIN columns into one distinct "asin" column.
            stacked = None
            for top_column in top_columns:
                part = report_df.selectExpr(f"{top_column} as asin").drop_duplicates(['asin'])
                stacked = part if stacked is None else stacked.union(part)
            return stacked.drop_duplicates(['asin'])

        # Monthly report plus its weekly reports = complete monthly report.
        week_asins = distinct_top_asins(self.df_top3_asin_week_report)
        month_asins = distinct_top_asins(self.df_top3_asin_month_report)
        appeared = month_asins.union(week_asins).drop_duplicates(['asin']) \
            .withColumn("is_appear_asin", F.lit(1))

        # Rising product: judged from the complete monthly report.
        detail = self.df_store_asin_detail.join(appeared, on=['asin'], how='left')
        raise_flag = self.u_judge_is_raise_asin(
            F.col("is_appear_asin"), F.col("asin_bsr_rank"), F.col("previous_asin_bsr_rank"))
        detail = detail.withColumn('is_raise_asin', raise_flag)
        detail = detail.drop("is_appear_asin", "previous_asin_bsr_rank")

        # Monthly reports of the last 12 months = complete yearly report.
        yearly = None
        for top_column in top_columns:
            part = self.df_top3_asin_year_report.select(top_column, "date_info") \
                .drop_duplicates([top_column, 'date_info']) \
                .withColumnRenamed(top_column, "asin")
            yearly = part if yearly is None else yearly.union(part)
        yearly = yearly.drop_duplicates(['asin', 'date_info'])
        appear_counts = yearly.groupby(['asin']).agg(F.count('date_info').alias('asin_appear_num'))

        # Popular product: present in at least 8 of the monthly reports.
        detail = detail.join(appear_counts, on=['asin'], how='left')
        popular_flag = F.when(F.col("asin_appear_num") >= 8, F.lit(1)).otherwise(F.lit(0))
        detail = detail.withColumn("is_popular_asin", popular_flag).drop("asin_appear_num")

        # High-quality product: both rising and popular.
        is_rising_and_popular = (F.col("is_raise_asin") == 1) & (F.col("is_popular_asin") == 1)
        detail = detail.withColumn(
            "is_high_quantity_asin", F.when(is_rising_and_popular, F.lit(1)).otherwise(F.lit(0)))

        # Sales-surge product: compares current and previous BSR orders.
        surge_flag = self.u_judge_is_sales_surge_asin(
            F.col("asin_bsr_orders"), F.col("previous_asin_bsr_orders"))
        detail = detail.withColumn("is_sales_surge_asin", surge_flag)
        self.df_store_asin_detail = detail.drop("previous_asin_bsr_orders")

    def handle_data_group(self):
        """Build ``self.df_save`` by adding audit timestamps and the partition columns."""
        # 'ss' is second-of-minute; the former 'SS' pattern letter stands for
        # fraction-of-second and wrote the wrong digits into the timestamp text.
        timestamp_text = F.date_format(F.current_timestamp(), 'yyyy-MM-dd HH:mm:ss')
        self.df_save = self.df_store_asin_detail
        self.df_save = self.df_save.withColumn("created_time", timestamp_text). \
            withColumn("updated_time", timestamp_text)
        self.df_save = self.df_save.withColumn("site_name", F.lit(self.site_name))
        self.df_save = self.df_save.withColumn("date_type", F.lit(self.date_type))
        self.df_save = self.df_save.withColumn("date_info", F.lit(self.date_info))

    def handle_data(self):
        """Run every transformation step in order, ending with ``self.df_save``."""
        # Order matters: each step consumes columns produced by the previous ones.
        pipeline = (
            self.handle_store_asin_category,
            self.handle_store_asin_detail,
            self.handle_store_asin_category_name,
            self.handle_store_asin_launch_time,
            self.handle_asin_ao,
            self.handle_asin_type,
            self.handle_data_group,
        )
        for step in pipeline:
            step()

    def save_data(self):
        """Persist the result: parquet under the tmp path in real-time mode, else ``Templates.save_data``."""
        if self.run_type != 'real_time':
            Templates.save_data(self)
            return
        output_path = f"/home/big_data_selection/tmp/{self.db_save}"
        print("当前存储的路径为：", output_path)
        result_df = self.df_save.repartition(self.partitions_num)
        result_df.persist()
        result_df.write.mode("overwrite").parquet(output_path)

    def run(self):
        """Execute the job end to end: read the inputs, transform them, save the result."""
        for stage in (self.read_data, self.handle_data, self.save_data):
            stage()


if __name__ == '__main__':
    # Positional arguments:
    #   1: site name
    #   2: date type (week/4_week/month/quarter)
    #   3: date info (year-week / year-month / year-quarter, e.g. 2022-1)
    #   4: run type ('real_time' or anything else for a batch run)
    #   5: comma-separated seller ids (optional; only read in real_time mode)
    if len(sys.argv) < 5:
        sys.exit(f"usage: {sys.argv[0]} <site_name> <date_type> <date_info> <run_type> [seller_ids]")
    site_name = sys.argv[1]
    date_type = sys.argv[2]
    date_info = sys.argv[3]
    run_type = sys.argv[4]
    # Batch runs never use the seller ids, so the fifth argument may be omitted.
    seller_id_tuple = sys.argv[5] if len(sys.argv) > 5 else tuple()
    handle_obj = DwsUserCollectStoreAsinDetail(site_name=site_name, date_type=date_type, date_info=date_info,
                                               run_type=run_type, seller_id_tuple=seller_id_tuple)
    handle_obj.run()
