"""
ABA搜索词统计报表
"""
"""
author: 汪瑞
description: 基于dwd层等表，计算出search_term维度的报表信息
table_read_name: dim_cal_asin_history_detail系列表,dwd_st_measure系列表,dws_top100_asin_info系列表,ods_st_key系列表, dwd_asin_measure系列表, dwd_st_asin_measure系列表
table_save_name: dwt_aba_st_analytics_report
table_save_level: dwt
version: 1.0
created_date: 2022-11-17
updated_date: 2022-11-17
"""

import os
import sys

sys.path.append(os.path.dirname(sys.path[0]))  # 上级目录
from utils.templates import Templates
# from AmazonSpider.pyspark_job.utils.templates import Templates
# 分组排序的udf窗口函数
from pyspark.sql.window import Window
from pyspark.sql import functions as F
from pyspark.sql.types import StringType, IntegerType
from sqlalchemy import create_engine
import pandas as pd
import datetime

class DwtAbaStAnalyticsReport(Templates):

    def __init__(self, site_name="us", date_type="day", date_info="2022-11-02"):
        """Store the run parameters, open a Spark session and register the UDFs.

        Args:
            site_name: Site code used to filter the source tables (default "us").
            date_type: Period type of ``date_info``. This method only assigns
                ``partitions_type`` for "week", "4_week", "month" and "quarter".
            date_info: Period being processed, e.g. "2022-11-02" for a daily run.
        """
        super().__init__()
        self.site_name = site_name
        self.date_type = date_type
        self.date_info = date_info
        self.db_save = f"dwt_aba_st_analytics_report"


        # create_spark_object is not defined in this class; presumably it is
        # inherited from Templates -- confirm.
        self.spark = self.create_spark_object(app_name=f"{self.db_save} {self.site_name}, {self.date_info}")
        # udf_year_month returns None unless date_type == 'day'.
        self.year_month = self.udf_year_month(self)

        # Calendar date that represents the requested period.
        self.year_day = self.udf_year_day(self)

        self.df_save = self.spark.sql(f"select 1+1;")

        # Placeholder DataFrames: each attribute starts as the result of a
        # trivial query and is replaced by the processing methods.
        self.df_asin_history_detail = self.spark.sql(f"select 1+1;")
        self.df_st_asin_measure = self.spark.sql(f"select 1+1;")
        self.df_st_asin_detail = self.spark.sql(f"select 1+1;")
        self.df_st_key = self.spark.sql(f"select 1+1;")
        self.df_top100_asin = self.spark.sql(f"select 1+1;")
        self.df_st_measeure = self.spark.sql(f"select 1+1;")
        self.df_asin_measure = self.spark.sql(f"select 1+1;")
        self.df_asin_count = self.spark.sql(f"select 1+1;")
        self.df_save = self.spark.sql(f"select 1+1;")
        self.df_st_buy_box = self.spark.sql(f"select 1+1;")
        self.df_st_color = self.spark.sql(f"select 1+1;")
        self.df_st_seller = self.spark.sql(f"select 1+1;")
        self.df_st_top20 = self.spark.sql(f"select 1+1;")
        self.df_st_new_asin_group = self.spark.sql(f"select 1+1;")
        self.df_st_bsr_orders_group = self.spark.sql(f"select 1+1;")
        self.df_st_aadd_bsr_orders_group = self.spark.sql(f"select 1+1;")
        self.df_st_img_type_group = self.spark.sql(f"select 1+1;")
        self.df_st_launch_time_group = self.spark.sql(f"select 1+1;")
        self.df_st_price_group = self.spark.sql(f"select 1+1;")
        self.df_st_ao_group = self.spark.sql(f"select 1+1;")
        self.df_st_comments_group = self.spark.sql("select 1+1;")

        # Overrides the df_save placeholders above with an empty frame that has
        # the columns of the target table.
        self.df_save = self.spark.sql(f"select * from dwt_aba_st_analytics_report limit 0;")


        self.partitions_by = ['site_name', 'date_type', 'date_info']
        # NOTE(review): reset_partitions is defined outside this file section;
        # presumably it sets the number of output partitions -- confirm.
        self.reset_partitions(15)
        # NOTE(review): no branch assigns partitions_type for other date types,
        # including the default "day"; presumably the base class provides a
        # default value -- confirm.
        if self.date_type in ["week", "4_week"]:
            self.partitions_type = "dt"
        elif self.date_type in ["month"]:
            self.partitions_type = "dm"
        elif self.date_type in ["quarter"]:
            self.partitions_type = "dq"


        # Register the static helpers as Spark UDFs; every one is declared with
        # a StringType() return type.
        self.u_get_color_num = self.spark.udf.register("u_get_color_num", self.udf_get_color_num, StringType())
        self.u_get_buy_box_num = self.spark.udf.register("u_get_buy_box_num", self.udf_get_buy_box_num, StringType())
        self.u_get_buy_box = self.spark.udf.register("u_get_buy_box", self.udf_get_buy_box_type, StringType())
        self.u_get_img_type = self.spark.udf.register("u_get_img_type", self.udf_get_img_type, StringType())
        self.u_year_week = self.spark.udf.register('u_year_week', self.udf_year_week, StringType())
        self.u_get_seller_num = self.spark.udf.register('u_get_seller_num', self.udf_get_seller_num, StringType())
        self.u_get_seller_bsr_orders = self.spark.udf.register('u_get_seller_bsr_orders', self.udf_get_seller_bsr_orders, StringType())


    @staticmethod
    def udf_year_month(self):
        """Look up the ``year_month`` value that belongs to a daily ``date_info``.

        Declared as a static method but called with the report instance as its
        only argument. Every call loads ``dim_date_20_to_30`` into
        ``self.df_date`` and converts it to pandas.

        Returns:
            The ``year_month`` of the first calendar row whose ``date`` equals
            ``date_info`` when ``date_type`` is 'day'; ``None`` for every other
            date type.

        Raises:
            IndexError: If no calendar row matches ``date_info``.
        """
        self.df_date = self.spark.sql("select * from dim_date_20_to_30;")
        calendar = self.df_date.toPandas()
        if self.date_type != 'day':
            return None
        target_date = f'{self.date_info}'
        matching_months = list(calendar.loc[calendar.date == target_date].year_month)
        month_key = matching_months[0]
        print(month_key)
        return month_key

    @staticmethod
    def udf_year_day(self):
        """Resolve ``date_info`` to a calendar date string.

        Declared as a static method but called with the report instance as its
        only argument. ``self.df_date`` is (re)loaded from ``dim_date_20_to_30``
        on every call.

        Returns:
            ``date_info`` itself for a 'day' run; otherwise the ``date`` of the
            first calendar row whose ``year_week`` ('week') or ``year_month``
            ('month') equals ``date_info``.

        Raises:
            ValueError: If ``date_type`` is not 'day', 'week' or 'month'
                (previously this surfaced as an UnboundLocalError).
            IndexError: If no calendar row matches ``date_info``.
        """
        self.df_date = self.spark.sql("select * from dim_date_20_to_30;")
        if self.date_type == 'day':
            # A daily run already carries the calendar date, so the calendar
            # table does not have to be collected to the driver.
            return self.date_info

        lookup_column = {'week': 'year_week', 'month': 'year_month'}.get(self.date_type)
        if lookup_column is None:
            raise ValueError(
                f"unsupported date_type {self.date_type!r}: expected 'day', 'week' or 'month'"
            )

        df = self.df_date.toPandas()
        matching_dates = list(df.loc[df[lookup_column] == f'{self.date_info}'].date)
        if not matching_dates:
            raise IndexError(
                f"no row in dim_date_20_to_30 with {lookup_column} == {self.date_info!r}"
            )
        return matching_dates[0]

    @staticmethod
    def udf_year_week(dt):
        """Normalize a ``"<year>-<week>"`` string to a two-digit week number.

        Args:
            dt: Text whose first two ``-``-separated parts are the year and the
                week number, e.g. ``"2022-5"``.

        Returns:
            ``"<year>-<week>"`` with the week zero-padded to two digits. A week
            that is already padded (``"2022-05"``) is returned unchanged
            instead of gaining a second zero.

        Raises:
            IndexError: If ``dt`` contains no ``-``.
            ValueError: If the week part is not an integer.
        """
        parts = dt.split("-")
        year, week = parts[0], parts[1]
        return f"{year}-{int(week):02d}"

    @staticmethod
    def udf_get_img_type(img_type):
        """Classify a comma-separated image-type value into one of four labels.

        The value is converted to text and split on ``,``. The label depends
        only on whether the entries ``'2'`` and ``'3'`` are present: ``'3'``
        selects the ``aadd`` / ``no_aadd`` half of the label and ``'2'`` the
        ``video`` / ``no_video`` half.

        Args:
            img_type: Any value; it is passed through ``str()`` first.

        Returns:
            One of 'aadd_video_num', 'aadd_no_video_num',
            'no_aadd_no_video_num' or 'no_aadd_video_num'.
        """
        codes = str(img_type).split(",")
        # Keyed by (contains '2', contains '3').
        label_by_flags = {
            (True, True): 'aadd_video_num',
            (False, True): 'aadd_no_video_num',
            (False, False): 'no_aadd_no_video_num',
            (True, False): 'no_aadd_video_num',
        }
        return label_by_flags['2' in codes, '3' in codes]

    @staticmethod
    def udf_get_buy_box_type(buy_box):
        """Translate a buy-box seller code into its display label.

        Args:
            buy_box: Seller-type code; compared by its ``str()`` form.

        Returns:
            'Amazon' for "1", 'FBA' for "2", 'FBM' for "3" and 'other' for any
            other value (including ``None``).
        """
        label_by_code = {"1": 'Amazon', "2": 'FBA', "3": 'FBM'}
        return label_by_code.get(str(buy_box), 'other')

    @staticmethod
    def udf_get_seller_num(seller_type, all_seller):
        """Count how many entries of ``all_seller`` equal each seller name.

        Entries are compared as whole comma-separated items, so a name that is
        contained in another one (e.g. "US" inside "RUS") is not counted for
        the longer name's entries.

        Args:
            seller_type: Comma-separated distinct seller names.
            all_seller: Comma-separated seller names, one entry per occurrence.

        Returns:
            Comma-separated counts in the order of ``seller_type``; ``None``
            when either argument is ``None`` (a null column value); ``""``
            when ``seller_type`` is empty.
        """
        if seller_type is None or all_seller is None:
            return None
        type_text = str(seller_type)
        if not type_text:
            # No seller names means there is nothing to count.
            return ""
        occurrences = str(all_seller).split(",")
        return ",".join(str(occurrences.count(name)) for name in type_text.split(","))

    @staticmethod
    def udf_get_seller_bsr_orders(seller_type, all_seller, all_seller_bsr_orders):
        """Sum the BSR orders of every seller name listed in ``seller_type``.

        ``all_seller`` and ``all_seller_bsr_orders`` are read as parallel
        comma-separated lists: the n-th order value belongs to the n-th seller
        name. Pairs beyond the shorter list are ignored.

        Args:
            seller_type: Comma-separated distinct seller names.
            all_seller: Comma-separated seller names, one entry per occurrence.
            all_seller_bsr_orders: Comma-separated integer order counts.

        Returns:
            Comma-separated totals, one per name and in the order of
            ``seller_type``; ``None`` when any argument is ``None``; ``""``
            when ``seller_type`` is empty.

        Raises:
            ValueError: If a non-blank order entry is not an integer.
        """
        if seller_type is None or all_seller is None or all_seller_bsr_orders is None:
            return None
        type_text = str(seller_type)
        if not type_text:
            return ""

        seller_types = type_text.split(",")
        totals = dict.fromkeys(seller_types, 0)
        pairs = zip(str(all_seller).split(","), str(all_seller_bsr_orders).split(","))
        for seller, orders in pairs:
            # Blank order entries (e.g. from an empty list) count as zero.
            if seller in totals and orders.strip():
                totals[seller] += int(orders)
        return ",".join(str(totals[seller]) for seller in seller_types)

    @staticmethod
    def udf_get_buy_box_num(buy_box_type, buy_box_list):
        """Count how many entries of ``buy_box_list`` equal each buy-box label.

        Args:
            buy_box_type: Comma-separated distinct buy-box labels.
            buy_box_list: Comma-separated buy-box labels, one per occurrence.

        Returns:
            Comma-separated counts in the order of ``buy_box_type``; ``None``
            when either argument is ``None`` (a null column value); ``""``
            when ``buy_box_type`` is empty.
        """
        if buy_box_type is None or buy_box_list is None:
            return None
        type_text = str(buy_box_type)
        if not type_text:
            return ""
        # Whole-entry comparison, so one label can never match inside another.
        occurrences = str(buy_box_list).split(",")
        return ",".join(str(occurrences.count(label)) for label in type_text.split(","))

    @staticmethod
    def udf_get_color_num(color_type, color_list):
        """Count the occurrences of each colour name inside ``color_list``.

        Args:
            color_type: Distinct colour names separated by ``&&&&``.
            color_list: Text containing every colour occurrence.

        Returns:
            Comma-separated counts in the order of ``color_type``; ``None``
            when either argument is ``None`` (a null column value); ``""``
            when ``color_type`` is empty.
        """
        if color_type is None or color_list is None:
            return None
        type_text = str(color_type)
        if not type_text:
            return ""
        # NOTE(review): this is a substring count on the raw text, so a colour
        # such as "Blue" is also counted inside "Light Blue". Exact matching
        # would need color_list to use the same "&&&&" separator -- confirm
        # with the code that builds color_list.
        return ",".join(str(color_list.count(color)) for color in type_text.split("&&&&"))


    def read_data(self):
        """Load every source table of the report into cached DataFrames.

        Each statement is printed before it is executed. The results are
        stored on the instance:

        * ``df_st_asin_measure``: search_term / asin pairs of the period.
        * ``df_asin_history_detail``: ASIN attributes of the site; null
          ``asin_price`` becomes 0.0, null ``asin_is_new`` and
          ``asin_img_type`` become -1, and ``asin_price`` is renamed ``price``.
        * ``df_st_measeure``: ``st_zr_orders`` per search term, nulls set to 0
          and renamed ``orders``.
        * ``df_st_key``: search term to integer ``search_term_id`` mapping.
        * ``df_top100_asin``: top-100 ASIN columns per ``search_term_id``.
        * ``df_asin_measure``: ``asin_bsr_orders`` (cast to int) and
          ``asin_ao_val`` per ASIN, nulls set to 0 / 0.0.
        * ``df_seller_asin``: ASIN with upper-cased ``country_name`` exposed
          as ``seller_name``.
        """
        site_clause = f"where site_name='{self.site_name}';"
        period_clause = (
            f"where site_name='{self.site_name}' and date_type='{self.date_type}' "
            f"and date_info='{self.date_info}';"
        )

        def fetch(query):
            # Echo the statement, run it and cache the result.
            print("sql:", query)
            return self.spark.sql(sqlQuery=query).cache()

        print("1.1 读取dwd_st_asin_measure系列表")
        self.df_st_asin_measure = fetch(
            "select search_term, asin from dwd_st_asin_measure " + period_clause
        )

        print("1.2 读取dim_cal_asin_history_detail系列表")
        history_detail = fetch(
            "select asin, asin_price, asin_is_new, asin_img_type, asin_is_aadd, asin_launch_time, "
            "asin_total_comments, asin_buy_box_seller_type, asin_color, asin_brand_name "
            "from dim_cal_asin_history_detail " + site_clause
        )
        # Replace nulls before the price column gets its report name.
        history_detail = history_detail.na.fill({"asin_price": 0.0, "asin_is_new": -1, "asin_img_type": -1})
        self.df_asin_history_detail = history_detail.withColumnRenamed("asin_price", 'price')

        print("1.3 读取dwd_st_measure系列表")
        st_measure = fetch("select search_term, st_zr_orders from dwd_st_measure " + period_clause)
        # Replace nulls, then expose the order count as "orders".
        st_measure = st_measure.na.fill({"st_zr_orders": 0})
        self.df_st_measeure = st_measure.withColumnRenamed("st_zr_orders", 'orders')

        print("1.4 读取ods_st_key系列表")
        self.df_st_key = fetch(
            "select cast(st_key as int) as search_term_id , search_term from ods_st_key " + site_clause
        )

        print("1.5 读取dws_top100_asin_info系列表")
        self.df_top100_asin = fetch(
            "select search_term_id, top100_asin, top100_orders, top100_market_share, top100_is_new "
            "from dws_top100_asin_info " + period_clause
        )

        print("1.6 读取dwd_asin_measure系列表")
        asin_measure = fetch(
            "select asin, cast(asin_bsr_orders as int) as asin_bsr_orders, asin_ao_val "
            "from dwd_asin_measure " + period_clause
        )
        self.df_asin_measure = asin_measure.na.fill({"asin_bsr_orders": 0, "asin_ao_val": 0.0})

        print("1.7 读取dim_seller_asin_history_info系列表")
        self.df_seller_asin = fetch(
            "select asin, upper(country_name) as seller_name "
            f"from dim_seller_asin_history_info  where site_name = '{self.site_name}';"
        )

    def get_st_asin_detail(self):
        """Build ``df_st_asin_detail``: one row per search_term / asin pair.

        The pairs are left-joined on ``asin`` with the ASIN attributes and the
        ASIN measures. ``aadd_asin_bsr_orders`` carries ``asin_bsr_orders``
        when ``asin_is_aadd`` equals 1 and 0 when it differs from 1. Remaining
        nulls in ``price`` and ``aadd_asin_bsr_orders`` (e.g. ASINs without a
        match in the joined tables) are replaced by 0.
        """
        joined = self.df_st_asin_measure.join(
            self.df_asin_history_detail, on=['asin'], how='left'
        )
        joined = joined.join(self.df_asin_measure, on=['asin'], how='left')

        is_aadd = F.col("asin_is_aadd")
        # Neither branch matches a null flag; those rows are covered by the
        # fill below.
        aadd_orders = F.when(is_aadd == 1, F.col("asin_bsr_orders")).when(is_aadd != 1, F.lit(0))

        with_aadd_orders = joined.withColumn('aadd_asin_bsr_orders', aadd_orders)
        self.df_st_asin_detail = with_aadd_orders.na.fill({"price": 0.0, "aadd_asin_bsr_orders": 0})

    def get_st_attribute(self):
        """Aggregate per-search-term totals from ``df_st_asin_detail``.

        Produces three DataFrames keyed by ``search_term``:

        * ``df_st_new_asin_group``: ``new_asin_num``, the sum of the positive
          ``asin_is_new`` flags.
        * ``df_st_bsr_orders_group``: ``bsr_orders``, the sum of
          ``asin_bsr_orders``.
        * ``df_st_aadd_bsr_orders_group``: ``aadd_bsr_orders``, the sum of
          ``aadd_asin_bsr_orders``.
        """
        # read_data replaces a null asin_is_new with -1. Summing the raw column
        # would subtract one from the total for every such ASIN, so only
        # positive flags contribute.
        is_new = F.col("asin_is_new")
        new_flag = F.when(is_new > 0, is_new).otherwise(F.lit(0))

        self.df_st_new_asin_group = self.df_st_asin_detail.groupby(["search_term"]).agg(
            F.sum(new_flag).alias('new_asin_num'))
        self.df_st_bsr_orders_group = self.df_st_asin_detail.groupby(["search_term"]).agg(
            F.sum("asin_bsr_orders").alias('bsr_orders'))
        self.df_st_aadd_bsr_orders_group = self.df_st_asin_detail.groupby(["search_term"]).agg(
            F.sum("aadd_asin_bsr_orders").alias('aadd_bsr_orders'))

    def get_st_asin_seller(self):
        """Build ``df_st_seller``: seller statistics per search term.

        ``df_seller_asin`` is replaced by the search_term / asin pairs
        left-joined with the seller names and the ASIN measures, so this method
        is meant to run once after ``read_data``.

        Resulting columns of ``df_st_seller``: ``search_term``,
        ``seller_name_list`` (all seller names), ``seller_name`` (distinct
        seller names), ``seller_bsr_orders_list`` (all BSR orders),
        ``seller_num`` and ``seller_bsr_orders`` (computed by the registered
        UDFs). The first ten rows are printed.
        """
        self.df_seller_asin = self.df_st_asin_measure.join(
            self.df_seller_asin, on=['asin'], how='left'
        ).join(
            self.df_asin_measure, on=['asin'], how='left'
        )

        # One aggregation instead of three separate groupBy/join rounds, so
        # both collect_list columns are built in the same pass over the rows.
        # NOTE(review): collect_list skips nulls, so an ASIN without a seller
        # name (or without BSR orders) still shifts the two lists against each
        # other -- confirm how such rows should be treated.
        df_seller_group = self.df_seller_asin.groupBy(['search_term']).agg(
            F.concat_ws(",", F.collect_list("seller_name")).alias("seller_name_list"),
            F.concat_ws(",", F.collect_set("seller_name")).alias("seller_name_type"),
            F.concat_ws(",", F.collect_list("asin_bsr_orders")).alias("seller_bsr_orders_list"),
        )

        # Keep the single-column frames available as attributes.
        self.df_seller_asin_seller_name_list = df_seller_group.select("search_term", "seller_name_list")
        self.df_seller_asin_seller_bsr_orders_list = df_seller_group.select("search_term", "seller_bsr_orders_list")
        self.df_seller_asin_seller_name_type = df_seller_group.select("search_term", "seller_name_type")

        self.df_st_seller = self.df_st_measeure.join(
            df_seller_group, on=['search_term'], how='left'
        )

        self.df_st_seller = self.df_st_seller.withColumn(
            "seller_num",
            self.u_get_seller_num(self.df_st_seller.seller_name_type, self.df_st_seller.seller_name_list))
        self.df_st_seller = self.df_st_seller.withColumn(
            "seller_bsr_orders",
            self.u_get_seller_bsr_orders(self.df_st_seller.seller_name_type,
                                         self.df_st_seller.seller_name_list,
                                         self.df_st_seller.seller_bsr_orders_list))

        # "st_zr_orders" was already renamed to "orders" in read_data; dropping
        # a missing column is a no-op.
        self.df_st_seller = self.df_st_seller.drop("st_zr_orders", "orders")
        self.df_st_seller = self.df_st_seller.withColumnRenamed("seller_name_type", "seller_name")
        self.df_st_seller.show(10, truncate=False)

    def get_st_buy_box(self):
        """Build ``df_st_buy_box``: buy-box label statistics per search term.

        Every ASIN row gets a ``buy_box_seller_type`` label from the
        ``u_get_buy_box`` UDF. Per search term the distinct labels
        (``buy_box_name``) and all labels (``buy_box_list``) are collected as
        comma-separated text and passed to ``u_get_buy_box_num`` to produce
        ``buy_box_num``. The result keeps ``search_term``, ``buy_box_name``
        and ``buy_box_num``.
        """
        labelled = self.df_st_asin_detail.withColumn(
            "buy_box_seller_type", self.u_get_buy_box(F.col("asin_buy_box_seller_type"))
        )
        self.df_st_asin_buy_box_detail = labelled

        label = F.col("buy_box_seller_type")
        self.df_st_buy_box_name_type = labelled.groupby(['search_term']).agg(
            F.concat_ws(",", F.collect_set(label)).alias("buy_box_name")
        )
        self.df_st_buy_box_name_list = labelled.groupby(['search_term']).agg(
            F.concat_ws(",", F.collect_list(label)).alias("buy_box_list")
        )

        # Start from the search-term frame so that every search term is kept.
        per_term = self.df_st_measeure.join(
            self.df_st_buy_box_name_type, on=['search_term'], how='left'
        ).join(
            self.df_st_buy_box_name_list, on=['search_term'], how='left'
        )
        per_term = per_term.withColumn(
            "buy_box_num", self.u_get_buy_box_num(F.col("buy_box_name"), F.col("buy_box_list"))
        )

        self.df_st_buy_box = per_term.drop("buy_box_list", "orders")
        print("buy_box------------------")

    def get_st_color(self):
        """Build ``df_st_color``: colour statistics per search term.

        Per search term the distinct colours are joined with ``&&&&`` into
        ``color_name`` and every colour occurrence is joined with ``,`` into
        ``color_list``. ``u_get_color_num`` turns both into ``color_num``.
        The result keeps ``search_term``, ``color_name`` and ``color_num``.
        """
        self.df_st_color_name_type = self.df_st_asin_detail.groupby(["search_term"]).agg(
            F.concat_ws("&&&&", F.collect_set(self.df_st_asin_detail.asin_color)).alias("color_name"))
        # collect_list (not collect_set): the occurrence list must keep
        # duplicates, otherwise each colour appears only once and the counts
        # derived from it say nothing about how often a colour occurs. This
        # mirrors how the buy-box and seller lists are built.
        self.df_st_color_name_list = self.df_st_asin_detail.groupby(["search_term"]).agg(
            F.concat_ws(",", F.collect_list(self.df_st_asin_detail.asin_color)).alias("color_list"))

        self.df_st_color = self.df_st_measeure.join(
            self.df_st_color_name_type, on=['search_term'], how='left'
        ).join(
            self.df_st_color_name_list, on=['search_term'], how='left'
        )
        self.df_st_color = self.df_st_color.withColumn(
            "color_num", self.u_get_color_num(self.df_st_color.color_name, self.df_st_color.color_list))
        self.df_st_color = self.df_st_color.drop("color_list", "orders")

    def get_img_type(self):
        """Count ASIN rows per image-type label for every search term.

        Adds an ``img_type`` column (from the ``u_get_img_type`` UDF) to
        ``df_st_asin_detail`` and stores the pivoted counts in
        ``df_st_img_type_group``, one column per label found in the data.
        """
        labelled = self.df_st_asin_detail.withColumn(
            'img_type', self.u_get_img_type(F.col("asin_img_type"))
        )
        self.df_st_asin_detail = labelled

        # NOTE(review): the pivot values are inferred from the data, so a label
        # that never occurs produces no column -- confirm that the consumers of
        # df_st_img_type_group can handle a missing column.
        per_term = labelled.groupby(["search_term"])
        self.df_st_img_type_group = per_term.pivot("img_type").agg(F.count("search_term"))

    def get_price_range(self):
        """Count ASIN rows per price bucket and their share per search term.

        Buckets (``price_range_num1`` .. ``price_range_num18``): bucket 1 is
        ``0 < price < 4``, buckets 2-17 are the half-open ranges between the
        consecutive edges below, and bucket 18 is ``price >= 50``. Rows with a
        price of 0 or less fall into no bucket.

        ``df_st_price_group`` receives ``search_term``, the 18 count columns,
        ``total_asin_num`` and ``price_range_market_share1`` ..
        ``price_range_market_share18`` (count divided by ``total_asin_num``).

        Requires ``df_asin_count`` to hold ``search_term`` / ``total_asin_num``,
        i.e. ``get_asin_count`` has to run first.
        """
        bucket_edges = [4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 25, 28, 30, 35, 40, 45, 50]
        bucket_total = len(bucket_edges) + 1
        price = F.col("price")

        price_type = F.when((price > 0) & (price < bucket_edges[0]), "price_range_num1")
        for bucket_no, (low, high) in enumerate(zip(bucket_edges, bucket_edges[1:]), start=2):
            price_type = price_type.when((price >= low) & (price < high), f"price_range_num{bucket_no}")
        price_type = price_type.when(price >= bucket_edges[-1], f"price_range_num{bucket_total}")

        self.df_st_asin_detail_price = self.df_st_asin_detail.withColumn('price_type', price_type)

        # Passing the bucket names to pivot() guarantees that all 18 columns
        # exist even when a bucket has no rows; with inferred values the
        # select below fails on the missing column. It also avoids the extra
        # pass that determines the distinct values.
        num_columns = [f"price_range_num{bucket_no}" for bucket_no in range(1, bucket_total + 1)]
        self.df_st_price_group = self.df_st_asin_detail_price.groupby(["search_term"]). \
            pivot("price_type", num_columns).agg(F.count("search_term"))
        self.df_st_price_group = self.df_st_price_group.select("search_term", *num_columns)

        self.df_st_price_group = self.df_st_price_group.join(
            self.df_asin_count, on=['search_term'], how='left'
        )

        for bucket_no in range(1, bucket_total + 1):
            self.df_st_price_group = self.df_st_price_group.withColumn(
                f'price_range_market_share{bucket_no}',
                F.col(f"price_range_num{bucket_no}") / F.col("total_asin_num"))

    def get_ao_range(self):
        """Count ASIN rows per ``asin_ao_val`` bucket and their share per search term.

        Buckets (``ao_range_val1`` .. ``ao_range_val12``): bucket 1 is
        ``0 < asin_ao_val < 0.05``, buckets 2-11 are the half-open ranges
        between the consecutive edges below, and bucket 12 is
        ``asin_ao_val >= 1``. Rows with a value of 0 or less fall into no
        bucket.

        ``df_st_ao_group`` receives ``search_term``, the 12 count columns and
        ``ao_range_market_share1`` .. ``ao_range_market_share12`` (count
        divided by ``total_asin_num``); ``total_asin_num`` itself is dropped.

        Requires ``df_asin_count`` to hold ``search_term`` / ``total_asin_num``,
        i.e. ``get_asin_count`` has to run first.
        """
        bucket_edges = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
        bucket_total = len(bucket_edges) + 1
        ao_val = F.col("asin_ao_val")

        ao_type = F.when((ao_val > 0) & (ao_val < bucket_edges[0]), "ao_range_val1")
        for bucket_no, (low, high) in enumerate(zip(bucket_edges, bucket_edges[1:]), start=2):
            ao_type = ao_type.when((ao_val >= low) & (ao_val < high), f"ao_range_val{bucket_no}")
        ao_type = ao_type.when(ao_val >= bucket_edges[-1], f"ao_range_val{bucket_total}")

        self.df_st_asin_detail_ao = self.df_st_asin_detail.withColumn('ao_type', ao_type)

        # Passing the bucket names to pivot() guarantees that all 12 columns
        # exist even when a bucket has no rows; with inferred values the
        # select below fails on the missing column. It also avoids the extra
        # pass that determines the distinct values.
        val_columns = [f"ao_range_val{bucket_no}" for bucket_no in range(1, bucket_total + 1)]
        self.df_st_ao_group = self.df_st_asin_detail_ao.groupby(["search_term"]). \
            pivot("ao_type", val_columns).agg(F.count("search_term"))
        self.df_st_ao_group = self.df_st_ao_group.select("search_term", *val_columns)

        self.df_st_ao_group = self.df_st_ao_group.join(
            self.df_asin_count, on=['search_term'], how='left'
        )

        for bucket_no in range(1, bucket_total + 1):
            self.df_st_ao_group = self.df_st_ao_group.withColumn(
                f'ao_range_market_share{bucket_no}',
                F.col(f"ao_range_val{bucket_no}") / F.col("total_asin_num"))
        self.df_st_ao_group = self.df_st_ao_group.drop("total_asin_num")

    def get_asin_count(self):
        """Store the number of ASIN rows per search term in ``df_asin_count``.

        The result has the columns ``search_term`` and ``total_asin_num``; it
        is the denominator of the market-share columns built elsewhere in this
        class.
        """
        asin_total = F.count("asin").alias('total_asin_num')
        per_term = self.df_st_asin_detail.groupby(["search_term"])
        self.df_asin_count = per_term.agg(asin_total)

    def get_launch_time_range(self):
        """Bucket ASINs by launch date and compute each bucket's share per search term.

        Reads ``self.year_day`` (parsed as ``%Y-%m-%d``), ``self.df_st_asin_detail``
        and ``self.df_asin_count``. Writes:

        * ``self.df_st_asin_detail_launch_time`` - the detail rows plus a
          ``launch_time_type`` label (``launch_time_num1`` .. ``launch_time_num7``,
          or null when the launch time is missing or older than the last edge).
        * ``self.df_st_launch_time_group`` - one row per search term with the
          seven bucket counts and ``launch_time_market_share1..7``
          (bucket count / ``total_asin_num``).
        """
        report_day = datetime.datetime.strptime(self.year_day, '%Y-%m-%d')
        # Bucket edges expressed as days before the report day, newest first.
        day_offsets = (30, 90, 180, 360, 450, 720, 1080)
        cutoffs = [report_day - datetime.timedelta(days=offset) for offset in day_offsets]
        bucket_names = [f"launch_time_num{i}" for i in range(1, len(day_offsets) + 1)]

        launch_time = F.col("asin_launch_time")
        # First bucket is open-ended towards "now"; every later bucket is the
        # half-open interval [older_edge, newer_edge).
        bucket = F.when(launch_time >= cutoffs[0], bucket_names[0])
        for newer_edge, older_edge, name in zip(cutoffs, cutoffs[1:], bucket_names[1:]):
            bucket = bucket.when((launch_time >= older_edge) & (launch_time < newer_edge), name)

        self.df_st_asin_detail_launch_time = self.df_st_asin_detail.withColumn('launch_time_type', bucket)

        # Passing the bucket names to pivot() guarantees that all seven columns
        # exist even when no ASIN in the whole dataset falls into some bucket;
        # with inferred pivot values a missing bucket means a missing column.
        # It also restricts the output to exactly search_term + the buckets.
        self.df_st_launch_time_group = (
            self.df_st_asin_detail_launch_time.groupby(["search_term"])
            .pivot("launch_time_type", bucket_names)
            .agg(F.count("search_term"))
            .join(self.df_asin_count, on=["search_term"], how='left')
        )

        for index, name in enumerate(bucket_names, start=1):
            self.df_st_launch_time_group = self.df_st_launch_time_group.withColumn(
                f"launch_time_market_share{index}", F.col(name) / F.col("total_asin_num")
            )
        # total_asin_num was only needed as the denominator.
        self.df_st_launch_time_group = self.df_st_launch_time_group.drop("total_asin_num")

    def handle_df_cols(self, cols1_list, cols2_list, cols_sum_list, df):
        """Add ratio columns ``cols1 = cols2 / cols_sum`` to ``df``.

        The three lists are walked in lockstep; iteration stops at the
        shortest one.

        :param cols1_list: names of the ratio columns to create
        :param cols2_list: names of the numerator columns
        :param cols_sum_list: names of the denominator columns
        :param df: DataFrame providing ``withColumn`` and column lookup by name
        :return: the DataFrame returned by the last ``withColumn`` call, or
            ``df`` itself when the lists are empty
        """
        for ratio_col, numerator_col, denominator_col in zip(cols1_list, cols2_list, cols_sum_list):
            # The DataFrame method is ``withColumn``; ``withColum`` does not exist.
            df = df.withColumn(ratio_col, df[numerator_col] / df[denominator_col])
        return df

    def get_comments_num_range(self):
        """Bucket ASINs by total review count and compute each bucket's share per search term.

        Reads ``self.df_st_asin_detail`` and ``self.df_asin_count``. Writes:

        * ``self.df_st_asin_detail_comments`` - the detail rows plus a
          ``comments_type`` label (``comments_num1`` .. ``comments_num13``).
          Rows whose ``asin_total_comments`` is null or not greater than 0
          match no bucket and get a null label.
        * ``self.df_st_comments_group`` - one row per search term with the 13
          bucket counts and ``comments_num_market_share1..13``
          (bucket count / ``total_asin_num``).
        """
        comments = F.col("asin_total_comments")
        # Upper/lower edges of the half-open buckets [low, high).
        edges = (50, 100, 150, 200, 300, 400, 500, 600, 700, 800, 900, 1000)
        bucket_names = [f"comments_num{i}" for i in range(1, len(edges) + 2)]

        # Bucket 1 is (0, 50): zero reviews are deliberately left unlabelled.
        bucket = F.when((comments > 0) & (comments < edges[0]), bucket_names[0])
        for (low, high), name in zip(zip(edges, edges[1:]), bucket_names[1:]):
            bucket = bucket.when((comments >= low) & (comments < high), name)
        # Last bucket is open-ended.
        bucket = bucket.when(comments >= edges[-1], bucket_names[-1])

        self.df_st_asin_detail_comments = self.df_st_asin_detail.withColumn("comments_type", bucket)

        # Explicit pivot values guarantee all 13 columns exist even when a
        # bucket is empty across the whole dataset, and restrict the output to
        # exactly search_term + the buckets.
        self.df_st_comments_group = (
            self.df_st_asin_detail_comments.groupby(["search_term"])
            .pivot("comments_type", bucket_names)
            .agg(F.count("search_term"))
            .join(self.df_asin_count, on=["search_term"], how='left')
        )

        # Each share is computed exactly once.
        for index, name in enumerate(bucket_names, start=1):
            self.df_st_comments_group = self.df_st_comments_group.withColumn(
                f"comments_num_market_share{index}", F.col(name) / F.col("total_asin_num")
            )
        # total_asin_num was only needed as the denominator.
        self.df_st_comments_group = self.df_st_comments_group.drop("total_asin_num")

    def get_top20_asin(self):
        """Build the per-search-term top-20 summary in ``self.df_st_top20``.

        Ranks the rows of ``self.df_st_asin_detail`` inside each search term by
        ``asin_bsr_orders`` (descending, nulls last), keeps ranks 1-20 and
        produces comma-separated strings per search term:

        * ``top20_asin``, ``top20_orders``, ``top20_brand`` - from the top-20 rows;
        * ``top20_brand_new_num_proportion``, ``top20_brand_bsr_oders``,
          ``top20_brand_market_share`` - statistics of the brand of each top-20
          row, computed over all rows of that brand within the search term.

        Also sets ``self.df_st_bsr_orders_group`` (sum of ``asin_bsr_orders``
        per search term) and the intermediate ``self.df_st_top20_*`` frames.

        NOTE(review): ``collect_list`` skips nulls and gives no ordering
        guarantee after a shuffle, so positions in the six strings are not
        guaranteed to line up with each other or with the rank - confirm
        whether consumers rely on positional alignment.
        """

        def joined_per_term(frame, column, alias):
            # Collapse ``column`` into one comma-separated string per search term.
            return frame.groupBy(["search_term"]).agg(
                F.concat_ws(",", F.collect_list(frame[column])).alias(alias)
            )

        def per_brand(aggregate, alias):
            # Aggregate over every row of a (search term, brand) pair.
            return self.df_st_asin_detail.groupBy(["search_term", "asin_brand_name"]).agg(
                aggregate.alias(alias)
            )

        bsr_orders_window = Window.partitionBy(["search_term"]).orderBy(
            self.df_st_asin_detail.asin_bsr_orders.desc_nulls_last()
        )
        self.df_st_asin_top20_bsr_orders = self.df_st_asin_detail.withColumn(
            "bsr_orders_rank", F.row_number().over(window=bsr_orders_window)
        )
        self.df_st_asin_top20 = self.df_st_asin_top20_bsr_orders.filter("bsr_orders_rank<=20")

        # Lists taken directly from the top-20 rows.
        self.df_st_top20_asin = joined_per_term(self.df_st_asin_top20, "asin", "top20_asin")
        self.df_st_top20_bsr_orders = joined_per_term(self.df_st_asin_top20, "asin_bsr_orders", "top20_orders")
        self.df_st_top20_brand = joined_per_term(self.df_st_asin_top20, "asin_brand_name", "top20_brand")

        # Brand-level statistics over the full detail set (not only the top 20).
        self.df_st_top20_brand_total_asin = per_brand(F.count("asin"), "brand_total_asin")
        self.df_st_top20_brand_new_asin = per_brand(F.sum("asin_is_new"), "brand_new_asin")
        self.df_st_top20_brand_bsr_orders = per_brand(F.sum("asin_bsr_orders"), "brand_bsr_orders")

        # Total BSR orders of the search term: denominator of the brand market share.
        self.df_st_bsr_orders_group = self.df_st_asin_detail.groupby(["search_term"]).agg(
            F.sum("asin_bsr_orders").alias('bsr_orders')
        )

        # One row per top-20 ASIN, enriched with the statistics of its brand.
        brand_keys = ["search_term", "asin_brand_name"]
        top20_brand_rows = self.df_st_asin_top20.select("search_term", "asin_brand_name")
        for brand_stats in (
            self.df_st_top20_brand_total_asin,
            self.df_st_top20_brand_new_asin,
            self.df_st_top20_brand_bsr_orders,
        ):
            top20_brand_rows = top20_brand_rows.join(brand_stats, on=brand_keys, how='left')
        top20_brand_rows = top20_brand_rows.join(
            self.df_st_bsr_orders_group, on=["search_term"], how='left'
        )
        top20_brand_rows = top20_brand_rows.withColumn(
            "brand_new_num_proportion", F.col("brand_new_asin") / F.col("brand_total_asin")
        )
        top20_brand_rows = top20_brand_rows.withColumn(
            "brand_market_share", F.col("brand_bsr_orders") / F.col("bsr_orders")
        )
        self.df_st_top20_asin_brand = top20_brand_rows

        self.df_st_top20_brand_new_num_proportion = joined_per_term(
            top20_brand_rows, "brand_new_num_proportion", "top20_brand_new_num_proportion"
        )
        self.df_st_top20_brand_bsr_oders_group = joined_per_term(
            top20_brand_rows, "brand_bsr_orders", "top20_brand_bsr_oders"
        )
        self.df_st_top20_brand_market_share = joined_per_term(
            top20_brand_rows, "brand_market_share", "top20_brand_market_share"
        )

        # Left-join everything onto the search-term measure table so every
        # search term gets a row, even without any ASIN detail.
        top20 = self.df_st_measeure
        for per_term_frame in (
            self.df_st_top20_asin,
            self.df_st_top20_bsr_orders,
            self.df_st_top20_brand,
            self.df_st_top20_brand_new_num_proportion,
            self.df_st_top20_brand_bsr_oders_group,
            self.df_st_top20_brand_market_share,
        ):
            top20 = top20.join(per_term_frame, on=["search_term"], how='left')
        self.df_st_top20 = top20.select(
            "search_term", "top20_asin", "top20_orders", "top20_brand",
            "top20_brand_new_num_proportion", "top20_brand_bsr_oders", "top20_brand_market_share"
        )
        self.df_st_top20.show(10, truncate=False)

    def handle_data_group(self):
        """Assemble the final report frame ``self.df_save``.

        Steps:

        1. Inner-join ``self.df_st_measeure`` with ``self.df_st_key`` (the result
           is stored back into ``self.df_st_measeure``).
        2. Left-join every per-search-term group frame onto it.
        3. Select the report columns in their output order.
        4. Add ``created_time`` / ``updated_time`` and zero-fill the count and
           share columns.
        5. Add the reserved ``re_*`` fields and the ``site_name`` /
           ``date_type`` / ``date_info`` columns, then print a 10-row sample.
        """

        def numbered(prefix, count):
            # e.g. numbered("ao_range_val", 3) -> ["ao_range_val1", ..., "ao_range_val3"]
            return [f"{prefix}{i}" for i in range(1, count + 1)]

        price_nums = numbered("price_range_num", 18)
        price_shares = numbered("price_range_market_share", 18)
        ao_vals = numbered("ao_range_val", 12)
        ao_shares = numbered("ao_range_market_share", 12)
        launch_nums = numbered("launch_time_num", 7)
        launch_shares = numbered("launch_time_market_share", 7)
        comment_nums = numbered("comments_num", 13)
        comment_shares = numbered("comments_num_market_share", 13)

        self.df_st_measeure = self.df_st_measeure.join(
            self.df_st_key, on=['search_term'], how='inner'
        )

        # (frame, join keys) in the original join order; the top-100 frame is
        # the only one keyed by search_term_id.
        left_joins = (
            (self.df_st_price_group, ['search_term']),
            (self.df_st_ao_group, ['search_term']),
            (self.df_st_new_asin_group, ['search_term']),
            (self.df_st_bsr_orders_group, ['search_term']),
            (self.df_st_img_type_group, ['search_term']),
            (self.df_top100_asin, ['search_term_id']),
            (self.df_st_aadd_bsr_orders_group, ['search_term']),
            (self.df_st_launch_time_group, ['search_term']),
            (self.df_st_comments_group, ['search_term']),
            (self.df_st_buy_box, ['search_term']),
            (self.df_st_seller, ['search_term']),
            (self.df_st_color, ['search_term']),
            (self.df_st_top20, ['search_term']),
        )
        joined = self.df_st_measeure
        for frame, keys in left_joins:
            joined = joined.join(frame, on=keys, how='left')

        report_columns = (
            ["search_term_id", "search_term"]
            + price_nums + price_shares
            + ao_vals + ao_shares
            + ["total_asin_num", "new_asin_num", "orders", "bsr_orders", "aadd_bsr_orders",
               "aadd_video_num", "aadd_no_video_num", "no_aadd_no_video_num", "no_aadd_video_num",
               "top100_asin", "top100_orders", "top100_market_share", "top100_is_new"]
            + launch_nums + launch_shares
            + ["top20_asin", "top20_orders", "top20_brand", "top20_brand_new_num_proportion",
               "top20_brand_bsr_oders", "top20_brand_market_share"]
            + comment_nums + comment_shares
            + ["buy_box_name", "buy_box_num", "seller_name", "seller_num", "seller_bsr_orders",
               "color_name", "color_num"]
        )
        self.df_save = joined.select(*report_columns)

        # Lower-case ``ss`` is seconds; upper-case ``SS`` would be
        # fraction-of-second digits in Spark datetime patterns.
        timestamp_pattern = 'yyyy-MM-dd HH:mm:ss'
        self.df_save = self.df_save.withColumn(
            "created_time", F.date_format(F.current_timestamp(), timestamp_pattern)
        ).withColumn(
            "updated_time", F.date_format(F.current_timestamp(), timestamp_pattern)
        )

        # Missing buckets come out of the left joins as null; report them as zero.
        # Generating the names covers every ao_range_val column, including
        # ao_range_val5.
        count_columns = (
            price_nums + ao_vals
            + ["total_asin_num", "new_asin_num", "orders", "bsr_orders",
               "aadd_video_num", "aadd_no_video_num", "no_aadd_no_video_num", "no_aadd_video_num"]
            + launch_nums + comment_nums
        )
        share_columns = price_shares + ao_shares + launch_shares + comment_shares
        fill_values = {name: 0 for name in count_columns}
        fill_values.update({name: 0.0 for name in share_columns})
        self.df_save = self.df_save.na.fill(fill_values)

        # Reserved fields plus the job's own parameters.
        constant_columns = (
            ("re_string_field1", "null"),
            ("re_string_field2", "null"),
            ("re_string_field3", "null"),
            ("re_int_field1", 0),
            ("re_int_field2", 0),
            ("re_int_field3", 0),
            ("site_name", self.site_name),
            ("date_type", self.date_type),
            ("date_info", self.date_info),
        )
        for column_name, value in constant_columns:
            self.df_save = self.df_save.withColumn(column_name, F.lit(value))
        self.df_save.show(10, truncate=False)

    def handle_data(self):
        """Run every processing step of the report, in order.

        The steps are invoked one after another with no arguments.
        ``get_asin_count`` has to run before the range methods that join
        ``self.df_asin_count``, and ``handle_data_group`` runs last because it
        joins the frames produced by the earlier steps.
        """
        pipeline = (
            "get_st_asin_detail",
            "get_st_attribute",
            "get_img_type",
            "get_asin_count",
            "get_price_range",
            "get_ao_range",
            "get_st_buy_box",
            "get_st_color",
            "get_launch_time_range",
            "get_comments_num_range",
            "get_st_asin_seller",
            "get_top20_asin",
            "handle_data_group",
        )
        for step_name in pipeline:
            getattr(self, step_name)()


if __name__ == '__main__':
    # Positional command-line arguments:
    #   1 - site name
    #   2 - date type: week/4_week/month/quarter
    #   3 - date info: year-week / year-month / year-quarter, e.g. 2022-1
    site_name, date_type, date_info = (sys.argv[position] for position in (1, 2, 3))
    handle_obj = DwtAbaStAnalyticsReport(
        site_name=site_name,
        date_type=date_type,
        date_info=date_info,
    )
    handle_obj.run()