es月索引增加利润率字段

40fcc4ea · chenyuanjie · 31f19f7e · 40fcc4ea · 40fcc4ea
Commit 40fcc4ea authored Feb 27, 2026 by chenyuanjie
Hide whitespace changes
Inline Side-by-side

Showing with 34 additions and 3 deletions

es_flow_asin.py Pyspark_job/export_es/es_flow_asin.py +20 -3

es_util.py Pyspark_job/utils/es_util.py +14 -0

No files found.
--- a/Pyspark_job/export_es/es_flow_asin.py
+++ b/Pyspark_job/export_es/es_flow_asin.py
@@ -26,6 +26,7 @@ class EsStDetail(TemplatesMysql):
            use_db="big_data_selection")
        # DataFrame对象初始化
        self.df_synchronize = self.spark.sql("select 1+1")
+        self.df_profit_rate = self.spark.sql("select 1+1")
        if self.date_type == '4_week':
            self.cur_date = self.get_date_from_week()
        else:
@@ -102,13 +103,29 @@ class EsStDetail(TemplatesMysql):
            from {self.table_name} where site_name='{self.site_name}' and date_type='{self.date_type}' and date_info='{self.date_info}'
        """
        print("sql:", sql)
-        self.df_synchronize = self.spark.sql(sqlQuery=sql)
+        self.df_synchronize = self.spark.sql(sqlQuery=sql).repartition(40, 'asin')
-        self.df_synchronize = self.df_synchronize.repartition(40).withColumn(
+        sql = f"""
+            select asin, price, ocean_profit, air_profit from dim_asin_profit_rate_info where site_name='{self.site_name}'
+        """
+        print("sql:", sql)
+        self.df_profit_rate = self.spark.sql(sqlQuery=sql).repartition(40, 'asin').withColumn(
+            "profit_rate_extra",
+            F.struct(
+                F.col("ocean_profit").alias("ocean_profit"),
+                F.col("air_profit").alias("air_profit")
+            )
+        ).drop("ocean_profit", "air_profit")
+        self.df_synchronize = self.df_synchronize.join(
+            self.df_profit_rate, on=['asin', 'price'], how='left'
+        ).withColumn(
            "img_type", F.split(F.col("img_type"), ",")
        ).withColumn(
            "img_type", F.expr("transform(img_type, x -> cast(x as int))")
+        ).withColumn(
+            'profit_key', F.concat_ws("_", F.col("asin"), F.col("price"))
        ).cache()
-        self.df_synchronize.show(10, truncate=False)
    # 同步数据前的准备工作
    def es_prepare(self):

--- a/Pyspark_job/utils/es_util.py
+++ b/Pyspark_job/utils/es_util.py
@@ -485,6 +485,20 @@ class EsUtils(object):
                    },
                    "tracking_since_type": {
                        "type": "short"
+                    },
+                    "profit_key": {
+                        "type": "keyword"
+                    },
+                    "profit_rate_extra": {
+                        "type": "object",
+                        "properties": {
+                            "ocean_profit": {
+                                "type": "float"
+                            },
+                            "air_profit": {
+                                "type": "float"
+                            }
+                        }
                    }
                }
            }