利润率导出pg

5efdfc6e · chenyuanjie · 415179d3 · 5efdfc6e
Commit 5efdfc6e authored May 25, 2026 by chenyuanjie
Show whitespace changes
Inline Side-by-side

Showing with 8 additions and 2 deletions

export_need_profit_rate.py Pyspark_job/script/export_need_profit_rate.py +8 -2

No files found.
--- a/Pyspark_job/script/export_need_profit_rate.py
+++ b/Pyspark_job/script/export_need_profit_rate.py
@@ -87,15 +87,21 @@ class ExportNeedProfitRate(object):
        # 5. keepa 当日增量 INNER JOIN
        # keepa 表已整合为单分区快照，用 updated_time >= last_date_info 筛"近一天更新"的增量
+        # weight 取数规则：优先 item_weight；item_weight 为 NULL 或 <= 0 时用 package_weight 兜底；两者都不大于 0（或为 NULL）→ weight 为 NULL，被下方 weight > 0 条件过滤
        sql_keepa = f"""
-            SELECT asin, package_length, package_width, package_height, item_weight AS weight
+            SELECT asin, package_length, package_width, package_height,
+                   CASE WHEN item_weight > 0    THEN item_weight
+                        WHEN package_weight > 0 THEN package_weight
+                        ELSE NULL
+                   END AS weight
            FROM dim_keepa_asin_info
            WHERE site_name = '{self.site_name}' AND updated_time >= '{self.last_date_info}'
        """
        df_keepa = self.spark.sql(sqlQuery=sql_keepa) \
            .filter((F.col('package_length') > 0) &
                    (F.col('package_width') > 0) &
-                    (F.col('package_height') > 0)) \
+                    (F.col('package_height') > 0) &
+                    (F.col('weight') > 0)) \
            .repartition(40, 'asin')
        df_result = df_flow \