Commit 5655a77a by chenyuanjie

fix

parent 6d70e970
......@@ -29,7 +29,8 @@ if __name__ == '__main__':
WHERE site_name = '{site_name}'
"""
print(f"sql=\n{sql}")
-df_all = spark.sql(sqlQuery=sql)
+df_all = spark.sql(sqlQuery=sql).cache()
+print(f"全量读取 keepa 数据:{df_all.count()}")
# 2. Spark 端过滤超过 3 个月的 asin(数据读取后处理,不在 Hive SQL 中算)
three_months_ago = (datetime.now() - relativedelta(months=3)).strftime('%Y-%m-%d %H:%M:%S')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment