Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
0c315139
Commit
0c315139
authored
May 13, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
每日榜单asin指标写入Doris
parent
957e48f8
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
27 deletions
+23
-27
dwd_nsr_bsr_keepa_asin.py
Pyspark_job/dwd/dwd_nsr_bsr_keepa_asin.py
+23
-27
No files found.
Pyspark_job/dwd/dwd_nsr_bsr_keepa_asin.py
View file @
0c315139
...
@@ -19,6 +19,7 @@ from utils.common_util import CommonUtil
...
@@ -19,6 +19,7 @@ from utils.common_util import CommonUtil
from
datetime
import
datetime
,
timedelta
from
datetime
import
datetime
,
timedelta
from
utils.hdfs_utils
import
HdfsUtils
from
utils.hdfs_utils
import
HdfsUtils
from
utils.secure_db_client
import
get_remote_engine
from
utils.secure_db_client
import
get_remote_engine
from
utils.DorisHelper
import
DorisHelper
# 数字/虚拟类目排除列表 — 筛选条件用
# 数字/虚拟类目排除列表 — 筛选条件用
EXCLUDE_CATEGORIES
=
(
EXCLUDE_CATEGORIES
=
(
...
@@ -264,33 +265,28 @@ class DwdNsrBsrKeepaAsin(Templates):
...
@@ -264,33 +265,28 @@ class DwdNsrBsrKeepaAsin(Templates):
CommonUtil
()
.
send_wx_msg
(
users
=
users
,
title
=
title
,
content
=
content
)
CommonUtil
()
.
send_wx_msg
(
users
=
users
,
title
=
title
,
content
=
content
)
if
self
.
date_type
==
'day'
:
if
self
.
date_type
==
'day'
:
# 更新30day分区
# 写入 Doris selection.dim_asin_source_flag(按 date_info 日分区,dynamic_partition 自动清理过期分区)
hdfs_day
=
CommonUtil
.
build_hdfs_path
(
self
.
db_save_cate
,
{
"site_name"
:
self
.
site_name
,
"date_type"
:
self
.
date_type
,
"date_info"
:
self
.
date_info
})
print
(
f
"写入 Doris selection.dim_asin_source_flag, date_info={self.date_info}"
)
hdfs_30day
=
CommonUtil
.
build_hdfs_path
(
self
.
db_save_cate
,
{
"site_name"
:
self
.
site_name
,
"date_type"
:
"30day"
,
"date_info"
:
"1970-01"
})
df_to_doris
=
self
.
df_save_asin_cate
.
select
(
hdfs_30day_copy
=
CommonUtil
.
build_hdfs_path
(
self
.
db_save_cate
,
{
"site_name"
:
self
.
site_name
,
"date_type"
:
"30day_copy"
,
"date_info"
:
"1970-01"
})
F
.
col
(
"date_info"
)
.
cast
(
"date"
)
.
alias
(
"date_info"
),
if
not
HdfsUtils
.
path_exist
(
hdfs_day
):
"asin"
,
print
(
f
"源目录不存在: {hdfs_day}"
)
"site_name"
,
wx_users
=
[
'fangxingjun'
,
'chenyuanjie'
]
F
.
to_json
(
wx_msg
=
f
"{hdfs_day} 目录数据不存在,请检查!"
F
.
expr
(
"transform(split(asin_cate_flag, ','), x -> cast(x as int))"
)
CommonUtil
.
send_wx_msg
(
wx_users
,
"复制数据至30day分区"
,
wx_msg
)
)
.
alias
(
"asin_cate_flag"
),
sys
.
exit
(
1
)
F
.
col
(
"bsr_latest_date"
)
.
cast
(
"date"
)
.
alias
(
"bsr_latest_date"
),
if
HdfsUtils
.
path_exist
(
hdfs_30day_copy
):
"bsr_30day_count"
,
print
(
f
"中间目录已存在,先清空: {hdfs_30day_copy}"
)
F
.
col
(
"nsr_latest_date"
)
.
cast
(
"date"
)
.
alias
(
"nsr_latest_date"
),
HdfsUtils
.
delete_file_in_folder
(
hdfs_30day_copy
)
"nsr_30day_count"
,
else
:
)
print
(
f
"中间目录不存在,创建: {hdfs_30day_copy}"
)
table_columns
=
"date_info, asin, site_name, asin_cate_flag, bsr_latest_date, bsr_30day_count, nsr_latest_date, nsr_30day_count"
HdfsUtils
.
create_if_not_exist
(
hdfs_30day_copy
)
DorisHelper
.
spark_export_with_columns
(
os
.
system
(
f
"hdfs dfs -cp {hdfs_day}/* {hdfs_30day_copy}/"
)
df_save
=
df_to_doris
,
files
=
HdfsUtils
.
read_list
(
hdfs_30day_copy
)
db_name
=
'selection'
,
print
(
f
"中间目录文件数: {len(files) if files else 0}"
)
table_name
=
'dim_asin_source_flag'
,
if
not
HdfsUtils
.
path_exist
(
hdfs_30day
):
table_columns
=
table_columns
,
print
(
f
"目标目录不存在,创建: {hdfs_30day}"
)
)
HdfsUtils
.
create_if_not_exist
(
hdfs_30day
)
print
(
"Doris selection.dim_asin_source_flag 写入完毕"
)
HdfsUtils
.
exchange_path
(
hdfs_30day_copy
,
hdfs_30day
)
print
(
f
"交换完成! 30day与30day_copy已互换"
)
# 4. 修复hive元数据
CommonUtil
.
hive_cmd_exec
(
f
"set hive.msck.path.validation=ignore; msck repair table big_data_selection.{self.db_save_cate};"
)
# def save_data(self):
# def save_data(self):
# pass
# pass
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment