abel_cjy / Amazon-Selection-Data / Commits

Commit 13f4a99e, authored Jun 17, 2025 by chenyuanjie
parent 6c0b5a67

Related traffic: sync internal ASIN data

Showing 1 changed file with 52 additions and 0 deletions.

Pyspark_job/sqoop_import/ods_self_asin_related_traffic.py (new file, mode 100644, +52 / -0)
import os
import sys

sys.path.append(os.path.dirname(sys.path[0]))
from utils.ssh_util import SSHUtil
from utils.common_util import CommonUtil
from utils.hdfs_utils import HdfsUtils

if __name__ == '__main__':
    # Site code passed as the first CLI argument
    site_name = CommonUtil.get_sys_arg(1, None)
    assert site_name is not None, "site_name must not be empty!"

    hive_table = "ods_self_asin_related_traffic"
    partition_dict = {
        "site_name": site_name
    }
    hdfs_path = CommonUtil.build_hdfs_path(hive_table, partition_dict=partition_dict)
    print(f"hdfs_path is {hdfs_path}")

    db_type = 'mysql'
    import_table = f"{site_name}_self_asin_detail"
    # Pull the last 7 days of related-traffic rows for this site;
    # \$CONDITIONS is the placeholder Sqoop replaces with per-mapper split predicates.
    sql_query = f"""
    select
        id,
        asin,
        together_asin,
        sp_initial_seen_asins_json,
        sp_4stars_initial_seen_asins_json,
        sp_delivery_initial_seen_asins_json,
        compare_similar_asin_json,
        result_list_json,
        updated_at
    from {import_table}
    where site = '{site_name}'
    and DATE(updated_at) >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
    and \$CONDITIONS
    """
    # Generate the import script
    import_sh = CommonUtil.build_import_sh(site_name=site_name,
                                           db_type=db_type,
                                           query=sql_query,
                                           hdfs_path=hdfs_path,
                                           map_num=25,
                                           key='id')
    # Delete the existing HDFS data before importing
    HdfsUtils.delete_hdfs_file(hdfs_path)
    # Create an SSH client object, used to run the shell command
    client = SSHUtil.get_ssh_client()
    SSHUtil.exec_command_async(client, import_sh, ignore_err=False)
    # Build the LZO index and repair the Hive metadata
    CommonUtil.after_import(hdfs_path=hdfs_path, hive_tb=hive_table)
    # Close the connection
    client.close()
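The commit only shows the call sites: CommonUtil.build_import_sh itself lives in utils/common_util.py, which is not part of this diff. As a rough idea of what such a helper assembles, here is a minimal sketch of a Sqoop free-form-query import command; the flag layout follows standard Sqoop usage, not the project's actual implementation, and jdbc_url, user, and password are placeholder assumptions.

# Hypothetical sketch only: the real CommonUtil.build_import_sh is defined in
# utils/common_util.py and is not shown in this commit. Flags are standard
# Sqoop options; jdbc_url/user/password are placeholder assumptions.
def build_import_sh_sketch(query, hdfs_path, map_num, key,
                           jdbc_url, user, password):
    # Sqoop requires the literal $CONDITIONS token in a free-form query;
    # each of the map_num mappers replaces it with a range predicate on `key`.
    return (
        "sqoop import "
        f"--connect '{jdbc_url}' --username '{user}' --password '{password}' "
        f'--query "{query}" '
        f"--split-by {key} --num-mappers {map_num} "
        f"--target-dir {hdfs_path} "
        # LZO-compressed output would match the after_import step,
        # which builds an LZO index over the imported files.
        "--compress --compression-codec com.hadoop.compression.lzo.LzopCodec"
    )

Invoked as, e.g., python ods_self_asin_related_traffic.py us (where "us" is an assumed site code), the script would then run the generated command over SSH with 25 parallel mappers split on id.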