Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
9e144349
Commit
9e144349
authored
Jun 09, 2026
by
hejiangming
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
160f062d
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
2 deletions
+14
-2
dwt_aba_last365.py
Pyspark_job/sqoop_export/dwt_aba_last365.py
+14
-2
No files found.
Pyspark_job/sqoop_export/dwt_aba_last365.py
View file @
9e144349
...
@@ -37,12 +37,15 @@ if __name__ == '__main__':
...
@@ -37,12 +37,15 @@ if __name__ == '__main__':
with
engine
.
connect
()
as
connection
:
with
engine
.
connect
()
as
connection
:
sql
=
f
"""
sql
=
f
"""
drop table if exists {export_tb_rel};
drop table if exists {export_tb_rel};
create table if not exists {export_tb_rel}
create table if not exists {export_tb_rel}
(
(
like {export_tb_before} including comments
like {export_tb_before} including comments
);
);
ALTER TABLE {export_tb_rel} ALTER COLUMN st_movie_brand_label TYPE VARCHAR(20);
ALTER TABLE {export_tb_rel} ALTER COLUMN st_movie_brand_label TYPE VARCHAR(20);
ALTER TABLE {export_tb_rel} ALTER COLUMN total_appear_month TYPE VARCHAR(50);
ALTER TABLE {export_tb_rel} ALTER COLUMN total_appear_month TYPE VARCHAR(50);
-- st_attribute_label 在正式表是 VARCHAR[],Sqoop 无法直接写入数组类型
-- 先临时改成 VARCHAR(200) 让 Sqoop 写字符串,交换完成后再 ALTER 回 VARCHAR[]
ALTER TABLE {export_tb_rel} ALTER COLUMN st_attribute_label TYPE VARCHAR(200);
"""
"""
print
(
"================================执行sql================================"
)
print
(
"================================执行sql================================"
)
print
(
sql
)
print
(
sql
)
...
@@ -187,7 +190,9 @@ if __name__ == '__main__':
...
@@ -187,7 +190,9 @@ if __name__ == '__main__':
"market_cycle_type"
,
"market_cycle_type"
,
"rank_lastest"
,
"rank_lastest"
,
"rank_change_rate_lastest"
,
"rank_change_rate_lastest"
,
"rank_rate_of_change_lastest"
"rank_rate_of_change_lastest"
,
# 搜索词属性标签(12 个月合并去重,逗号分隔字符串,sqoop 导出 PG 后转 VARCHAR[])
"st_attribute_label"
],
],
partition_dict
=
{
partition_dict
=
{
"site_name"
:
site_name
,
"site_name"
:
site_name
,
...
@@ -216,6 +221,13 @@ if __name__ == '__main__':
...
@@ -216,6 +221,13 @@ if __name__ == '__main__':
ALTER COLUMN total_appear_month TYPE INTEGER[]
ALTER COLUMN total_appear_month TYPE INTEGER[]
USING string_to_array(total_appear_month, ',')::int[];
USING string_to_array(total_appear_month, ',')::int[];
-- 交换完成后,把 st_attribute_label 从 VARCHAR 转回 VARCHAR[]
-- 用 string_to_array 把 Sqoop 写入的逗号串(如 "材质,颜色")拆成数组(如 {{材质,颜色}})
-- 词典无匹配的词在 PySpark 端已 fillna "-1",转换后是 {{-1}},与 Java 占位约定一致
ALTER TABLE {export_tb_before}
ALTER COLUMN st_attribute_label TYPE VARCHAR[]
USING string_to_array(st_attribute_label, ',')::varchar[];
alter table {export_tb_before} drop if exists keyword_tsv;
alter table {export_tb_before} drop if exists keyword_tsv;
alter table {export_tb_before} add column keyword_tsv tsvector generated always as (to_tsvector('english_amazonword', search_term)) STORED;
alter table {export_tb_before} add column keyword_tsv tsvector generated always as (to_tsvector('english_amazonword', search_term)) STORED;
drop index if exists {export_tb_before}_keyword_tsv_idx;
drop index if exists {export_tb_before}_keyword_tsv_idx;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment