Commit 9e144349 by hejiangming

no message

parent 160f062d
...@@ -37,12 +37,15 @@ if __name__ == '__main__': ...@@ -37,12 +37,15 @@ if __name__ == '__main__':
with engine.connect() as connection: with engine.connect() as connection:
sql = f""" sql = f"""
drop table if exists {export_tb_rel}; drop table if exists {export_tb_rel};
create table if not exists {export_tb_rel} create table if not exists {export_tb_rel}
( (
like {export_tb_before} including comments like {export_tb_before} including comments
); );
ALTER TABLE {export_tb_rel} ALTER COLUMN st_movie_brand_label TYPE VARCHAR(20); ALTER TABLE {export_tb_rel} ALTER COLUMN st_movie_brand_label TYPE VARCHAR(20);
ALTER TABLE {export_tb_rel} ALTER COLUMN total_appear_month TYPE VARCHAR(50); ALTER TABLE {export_tb_rel} ALTER COLUMN total_appear_month TYPE VARCHAR(50);
-- st_attribute_label 在正式表是 VARCHAR[],Sqoop 无法直接写入数组类型
-- 先临时改成 VARCHAR(200) 让 Sqoop 写字符串,交换完成后再 ALTER 回 VARCHAR[]
ALTER TABLE {export_tb_rel} ALTER COLUMN st_attribute_label TYPE VARCHAR(200);
""" """
print("================================执行sql================================") print("================================执行sql================================")
print(sql) print(sql)
...@@ -187,7 +190,9 @@ if __name__ == '__main__': ...@@ -187,7 +190,9 @@ if __name__ == '__main__':
"market_cycle_type", "market_cycle_type",
"rank_lastest", "rank_lastest",
"rank_change_rate_lastest", "rank_change_rate_lastest",
"rank_rate_of_change_lastest" "rank_rate_of_change_lastest",
# 搜索词属性标签(12 个月合并去重,逗号分隔字符串,sqoop 导出 PG 后转 VARCHAR[])
"st_attribute_label"
], ],
partition_dict={ partition_dict={
"site_name": site_name, "site_name": site_name,
...@@ -216,6 +221,13 @@ if __name__ == '__main__': ...@@ -216,6 +221,13 @@ if __name__ == '__main__':
ALTER COLUMN total_appear_month TYPE INTEGER[] ALTER COLUMN total_appear_month TYPE INTEGER[]
USING string_to_array(total_appear_month, ',')::int[]; USING string_to_array(total_appear_month, ',')::int[];
-- 交换完成后,把 st_attribute_label 从 VARCHAR 转回 VARCHAR[]
-- 用 string_to_array 把 Sqoop 写入的逗号串(如 "材质,颜色")拆成数组(如 {{材质,颜色}})
-- 词典无匹配的词在 PySpark 端已 fillna "-1",转换后是 {{-1}},与 Java 占位约定一致
ALTER TABLE {export_tb_before}
ALTER COLUMN st_attribute_label TYPE VARCHAR[]
USING string_to_array(st_attribute_label, ',')::varchar[];
alter table {export_tb_before} drop if exists keyword_tsv; alter table {export_tb_before} drop if exists keyword_tsv;
alter table {export_tb_before} add column keyword_tsv tsvector generated always as (to_tsvector('english_amazonword', search_term)) STORED; alter table {export_tb_before} add column keyword_tsv tsvector generated always as (to_tsvector('english_amazonword', search_term)) STORED;
drop index if exists {export_tb_before}_keyword_tsv_idx; drop index if exists {export_tb_before}_keyword_tsv_idx;
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment