Commit 0ed9c4bd by wangjing

no message

parent 417b6578
......@@ -12,7 +12,9 @@ if __name__ == '__main__':
assert site_name is not None, "sitename 不能为空!"
assert date_info is not None, "date_info 不能为空!"
db_type = "mysql"
year = date_info.split("-")[0]
# db_type = "mysql"
db_type = "postgresql_14"
if date_info == 'all':
query = f"""
select
......@@ -26,7 +28,7 @@ if __name__ == '__main__':
created_at as updated_at,
date_info,
category_id
from {site_name}_bs_category_top100_asin
from {site_name}_bs_top100_asin_{year}
where 1 = 1
and \$CONDITIONS
"""
......@@ -43,7 +45,7 @@ if __name__ == '__main__':
created_at as updated_at,
date_info,
category_id
from {site_name}_bs_category_top100_asin
from {site_name}_bs_top100_asin_{year}
where 1 = 1
and date_info = '{date_info}'
and \$CONDITIONS
......
......@@ -8,11 +8,15 @@ from utils.secure_db_client import get_remote_engine
if __name__ == '__main__':
site_name = CommonUtil.get_sys_arg(1, None)
date_info = CommonUtil.get_sys_arg(2, None)
assert site_name is not None, "sitename 不能为空!"
date_type = CommonUtil.get_sys_arg(2, None)
date_info = CommonUtil.get_sys_arg(3, None)
assert site_name is not None, "site_name 不能为空!"
assert date_type is not None, "date_type 不能为空!"
assert date_info is not None, "date_info 不能为空!"
db_type = "mysql"
year = date_info.split("-")[0]
# db_type = "mysql"
db_type = "postgresql_14"
if date_info == 'all':
query = f"""
select
......@@ -20,13 +24,14 @@ if __name__ == '__main__':
asin,
cate_1_id,
cate_current_id,
category_id,
bsr_rank,
price,
rating,
total_comments,
created_at as updated_at,
date_info,
category_id
from {site_name}_bs_category_top100_asin
created_at as created_at,
updated_at as updated_at
from {site_name}_bs_top100_asin_{year}
where 1 = 1
and \$CONDITIONS
"""
......@@ -37,23 +42,26 @@ if __name__ == '__main__':
asin,
cate_1_id,
cate_current_id,
category_id,
bsr_rank,
price,
rating,
total_comments,
created_at as updated_at,
date_info,
category_id
from {site_name}_bs_category_top100_asin
created_at as created_at,
updated_at as updated_at
from {site_name}_bs_top100_asin_{year}
where 1 = 1
and date_info = '{date_info}'
and \$CONDITIONS
"""
hive_tb = "ods_bs_category_top100_asin"
hive_tb = "ods_bs_top100_asin"
partition_dict = {
"site_name": site_name
"site_name": site_name,
"date_type": date_type,
"date_info": date_info,
}
hdfs_path = CommonUtil.build_hdfs_path(hive_tb, partition_dict=partition_dict)
# hdfs_path = CommonUtil.build_hdfs_path(hive_tb, partition_dict=partition_dict)
engine = get_remote_engine(
site_name=site_name,
......@@ -63,7 +71,7 @@ if __name__ == '__main__':
engine.sqoop_raw_import(
query=query,
hive_table=hive_tb,
hdfs_path=hdfs_path,
# hdfs_path=hdfs_path,
partitions=partition_dict
)
......
......@@ -11,8 +11,9 @@ if __name__ == '__main__':
date_info = CommonUtil.get_sys_arg(2, None)
assert site_name is not None, "sitename 不能为空!"
assert date_info is not None, "date_info 不能为空!"
db_type = "mysql"
year = date_info.split("-")[0]
# db_type = "mysql"
db_type = "postgresql_14"
if date_info == 'all':
query = f"""
select
......@@ -26,7 +27,7 @@ if __name__ == '__main__':
created_at as updated_at,
date_info,
category_id
from {site_name}_new_releases_top100_asin
from {site_name}_nsr_top100_asin_{year}
where 1 = 1
and \$CONDITIONS
"""
......@@ -43,7 +44,7 @@ if __name__ == '__main__':
created_at as updated_at,
date_info,
category_id
from {site_name}_new_releases_top100_asin
from {site_name}_nsr_top100_asin_{year}
where 1 = 1
and date_info = '{date_info}'
and \$CONDITIONS
......
......@@ -10,11 +10,12 @@ if __name__ == '__main__':
site_name = CommonUtil.get_sys_arg(1, None)
date_type = CommonUtil.get_sys_arg(2, None)
date_info = CommonUtil.get_sys_arg(3, None)
assert site_name is not None, "sitename 不能为空!"
assert date_type is not None, "sitename 不能为空!"
assert site_name is not None, "site_name 不能为空!"
assert date_type is not None, "date_type 不能为空!"
assert date_info is not None, "date_info 不能为空!"
db_type = "mysql"
year = date_info.split("-")[0]
# db_type = "mysql"
db_type = "postgresql_14"
if date_info == 'all':
query = f"""
select
......@@ -22,13 +23,14 @@ if __name__ == '__main__':
asin,
cate_1_id,
cate_current_id,
category_id,
bsr_rank,
price,
rating,
total_comments,
created_at as updated_at,
date_info,
category_id
from {site_name}_bs_category_top100_asin
created_at as created_at,
updated_at as updated_at
from {site_name}_nsr_top100_asin_{year}
where 1 = 1
and \$CONDITIONS
"""
......@@ -39,19 +41,20 @@ if __name__ == '__main__':
asin,
cate_1_id,
cate_current_id,
category_id,
bsr_rank,
price,
rating,
total_comments,
created_at as updated_at,
date_info,
category_id
from {site_name}_bs_category_top100_asin
created_at as created_at,
updated_at as updated_at
from {site_name}_nsr_top100_asin_{year}
where 1 = 1
and date_info = '{date_info}'
and \$CONDITIONS
"""
hive_tb = "ods_bs_category_top100_asin"
hive_tb = "ods_nsr_top100_asin"
partition_dict = {
"site_name": site_name,
"date_type": date_type,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment