Commit 0ed9c4bd by wangjing

no message

parent 417b6578
...@@ -12,7 +12,9 @@ if __name__ == '__main__': ...@@ -12,7 +12,9 @@ if __name__ == '__main__':
assert site_name is not None, "sitename 不能为空!" assert site_name is not None, "sitename 不能为空!"
assert date_info is not None, "date_info 不能为空!" assert date_info is not None, "date_info 不能为空!"
db_type = "mysql" year = date_info.split("-")[0]
# db_type = "mysql"
db_type = "postgresql_14"
if date_info == 'all': if date_info == 'all':
query = f""" query = f"""
select select
...@@ -26,7 +28,7 @@ if __name__ == '__main__': ...@@ -26,7 +28,7 @@ if __name__ == '__main__':
created_at as updated_at, created_at as updated_at,
date_info, date_info,
category_id category_id
from {site_name}_bs_category_top100_asin from {site_name}_bs_top100_asin_{year}
where 1 = 1 where 1 = 1
and \$CONDITIONS and \$CONDITIONS
""" """
...@@ -43,7 +45,7 @@ if __name__ == '__main__': ...@@ -43,7 +45,7 @@ if __name__ == '__main__':
created_at as updated_at, created_at as updated_at,
date_info, date_info,
category_id category_id
from {site_name}_bs_category_top100_asin from {site_name}_bs_top100_asin_{year}
where 1 = 1 where 1 = 1
and date_info = '{date_info}' and date_info = '{date_info}'
and \$CONDITIONS and \$CONDITIONS
......
...@@ -8,11 +8,15 @@ from utils.secure_db_client import get_remote_engine ...@@ -8,11 +8,15 @@ from utils.secure_db_client import get_remote_engine
if __name__ == '__main__': if __name__ == '__main__':
site_name = CommonUtil.get_sys_arg(1, None) site_name = CommonUtil.get_sys_arg(1, None)
date_info = CommonUtil.get_sys_arg(2, None) date_type = CommonUtil.get_sys_arg(2, None)
assert site_name is not None, "sitename 不能为空!" date_info = CommonUtil.get_sys_arg(3, None)
assert site_name is not None, "site_name 不能为空!"
assert date_type is not None, "date_type 不能为空!"
assert date_info is not None, "date_info 不能为空!" assert date_info is not None, "date_info 不能为空!"
db_type = "mysql" year = date_info.split("-")[0]
# db_type = "mysql"
db_type = "postgresql_14"
if date_info == 'all': if date_info == 'all':
query = f""" query = f"""
select select
...@@ -20,13 +24,14 @@ if __name__ == '__main__': ...@@ -20,13 +24,14 @@ if __name__ == '__main__':
asin, asin,
cate_1_id, cate_1_id,
cate_current_id, cate_current_id,
category_id,
bsr_rank, bsr_rank,
price,
rating, rating,
total_comments, total_comments,
created_at as updated_at, created_at as created_at,
date_info, updated_at as updated_at
category_id from {site_name}_bs_top100_asin_{year}
from {site_name}_bs_category_top100_asin
where 1 = 1 where 1 = 1
and \$CONDITIONS and \$CONDITIONS
""" """
...@@ -37,23 +42,26 @@ if __name__ == '__main__': ...@@ -37,23 +42,26 @@ if __name__ == '__main__':
asin, asin,
cate_1_id, cate_1_id,
cate_current_id, cate_current_id,
category_id,
bsr_rank, bsr_rank,
price,
rating, rating,
total_comments, total_comments,
created_at as updated_at, created_at as created_at,
date_info, updated_at as updated_at
category_id from {site_name}_bs_top100_asin_{year}
from {site_name}_bs_category_top100_asin
where 1 = 1 where 1 = 1
and date_info = '{date_info}' and date_info = '{date_info}'
and \$CONDITIONS and \$CONDITIONS
""" """
hive_tb = "ods_bs_category_top100_asin" hive_tb = "ods_bs_top100_asin"
partition_dict = { partition_dict = {
"site_name": site_name "site_name": site_name,
"date_type": date_type,
"date_info": date_info,
} }
hdfs_path = CommonUtil.build_hdfs_path(hive_tb, partition_dict=partition_dict) # hdfs_path = CommonUtil.build_hdfs_path(hive_tb, partition_dict=partition_dict)
engine = get_remote_engine( engine = get_remote_engine(
site_name=site_name, site_name=site_name,
...@@ -63,7 +71,7 @@ if __name__ == '__main__': ...@@ -63,7 +71,7 @@ if __name__ == '__main__':
engine.sqoop_raw_import( engine.sqoop_raw_import(
query=query, query=query,
hive_table=hive_tb, hive_table=hive_tb,
hdfs_path=hdfs_path, # hdfs_path=hdfs_path,
partitions=partition_dict partitions=partition_dict
) )
......
...@@ -11,8 +11,9 @@ if __name__ == '__main__': ...@@ -11,8 +11,9 @@ if __name__ == '__main__':
date_info = CommonUtil.get_sys_arg(2, None) date_info = CommonUtil.get_sys_arg(2, None)
assert site_name is not None, "sitename 不能为空!" assert site_name is not None, "sitename 不能为空!"
assert date_info is not None, "date_info 不能为空!" assert date_info is not None, "date_info 不能为空!"
year = date_info.split("-")[0]
db_type = "mysql" # db_type = "mysql"
db_type = "postgresql_14"
if date_info == 'all': if date_info == 'all':
query = f""" query = f"""
select select
...@@ -26,7 +27,7 @@ if __name__ == '__main__': ...@@ -26,7 +27,7 @@ if __name__ == '__main__':
created_at as updated_at, created_at as updated_at,
date_info, date_info,
category_id category_id
from {site_name}_new_releases_top100_asin from {site_name}_nsr_top100_asin_{year}
where 1 = 1 where 1 = 1
and \$CONDITIONS and \$CONDITIONS
""" """
...@@ -43,7 +44,7 @@ if __name__ == '__main__': ...@@ -43,7 +44,7 @@ if __name__ == '__main__':
created_at as updated_at, created_at as updated_at,
date_info, date_info,
category_id category_id
from {site_name}_new_releases_top100_asin from {site_name}_nsr_top100_asin_{year}
where 1 = 1 where 1 = 1
and date_info = '{date_info}' and date_info = '{date_info}'
and \$CONDITIONS and \$CONDITIONS
......
...@@ -10,11 +10,12 @@ if __name__ == '__main__': ...@@ -10,11 +10,12 @@ if __name__ == '__main__':
site_name = CommonUtil.get_sys_arg(1, None) site_name = CommonUtil.get_sys_arg(1, None)
date_type = CommonUtil.get_sys_arg(2, None) date_type = CommonUtil.get_sys_arg(2, None)
date_info = CommonUtil.get_sys_arg(3, None) date_info = CommonUtil.get_sys_arg(3, None)
assert site_name is not None, "sitename 不能为空!" assert site_name is not None, "site_name 不能为空!"
assert date_type is not None, "sitename 不能为空!" assert date_type is not None, "date_type 不能为空!"
assert date_info is not None, "date_info 不能为空!" assert date_info is not None, "date_info 不能为空!"
year = date_info.split("-")[0]
db_type = "mysql" # db_type = "mysql"
db_type = "postgresql_14"
if date_info == 'all': if date_info == 'all':
query = f""" query = f"""
select select
...@@ -22,13 +23,14 @@ if __name__ == '__main__': ...@@ -22,13 +23,14 @@ if __name__ == '__main__':
asin, asin,
cate_1_id, cate_1_id,
cate_current_id, cate_current_id,
category_id,
bsr_rank, bsr_rank,
price,
rating, rating,
total_comments, total_comments,
created_at as updated_at, created_at as created_at,
date_info, updated_at as updated_at
category_id from {site_name}_nsr_top100_asin_{year}
from {site_name}_bs_category_top100_asin
where 1 = 1 where 1 = 1
and \$CONDITIONS and \$CONDITIONS
""" """
...@@ -39,19 +41,20 @@ if __name__ == '__main__': ...@@ -39,19 +41,20 @@ if __name__ == '__main__':
asin, asin,
cate_1_id, cate_1_id,
cate_current_id, cate_current_id,
category_id,
bsr_rank, bsr_rank,
price,
rating, rating,
total_comments, total_comments,
created_at as updated_at, created_at as created_at,
date_info, updated_at as updated_at
category_id from {site_name}_nsr_top100_asin_{year}
from {site_name}_bs_category_top100_asin
where 1 = 1 where 1 = 1
and date_info = '{date_info}' and date_info = '{date_info}'
and \$CONDITIONS and \$CONDITIONS
""" """
hive_tb = "ods_bs_category_top100_asin" hive_tb = "ods_nsr_top100_asin"
partition_dict = { partition_dict = {
"site_name": site_name, "site_name": site_name,
"date_type": date_type, "date_type": date_type,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment