Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
9fdbc695
Commit
9fdbc695
authored
Feb 26, 2026
by
fangxingjun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no message
parent
49f49ef3
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
129 additions
and
0 deletions
+129
-0
ods_self_asin_detail_new.py
Pyspark_job/sqoop_import/ods_self_asin_detail_new.py
+129
-0
No files found.
Pyspark_job/sqoop_import/ods_self_asin_detail_new.py
0 → 100644
View file @
9fdbc695
import
os
import
sys
sys
.
path
.
append
(
os
.
path
.
dirname
(
sys
.
path
[
0
]))
from
utils.common_util
import
CommonUtil
from
utils.secure_db_client
import
get_remote_engine
# Year suffix of the PostgreSQL source table name.
# NOTE(review): this was hard-coded as ``{2025}`` inside the original f-string;
# confirm whether it should instead be derived from ``date_info``.
_SOURCE_TABLE_YEAR = 2025

# Columns selected from the source table, in the exact order of the original
# query. Do not reorder without checking how the Hive table maps them.
_IMPORT_COLUMNS = (
    "asin", "img_url", "title", "title_len", "price", "rating",
    "total_comments", "buy_box_seller_type", "page_inventory", "category",
    "volume", "weight", "rank", "launch_time", "video_url", "add_url",
    "material", "img_num", "img_type", "qa_num", "brand", "ac_name",
    "node_id", "sp_num", "mpn", "online_time", "describe", "one_star",
    "two_star", "three_star", "four_star", "five_star", "low_star",
    "asin_type", "is_coupon", "search_category", "weight_str",
    "account_name", "other_seller_name", "bsr_date_info", "account_id",
    "package_quantity", "pattern_name", "together_asin", "activity_type",
    "one_two_val", "three_four_val", "five_six_val", "eight_val",
    "product_description", "follow_sellers", "buy_sales", "image_view",
    "product_json", "productdetail_json", "review_ai_text",
    "review_label_json", "lob_asin_json", "sp_initial_seen_asins_json",
    "sp_4stars_initial_seen_asins_json",
    "sp_delivery_initial_seen_asins_json", "compare_similar_asin_json",
    "customer_reviews_json", "together_asin_json", "min_match_asin_json",
    "seller_json", "returns", "created_at", "updated_at",
    "result_list_json", "variat_list", "bundle_asin_component_json",
    "cart_type",
)
# Columns deliberately NOT imported (they were commented out in the original
# query): spider_int, variat_num, img_list, parent_asin, video_m3u8_url,
# review_json_list, fbm_delivery_price.

# Columns selected as-is; every other column in _IMPORT_COLUMNS is wrapped in
# the newline/CR/tab replacement expression below.
_PASSTHROUGH_COLUMNS = frozenset({
    "asin", "title_len", "price", "rating", "total_comments",
    "buy_box_seller_type", "page_inventory", "weight", "rank", "launch_time",
    "img_num", "qa_num", "online_time", "one_star", "two_star", "three_star",
    "four_star", "five_star", "low_star", "asin_type", "one_two_val",
    "three_four_val", "five_six_val", "eight_val", "follow_sellers",
    "image_view", "returns", "created_at", "updated_at",
})

# SQL that replaces newline, carriage return and tab with a space.
# The \n, \r and \t below are Python escapes in a non-raw string (as in the
# original f-string), so the real control characters are embedded inside the
# E'...' literals of the generated SQL.
_STRIP_CONTROL_CHARS_SQL = (
    "REPLACE(REPLACE(REPLACE({col}, E'\n',' '), E'\r',' '), E'\t',' ')"
    " AS {col}"
)


def _column_expr(column):
    """Return the SELECT-list expression for one column."""
    if column in _PASSTHROUGH_COLUMNS:
        return column
    return _STRIP_CONTROL_CHARS_SQL.format(col=column)


def build_import_query(import_table, date_info):
    """Build the free-form query handed to ``sqoop_raw_import``.

    Args:
        import_table: Name of the source table to select from.
        date_info: Value compared against the ``date_info`` column. It is
            interpolated into the SQL text verbatim, so it must come from a
            trusted caller.

    Returns:
        The SQL text: the ordered column list, the ``date_info`` filter and
        a trailing ``\\$CONDITIONS`` placeholder.
    """
    select_list = ",\n".join(_column_expr(col) for col in _IMPORT_COLUMNS)
    # "\\$" yields the same backslash-dollar text the original produced with
    # the invalid escape "\$", without the SyntaxWarning on newer Pythons.
    # NOTE(review): the backslash presumably protects "$CONDITIONS" from
    # shell expansion inside the import helper -- confirm.
    return (
        "\nSELECT\n"
        f"{select_list}\n"
        f"FROM {import_table}\n"
        f"WHERE date_info='{date_info}' AND \\$CONDITIONS\n"
    )


if __name__ == '__main__':
    # Positional CLI arguments: site_name, date_type, date_info.
    site_name = CommonUtil.get_sys_arg(1, None)
    date_type = CommonUtil.get_sys_arg(2, None)
    date_info = CommonUtil.get_sys_arg(3, None)

    # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
    for arg_name, arg_value in (
        ("site_name", site_name),
        ("date_type", date_type),
        ("date_info", date_info),
    ):
        if arg_value is None:
            raise ValueError(f"{arg_name} 不能为空!")

    db_type = 'postgresql_15'
    import_table = f"{site_name}_self_asin_detail_{_SOURCE_TABLE_YEAR}"
    hive_table = "ods_self_asin_detail"
    partition_dict = {
        "site_name": site_name,
        "date_type": date_type,
        "date_info": date_info,
    }

    engine = get_remote_engine(site_name=site_name, db_type=db_type)
    query = build_import_query(import_table, date_info)

    # No hdfs_path, m or split_by is passed; the original had those keyword
    # arguments commented out.
    engine.sqoop_raw_import(
        query=query,
        hive_table=hive_table,
        partitions=partition_dict,
    )
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment