Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
f3ab9b3c
Commit
f3ab9b3c
authored
Oct 24, 2025
by
fangxingjun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修复ods_asin_detail详情表数据同步造成错位问题
parent
232a786a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
82 additions
and
2 deletions
+82
-2
ods_asin_detail.py
Pyspark_job/sqoop_import/ods_asin_detail.py
+78
-1
common_udf.py
Pyspark_job/yswg_utils/common_udf.py
+4
-1
No files found.
Pyspark_job/sqoop_import/ods_asin_detail.py
View file @
f3ab9b3c
...
...
@@ -41,8 +41,85 @@ if __name__ == '__main__':
db_type
=
db_type
)
query
=
f
"""
SELECT
id,
asin,
REPLACE(REPLACE(REPLACE(img_url, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS img_url,
REPLACE(REPLACE(REPLACE(title, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS title,
title_len,
price,
rating,
total_comments,
buy_box_seller_type,
page_inventory,
REPLACE(REPLACE(REPLACE(category, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS category,
REPLACE(REPLACE(REPLACE(volume, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS volume,
weight,
rank,
launch_time,
created_time AS created_at, updated_time AS updated_at,
category_state,
img_num,
REPLACE(REPLACE(REPLACE(img_type, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS img_type,
REPLACE(REPLACE(REPLACE(activity_type, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS activity_type,
one_two_val,
three_four_val,
five_six_val,
eight_val,
qa_num,
one_star,
two_star,
three_star,
four_star,
five_star,
low_star,
REPLACE(REPLACE(REPLACE(together_asin, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS together_asin,
REPLACE(REPLACE(REPLACE(brand, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS brand,
REPLACE(REPLACE(REPLACE(ac_name, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS ac_name,
REPLACE(REPLACE(REPLACE(material, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS material,
REPLACE(REPLACE(REPLACE(node_id, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS node_id,
data_type,
REPLACE(REPLACE(REPLACE(sp_num, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS sp_num,
REPLACE(REPLACE(REPLACE(describe, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS describe,
REPLACE(REPLACE(REPLACE(weight_str, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS weight_str,
REPLACE(REPLACE(REPLACE(package_quantity, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS package_quantity,
REPLACE(REPLACE(REPLACE(pattern_name, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS pattern_name,
follow_sellers,
REPLACE(REPLACE(REPLACE(product_description, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS product_description,
REPLACE(REPLACE(REPLACE(buy_sales, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS buy_sales,
image_view,
spider_int,
REPLACE(REPLACE(REPLACE(lob_asin_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS lob_asin_json,
REPLACE(REPLACE(REPLACE(seller_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS seller_json,
REPLACE(REPLACE(REPLACE(customer_reviews_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS customer_reviews_json,
REPLACE(REPLACE(REPLACE(product_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS product_json,
REPLACE(REPLACE(REPLACE(product_detail_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS product_detail_json,
REPLACE(REPLACE(REPLACE(review_ai_text, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS review_ai_text,
REPLACE(REPLACE(REPLACE(review_label_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS review_label_json,
REPLACE(REPLACE(REPLACE(sp_initial_seen_asins_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS sp_initial_seen_asins_json,
REPLACE(REPLACE(REPLACE(sp_4stars_initial_seen_asins_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS sp_4stars_initial_seen_asins_json,
REPLACE(REPLACE(REPLACE(sp_delivery_initial_seen_asins_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS sp_delivery_initial_seen_asins_json,
REPLACE(REPLACE(REPLACE(compare_similar_asin_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS compare_similar_asin_json,
REPLACE(REPLACE(REPLACE(together_asin_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS together_asin_json,
REPLACE(REPLACE(REPLACE(min_match_asin_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS min_match_asin_json,
variat_num,
REPLACE(REPLACE(REPLACE(current_asin, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS current_asin,
REPLACE(REPLACE(REPLACE(img_list, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS img_list,
REPLACE(REPLACE(REPLACE(variat_list, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS variat_list,
REPLACE(REPLACE(REPLACE(parent_asin, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS parent_asin,
REPLACE(REPLACE(REPLACE(bundles_this_asins_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS bundles_this_asins_json,
REPLACE(REPLACE(REPLACE(video_m3u8_url, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS video_m3u8_url,
REPLACE(REPLACE(REPLACE(result_list_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS result_list_json,
REPLACE(REPLACE(REPLACE(bundle_asin_component_json, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS bundle_asin_component_json,
REPLACE(REPLACE(REPLACE(review_json_list, E'
\n
',' '), E'
\r
',' '), E'
\t
',' ') AS review_json_list
FROM {import_table}
WHERE 1=1 AND
\
$CONDITIONS
"""
engine
.
sqoop_raw_import
(
query
=
f
"SELECT {cols} FROM {import_table} WHERE 1=1 and
\
$CONDITIONS"
,
# query=f"SELECT {cols} FROM {import_table} WHERE 1=1 and \$CONDITIONS",
query
=
query
,
hive_table
=
hive_table
,
hdfs_path
=
hdfs_path
,
partitions
=
partition_dict
,
...
...
Pyspark_job/yswg_utils/common_udf.py
View file @
f3ab9b3c
...
...
@@ -835,7 +835,10 @@ def udf_parse_seller_json(seller_json):
:return: buy_box_seller_type: 1.amazon,2.fba,3.fbm,4.默认值(无类型)
:return: 类型、店铺名称、店铺id
"""
if
not
seller_json
:
# if not seller_json:
# if not seller_json or seller_json.strip() in ("", "null", "None"):
# return 0, None, None
if
'seller_id'
not
in
str
(
seller_json
):
return
0
,
None
,
None
else
:
seller_info_parsed
=
json
.
loads
(
seller_json
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment