Commit f3ab9b3c by fangxingjun

修复ods_asin_detail详情表数据同步造成错位问题

parent 232a786a
......@@ -41,8 +41,85 @@ if __name__ == '__main__':
db_type=db_type
)
query = f"""
SELECT
id,
asin,
REPLACE(REPLACE(REPLACE(img_url, E'\n',' '), E'\r',' '), E'\t',' ') AS img_url,
REPLACE(REPLACE(REPLACE(title, E'\n',' '), E'\r',' '), E'\t',' ') AS title,
title_len,
price,
rating,
total_comments,
buy_box_seller_type,
page_inventory,
REPLACE(REPLACE(REPLACE(category, E'\n',' '), E'\r',' '), E'\t',' ') AS category,
REPLACE(REPLACE(REPLACE(volume, E'\n',' '), E'\r',' '), E'\t',' ') AS volume,
weight,
rank,
launch_time,
created_time AS created_at, updated_time AS updated_at,
category_state,
img_num,
REPLACE(REPLACE(REPLACE(img_type, E'\n',' '), E'\r',' '), E'\t',' ') AS img_type,
REPLACE(REPLACE(REPLACE(activity_type, E'\n',' '), E'\r',' '), E'\t',' ') AS activity_type,
one_two_val,
three_four_val,
five_six_val,
eight_val,
qa_num,
one_star,
two_star,
three_star,
four_star,
five_star,
low_star,
REPLACE(REPLACE(REPLACE(together_asin, E'\n',' '), E'\r',' '), E'\t',' ') AS together_asin,
REPLACE(REPLACE(REPLACE(brand, E'\n',' '), E'\r',' '), E'\t',' ') AS brand,
REPLACE(REPLACE(REPLACE(ac_name, E'\n',' '), E'\r',' '), E'\t',' ') AS ac_name,
REPLACE(REPLACE(REPLACE(material, E'\n',' '), E'\r',' '), E'\t',' ') AS material,
REPLACE(REPLACE(REPLACE(node_id, E'\n',' '), E'\r',' '), E'\t',' ') AS node_id,
data_type,
REPLACE(REPLACE(REPLACE(sp_num, E'\n',' '), E'\r',' '), E'\t',' ') AS sp_num,
REPLACE(REPLACE(REPLACE(describe, E'\n',' '), E'\r',' '), E'\t',' ') AS describe,
REPLACE(REPLACE(REPLACE(weight_str, E'\n',' '), E'\r',' '), E'\t',' ') AS weight_str,
REPLACE(REPLACE(REPLACE(package_quantity, E'\n',' '), E'\r',' '), E'\t',' ') AS package_quantity,
REPLACE(REPLACE(REPLACE(pattern_name, E'\n',' '), E'\r',' '), E'\t',' ') AS pattern_name,
follow_sellers,
REPLACE(REPLACE(REPLACE(product_description, E'\n',' '), E'\r',' '), E'\t',' ') AS product_description,
REPLACE(REPLACE(REPLACE(buy_sales, E'\n',' '), E'\r',' '), E'\t',' ') AS buy_sales,
image_view,
spider_int,
REPLACE(REPLACE(REPLACE(lob_asin_json, E'\n',' '), E'\r',' '), E'\t',' ') AS lob_asin_json,
REPLACE(REPLACE(REPLACE(seller_json, E'\n',' '), E'\r',' '), E'\t',' ') AS seller_json,
REPLACE(REPLACE(REPLACE(customer_reviews_json, E'\n',' '), E'\r',' '), E'\t',' ') AS customer_reviews_json,
REPLACE(REPLACE(REPLACE(product_json, E'\n',' '), E'\r',' '), E'\t',' ') AS product_json,
REPLACE(REPLACE(REPLACE(product_detail_json, E'\n',' '), E'\r',' '), E'\t',' ') AS product_detail_json,
REPLACE(REPLACE(REPLACE(review_ai_text, E'\n',' '), E'\r',' '), E'\t',' ') AS review_ai_text,
REPLACE(REPLACE(REPLACE(review_label_json, E'\n',' '), E'\r',' '), E'\t',' ') AS review_label_json,
REPLACE(REPLACE(REPLACE(sp_initial_seen_asins_json, E'\n',' '), E'\r',' '), E'\t',' ') AS sp_initial_seen_asins_json,
REPLACE(REPLACE(REPLACE(sp_4stars_initial_seen_asins_json, E'\n',' '), E'\r',' '), E'\t',' ') AS sp_4stars_initial_seen_asins_json,
REPLACE(REPLACE(REPLACE(sp_delivery_initial_seen_asins_json, E'\n',' '), E'\r',' '), E'\t',' ') AS sp_delivery_initial_seen_asins_json,
REPLACE(REPLACE(REPLACE(compare_similar_asin_json, E'\n',' '), E'\r',' '), E'\t',' ') AS compare_similar_asin_json,
REPLACE(REPLACE(REPLACE(together_asin_json, E'\n',' '), E'\r',' '), E'\t',' ') AS together_asin_json,
REPLACE(REPLACE(REPLACE(min_match_asin_json, E'\n',' '), E'\r',' '), E'\t',' ') AS min_match_asin_json,
variat_num,
REPLACE(REPLACE(REPLACE(current_asin, E'\n',' '), E'\r',' '), E'\t',' ') AS current_asin,
REPLACE(REPLACE(REPLACE(img_list, E'\n',' '), E'\r',' '), E'\t',' ') AS img_list,
REPLACE(REPLACE(REPLACE(variat_list, E'\n',' '), E'\r',' '), E'\t',' ') AS variat_list,
REPLACE(REPLACE(REPLACE(parent_asin, E'\n',' '), E'\r',' '), E'\t',' ') AS parent_asin,
REPLACE(REPLACE(REPLACE(bundles_this_asins_json, E'\n',' '), E'\r',' '), E'\t',' ') AS bundles_this_asins_json,
REPLACE(REPLACE(REPLACE(video_m3u8_url, E'\n',' '), E'\r',' '), E'\t',' ') AS video_m3u8_url,
REPLACE(REPLACE(REPLACE(result_list_json, E'\n',' '), E'\r',' '), E'\t',' ') AS result_list_json,
REPLACE(REPLACE(REPLACE(bundle_asin_component_json, E'\n',' '), E'\r',' '), E'\t',' ') AS bundle_asin_component_json,
REPLACE(REPLACE(REPLACE(review_json_list, E'\n',' '), E'\r',' '), E'\t',' ') AS review_json_list
FROM {import_table}
WHERE 1=1 AND \$CONDITIONS
"""
engine.sqoop_raw_import(
query=f"SELECT {cols} FROM {import_table} WHERE 1=1 and \$CONDITIONS",
# query=f"SELECT {cols} FROM {import_table} WHERE 1=1 and \$CONDITIONS",
query=query,
hive_table=hive_table,
hdfs_path=hdfs_path,
partitions=partition_dict,
......
......@@ -835,7 +835,10 @@ def udf_parse_seller_json(seller_json):
:return: buy_box_seller_type: 1.amazon,2.fba,3.fbm,4.默认值(无类型)
:return: 类型、店铺名称、店铺id
"""
if not seller_json:
# if not seller_json:
# if not seller_json or seller_json.strip() in ("", "null", "None"):
# return 0, None, None
if 'seller_id' not in str(seller_json):
return 0, None, None
else:
seller_info_parsed = json.loads(seller_json)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment