Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
63a52979
Commit
63a52979
authored
Sep 05, 2025
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
流量选品-新增asin五点描述
parent
6840c5f3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
8 additions
and
5 deletions
+8
-5
dwt_flow_asin.py
Pyspark_job/dwt/dwt_flow_asin.py
+4
-4
es_flow_asin.py
Pyspark_job/export_es/es_flow_asin.py
+1
-1
es_util.py
Pyspark_job/utils/es_util.py
+3
-0
No files found.
Pyspark_job/dwt/dwt_flow_asin.py
View file @
63a52979
...
@@ -399,7 +399,7 @@ class DwtFlowAsin(Templates):
...
@@ -399,7 +399,7 @@ class DwtFlowAsin(Templates):
"img_num_rating"
)
+
F
.
col
(
"img_enlarge_rating"
)
"img_num_rating"
)
+
F
.
col
(
"img_enlarge_rating"
)
)
)
self
.
df_asin_detail
=
self
.
df_asin_detail
.
\
self
.
df_asin_detail
=
self
.
df_asin_detail
.
\
drop
(
"is_with_product_description"
,
"asin_
describe"
,
"asin_
image_view"
,
"category_node_rating"
,
"zr_rating"
,
drop
(
"is_with_product_description"
,
"asin_image_view"
,
"category_node_rating"
,
"zr_rating"
,
"sp_rating"
,
"a_add_rating"
,
"video_rating"
,
"brand_rating"
,
"product_describe_rating"
,
"sp_rating"
,
"a_add_rating"
,
"video_rating"
,
"brand_rating"
,
"product_describe_rating"
,
"highlight_rating"
,
"title_len_rating"
,
"title_brand_rating"
,
"img_num_rating"
,
"img_enlarge_rating"
)
"highlight_rating"
,
"title_len_rating"
,
"title_brand_rating"
,
"img_num_rating"
,
"img_enlarge_rating"
)
...
@@ -503,7 +503,7 @@ class DwtFlowAsin(Templates):
...
@@ -503,7 +503,7 @@ class DwtFlowAsin(Templates):
F
.
lit
(
None
)
.
alias
(
"buy_data_viewed_month"
),
F
.
lit
(
None
)
.
alias
(
"buy_data_viewed_week"
),
F
.
lit
(
None
)
.
alias
(
"buy_data_viewed_month"
),
F
.
lit
(
None
)
.
alias
(
"buy_data_viewed_week"
),
F
.
lit
(
None
)
.
alias
(
"theme_en"
),
F
.
lit
(
None
)
.
alias
(
"theme_label_en"
),
"asin_lqs_rating"
,
F
.
lit
(
None
)
.
alias
(
"theme_en"
),
F
.
lit
(
None
)
.
alias
(
"theme_label_en"
),
"asin_lqs_rating"
,
"asin_lqs_rating_detail"
,
"title_matching_degree"
,
"zr_flow_proportion"
,
"matrix_flow_proportion"
,
"asin_lqs_rating_detail"
,
"title_matching_degree"
,
"zr_flow_proportion"
,
"matrix_flow_proportion"
,
"matrix_ao_val"
,
"follow_sellers_count"
,
"seller_json"
,
"matrix_ao_val"
,
"follow_sellers_count"
,
"seller_json"
,
"asin_describe"
,
F
.
lit
(
self
.
site_name
)
.
alias
(
"site_name"
),
F
.
lit
(
self
.
date_type
)
.
alias
(
"date_type"
),
F
.
lit
(
self
.
site_name
)
.
alias
(
"site_name"
),
F
.
lit
(
self
.
date_type
)
.
alias
(
"date_type"
),
F
.
lit
(
self
.
date_info
)
.
alias
(
"date_info"
))
F
.
lit
(
self
.
date_info
)
.
alias
(
"date_info"
))
self
.
df_save
=
self
.
df_save
.
na
.
fill
(
self
.
df_save
=
self
.
df_save
.
na
.
fill
(
...
@@ -553,14 +553,14 @@ class DwtFlowAsin(Templates):
...
@@ -553,14 +553,14 @@ class DwtFlowAsin(Templates):
F
.
col
(
"current_category_rank"
)
.
alias
(
"category_current_rank"
),
"asin_type"
,
F
.
col
(
"current_category_rank"
)
.
alias
(
"category_current_rank"
),
"asin_type"
,
"bsr_orders"
,
F
.
col
(
"sales"
)
.
alias
(
"bsr_orders_sale"
),
"bsr_orders"
,
F
.
col
(
"sales"
)
.
alias
(
"bsr_orders_sale"
),
F
.
col
(
"asin_page_inventory"
)
.
alias
(
"page_inventory"
),
"asin_bought_month"
,
"seller_json"
,
F
.
col
(
"asin_page_inventory"
)
.
alias
(
"page_inventory"
),
"asin_bought_month"
,
"seller_json"
,
F
.
col
(
"asin_buy_box_seller_type"
)
.
alias
(
"buy_box_seller_type"
)
F
.
col
(
"asin_buy_box_seller_type"
)
.
alias
(
"buy_box_seller_type"
)
,
"asin_describe"
)
)
table_columns
=
"""asin, asin_ao_val, asin_title, asin_title_len, asin_category_desc, asin_volume,
table_columns
=
"""asin, asin_ao_val, asin_title, asin_title_len, asin_category_desc, asin_volume,
asin_weight, asin_launch_time, asin_brand_name, one_star, two_star, three_star, four_star, five_star, low_star,
asin_weight, asin_launch_time, asin_brand_name, one_star, two_star, three_star, four_star, five_star, low_star,
account_name, account_id, seller_country_name, category_first_id, parent_asin, variation_num, img_info,
account_name, account_id, seller_country_name, category_first_id, parent_asin, variation_num, img_info,
asin_crawl_date, asin_price, asin_rating, asin_total_comments, matrix_ao_val, zr_flow_proportion, matrix_flow_proportion,
asin_crawl_date, asin_price, asin_rating, asin_total_comments, matrix_ao_val, zr_flow_proportion, matrix_flow_proportion,
date_info, img_url, category_current_id, category_first_rank, category_current_rank, asin_type, bsr_orders, bsr_orders_sale,
date_info, img_url, category_current_id, category_first_rank, category_current_rank, asin_type, bsr_orders, bsr_orders_sale,
page_inventory, asin_bought_month, seller_json, buy_box_seller_type"""
page_inventory, asin_bought_month, seller_json, buy_box_seller_type
, asin_describe
"""
DorisHelper
.
spark_export_with_columns
(
df_save
=
df_doris
,
db_name
=
self
.
doris_db
,
table_name
=
self
.
asin_latest_detail_table
,
table_columns
=
table_columns
)
DorisHelper
.
spark_export_with_columns
(
df_save
=
df_doris
,
db_name
=
self
.
doris_db
,
table_name
=
self
.
asin_latest_detail_table
,
table_columns
=
table_columns
)
print
(
"save asin_latest_detail success"
)
print
(
"save asin_latest_detail success"
)
else
:
else
:
...
...
Pyspark_job/export_es/es_flow_asin.py
View file @
63a52979
...
@@ -93,7 +93,7 @@ class EsStDetail(TemplatesMysql):
...
@@ -93,7 +93,7 @@ class EsStDetail(TemplatesMysql):
current_category_rank, asin_weight_ratio, asin_bought_month, asin_lqs_rating, asin_lqs_rating_detail,
current_category_rank, asin_weight_ratio, asin_bought_month, asin_lqs_rating, asin_lqs_rating_detail,
title_matching_degree, asin_lob_info, is_contains_lob_info, is_package_quantity_abnormal, zr_flow_proportion,
title_matching_degree, asin_lob_info, is_contains_lob_info, is_package_quantity_abnormal, zr_flow_proportion,
matrix_flow_proportion, matrix_ao_val, customer_reviews_json as product_features, img_info,
matrix_flow_proportion, matrix_ao_val, customer_reviews_json as product_features, img_info,
coalesce(parent_asin, asin) as collapse_asin, follow_sellers_count
coalesce(parent_asin, asin) as collapse_asin, follow_sellers_count
, asin_describe
from {self.table_name} where site_name='{self.site_name}' and date_type='{self.date_type}' and date_info='{self.date_info}'
from {self.table_name} where site_name='{self.site_name}' and date_type='{self.date_type}' and date_info='{self.date_info}'
"""
"""
print
(
"sql:"
,
sql
)
print
(
"sql:"
,
sql
)
...
...
Pyspark_job/utils/es_util.py
View file @
63a52979
...
@@ -435,6 +435,9 @@ class EsUtils(object):
...
@@ -435,6 +435,9 @@ class EsUtils(object):
},
},
"follow_sellers_count"
:
{
"follow_sellers_count"
:
{
"type"
:
"integer"
"type"
:
"integer"
},
"asin_describe"
:
{
"type"
:
"text"
}
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment