Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
bb777f59
Commit
bb777f59
authored
Jun 22, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
流量选品月流程,增加top品牌badge
parent
bc0deed5
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
30 additions
and
6 deletions
+30
-6
dwt_flow_asin_month.py
Pyspark_job/doris_handle/dwt_flow_asin_month.py
+8
-3
dwt_flow_asin.py
Pyspark_job/dwt/dwt_flow_asin.py
+22
-3
No files found.
Pyspark_job/doris_handle/dwt_flow_asin_month.py
View file @
bb777f59
...
@@ -196,6 +196,7 @@ CREATE TABLE IF NOT EXISTS `selection`.`{table_name}`
...
@@ -196,6 +196,7 @@ CREATE TABLE IF NOT EXISTS `selection`.`{table_name}`
`skus_num_creat` INT NULL,
`skus_num_creat` INT NULL,
`skus_num_creat_all` INT NULL,
`skus_num_creat_all` INT NULL,
`title_matching_degree` DECIMAL(20,4) NULL,
`title_matching_degree` DECIMAL(20,4) NULL,
`brand_badge_reason` STRING NULL,
INDEX idx_title (`title`) USING INVERTED PROPERTIES("parser" = "english") COMMENT '标题倒排索引',
INDEX idx_title (`title`) USING INVERTED PROPERTIES("parser" = "english") COMMENT '标题倒排索引',
INDEX idx_title_stem (`title_stem`) USING INVERTED PROPERTIES("parser" = "english") COMMENT '标题词干倒排索引'
INDEX idx_title_stem (`title_stem`) USING INVERTED PROPERTIES("parser" = "english") COMMENT '标题词干倒排索引'
) ENGINE=OLAP
) ENGINE=OLAP
...
@@ -337,7 +338,8 @@ SELECT
...
@@ -337,7 +338,8 @@ SELECT
COALESCE(aa.auctions_num_all, 0) AS auctions_num_all,
COALESCE(aa.auctions_num_all, 0) AS auctions_num_all,
COALESCE(aa.skus_num_creat, 0) AS skus_num_creat,
COALESCE(aa.skus_num_creat, 0) AS skus_num_creat,
COALESCE(aa.skus_num_creat_all, 0) AS skus_num_creat_all,
COALESCE(aa.skus_num_creat_all, 0) AS skus_num_creat_all,
f.title_matching_degree
f.title_matching_degree,
f.brand_badge_reason
FROM `dwt`.`{site_name}_flow_asin_month` f
FROM `dwt`.`{site_name}_flow_asin_month` f
LEFT JOIN `dwd`.`dwd_asin_profit_rate_latest` pr
LEFT JOIN `dwd`.`dwd_asin_profit_rate_latest` pr
ON f.asin = pr.asin AND f.price = pr.price AND pr.site_name = '{site_name}'
ON f.asin = pr.asin AND f.price = pr.price AND pr.site_name = '{site_name}'
...
@@ -495,7 +497,8 @@ def main(site_name, date_info, result_type='formal'):
...
@@ -495,7 +497,8 @@ def main(site_name, date_info, result_type='formal'):
asin_describe, asin_fbm_price, describe_len,
asin_describe, asin_fbm_price, describe_len,
title_matching_degree,
title_matching_degree,
multi_color_flag, multi_color_str,
multi_color_flag, multi_color_str,
amazon_label
amazon_label,
brand_badge_reason
FROM dwt_flow_asin
FROM dwt_flow_asin
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
"""
"""
...
@@ -633,6 +636,8 @@ def main(site_name, date_info, result_type='formal'):
...
@@ -633,6 +636,8 @@ def main(site_name, date_info, result_type='formal'):
F
.
col
(
'multi_color_str'
),
F
.
col
(
'multi_color_str'
),
# ===== Amazon标签 =====
# ===== Amazon标签 =====
F
.
col
(
'amazon_label'
),
F
.
col
(
'amazon_label'
),
# ===== 品牌徽章 =====
F
.
col
(
'brand_badge_reason'
),
)
.
cache
()
)
.
cache
()
count
=
df_save
.
count
()
count
=
df_save
.
count
()
...
@@ -657,7 +662,7 @@ def main(site_name, date_info, result_type='formal'):
...
@@ -657,7 +662,7 @@ def main(site_name, date_info, result_type='formal'):
"asin_bought_month, asin_lqs_rating, asin_lqs_rating_detail, asin_lob_info, is_contains_lob_info, "
"asin_bought_month, asin_lqs_rating, asin_lqs_rating_detail, asin_lob_info, is_contains_lob_info, "
"is_package_quantity_abnormal, zr_flow_proportion, matrix_flow_proportion, matrix_ao_val, "
"is_package_quantity_abnormal, zr_flow_proportion, matrix_flow_proportion, matrix_ao_val, "
"product_features, img_info, collapse_asin, follow_sellers_count, asin_describe, fbm_price, describe_len, "
"product_features, img_info, collapse_asin, follow_sellers_count, asin_describe, fbm_price, describe_len, "
"title_matching_degree, multi_color_flag, multi_color_str, amazon_label"
"title_matching_degree, multi_color_flag, multi_color_str, amazon_label
, brand_badge_reason
"
)
)
print
(
f
"[Step 3] 写入 Doris {DORIS_DB}.{doris_table}"
)
print
(
f
"[Step 3] 写入 Doris {DORIS_DB}.{doris_table}"
)
DorisHelper
.
spark_export_with_columns
(
DorisHelper
.
spark_export_with_columns
(
...
...
Pyspark_job/dwt/dwt_flow_asin.py
View file @
bb777f59
...
@@ -80,6 +80,7 @@ class DwtFlowAsin(Templates):
...
@@ -80,6 +80,7 @@ class DwtFlowAsin(Templates):
self
.
df_keepa_asin
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_keepa_asin
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_source_flag
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_source_flag
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_parent_asin_variat_agg
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_parent_asin_variat_agg
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_brand_badge
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
color_set
=
set
()
# 颜色词表,read_data 阶段填充
self
.
color_set
=
set
()
# 颜色词表,read_data 阶段填充
@staticmethod
@staticmethod
...
@@ -302,7 +303,19 @@ class DwtFlowAsin(Templates):
...
@@ -302,7 +303,19 @@ class DwtFlowAsin(Templates):
self
.
df_asin_source_flag
=
self
.
df_asin_source_flag
.
repartition
(
60
)
.
persist
(
StorageLevel
.
DISK_ONLY
)
self
.
df_asin_source_flag
=
self
.
df_asin_source_flag
.
repartition
(
60
)
.
persist
(
StorageLevel
.
DISK_ONLY
)
self
.
df_asin_source_flag
.
show
(
10
,
truncate
=
False
)
self
.
df_asin_source_flag
.
show
(
10
,
truncate
=
False
)
print
(
"11.读取颜色词表 dim_asin_color_info"
)
print
(
"11.读取当月品牌徽章数据 dwd_st_brand_badge"
)
sql
=
f
"""
select brand, brand_badge_reason
from dwd_st_brand_badge
where site_name = '{self.site_name}'
and date_type = '{self.date_type}'
and date_info = '{self.date_info}'
"""
print
(
"sql:"
+
sql
)
self
.
df_brand_badge
=
F
.
broadcast
(
self
.
spark
.
sql
(
sqlQuery
=
sql
))
self
.
df_brand_badge
.
show
(
10
,
truncate
=
False
)
print
(
"12.读取颜色词表 dim_asin_color_info"
)
color_rows
=
self
.
spark
.
sql
(
color_rows
=
self
.
spark
.
sql
(
f
"SELECT lower(en_name) as en_name FROM dim_asin_color_info WHERE site_name='{self.site_name}'"
f
"SELECT lower(en_name) as en_name FROM dim_asin_color_info WHERE site_name='{self.site_name}'"
)
.
collect
()
)
.
collect
()
...
@@ -643,6 +656,11 @@ class DwtFlowAsin(Templates):
...
@@ -643,6 +656,11 @@ class DwtFlowAsin(Templates):
self
.
df_flow_asin_last
.
unpersist
()
self
.
df_flow_asin_last
.
unpersist
()
self
.
df_flow_asin_last_year
.
unpersist
()
self
.
df_flow_asin_last_year
.
unpersist
()
def handle_brand_badge(self):
    """Attach the brand badge columns to the ASIN detail frame.

    Left-joins ``self.df_brand_badge`` onto ``self.df_asin_detail`` where
    ``asin_brand_name`` equals ``brand``, then drops the ``brand`` join
    column and stores the result back on ``self.df_asin_detail``.
    Detail rows without a matching brand are kept (left join).
    """
    detail_df = self.df_asin_detail
    badge_df = self.df_brand_badge

    # Join condition is built from explicit column references on each
    # side, because the key columns carry different names.
    brand_matches = detail_df['asin_brand_name'] == badge_df['brand']

    joined_df = detail_df.join(badge_df, brand_matches, how='left')
    self.df_asin_detail = joined_df.drop('brand')
def
handle_asin_different_source
(
self
):
def
handle_asin_different_source
(
self
):
self
.
df_asin_detail
=
self
.
df_asin_detail
.
join
(
self
.
df_asin_detail
=
self
.
df_asin_detail
.
join
(
self
.
df_asin_source_flag
,
on
=
[
'asin'
],
how
=
'left'
self
.
df_asin_source_flag
,
on
=
[
'asin'
],
how
=
'left'
...
@@ -809,8 +827,8 @@ class DwtFlowAsin(Templates):
...
@@ -809,8 +827,8 @@ class DwtFlowAsin(Templates):
"matrix_ao_val"
,
"follow_sellers_count"
,
"seller_json"
,
"asin_describe"
,
"asin_fbm_price"
,
"matrix_ao_val"
,
"follow_sellers_count"
,
"seller_json"
,
"asin_describe"
,
"asin_fbm_price"
,
"asin_bought_mom"
,
"asin_bought_yoy"
,
"describe_len"
,
"tracking_since"
,
"tracking_since_type"
,
"asin_bought_mom"
,
"asin_bought_yoy"
,
"describe_len"
,
"tracking_since"
,
"tracking_since_type"
,
"asin_source_flag"
,
"bsr_last_seen_at"
,
"bsr_seen_count_30d"
,
"nsr_last_seen_at"
,
"nsr_seen_count_30d"
,
"asin_source_flag"
,
"bsr_last_seen_at"
,
"bsr_seen_count_30d"
,
"nsr_last_seen_at"
,
"nsr_seen_count_30d"
,
"multi_color_flag"
,
"multi_color_str"
,
"amazon_label"
,
"multi_color_flag"
,
"multi_color_str"
,
"amazon_label"
,
"asin_weight_str"
,
"best_sellers_herf"
,
"best_sellers_rank"
,
"asin_weight_str"
,
"best_sellers_herf"
,
"best_sellers_rank"
,
"brand_badge_reason"
,
F
.
lit
(
self
.
site_name
)
.
alias
(
"site_name"
),
F
.
lit
(
self
.
date_type
)
.
alias
(
"date_type"
),
F
.
lit
(
self
.
site_name
)
.
alias
(
"site_name"
),
F
.
lit
(
self
.
date_type
)
.
alias
(
"date_type"
),
F
.
lit
(
self
.
date_info
)
.
alias
(
"date_info"
))
F
.
lit
(
self
.
date_info
)
.
alias
(
"date_info"
))
self
.
df_save
=
self
.
df_save
.
na
.
fill
(
self
.
df_save
=
self
.
df_save
.
na
.
fill
(
...
@@ -932,6 +950,7 @@ class DwtFlowAsin(Templates):
...
@@ -932,6 +950,7 @@ class DwtFlowAsin(Templates):
self
.
handle_other_new_col
()
self
.
handle_other_new_col
()
self
.
handle_multi_color_flag
()
self
.
handle_multi_color_flag
()
self
.
handle_asin_different_source
()
self
.
handle_asin_different_source
()
self
.
handle_brand_badge
()
self
.
handle_column
()
self
.
handle_column
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment