Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
d10e99bf
Commit
d10e99bf
authored
Jun 23, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
top品牌改为Doris关联查询
parent
84d2c994
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
28 deletions
+7
-28
dwt_flow_asin_month.py
Pyspark_job/doris_handle/dwt_flow_asin_month.py
+5
-6
dwt_flow_asin.py
Pyspark_job/dwt/dwt_flow_asin.py
+2
-22
No files found.
Pyspark_job/doris_handle/dwt_flow_asin_month.py
View file @
d10e99bf
...
@@ -340,7 +340,7 @@ SELECT
...
@@ -340,7 +340,7 @@ SELECT
COALESCE(aa.skus_num_creat, 0) AS skus_num_creat,
COALESCE(aa.skus_num_creat, 0) AS skus_num_creat,
COALESCE(aa.skus_num_creat_all, 0) AS skus_num_creat_all,
COALESCE(aa.skus_num_creat_all, 0) AS skus_num_creat_all,
f.title_matching_degree,
f.title_matching_degree,
f
.brand_badge_reason
bb
.brand_badge_reason
FROM `dwt`.`{site_name}_flow_asin_month` f
FROM `dwt`.`{site_name}_flow_asin_month` f
LEFT JOIN `dwd`.`dwd_asin_profit_rate_latest` pr
LEFT JOIN `dwd`.`dwd_asin_profit_rate_latest` pr
ON f.asin = pr.asin AND f.price = pr.price AND pr.site_name = '{site_name}'
ON f.asin = pr.asin AND f.price = pr.price AND pr.site_name = '{site_name}'
...
@@ -387,6 +387,8 @@ LEFT JOIN (
...
@@ -387,6 +387,8 @@ LEFT JOIN (
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
) cf ON f.asin = cf.asin
) cf ON f.asin = cf.asin
LEFT JOIN `dwd`.`dwd_asin_auction` aa ON f.asin = aa.asin
LEFT JOIN `dwd`.`dwd_asin_auction` aa ON f.asin = aa.asin
LEFT JOIN `dwd`.`dwd_st_brand_badge` bb
ON f.brand = bb.brand AND bb.site_name = '{site_name}' AND bb.date_info = '{date_info}'
WHERE f.date_info = '{date_info}'
WHERE f.date_info = '{date_info}'
"""
"""
...
@@ -498,8 +500,7 @@ def main(site_name, date_info, result_type='formal'):
...
@@ -498,8 +500,7 @@ def main(site_name, date_info, result_type='formal'):
asin_describe, asin_fbm_price, describe_len,
asin_describe, asin_fbm_price, describe_len,
title_matching_degree,
title_matching_degree,
multi_color_flag, multi_color_str,
multi_color_flag, multi_color_str,
amazon_label,
amazon_label
brand_badge_reason
FROM dwt_flow_asin
FROM dwt_flow_asin
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
WHERE site_name = '{site_name}' AND date_type = 'month' AND date_info = '{date_info}'
"""
"""
...
@@ -637,8 +638,6 @@ def main(site_name, date_info, result_type='formal'):
...
@@ -637,8 +638,6 @@ def main(site_name, date_info, result_type='formal'):
F
.
col
(
'multi_color_str'
),
F
.
col
(
'multi_color_str'
),
# ===== Amazon标签 =====
# ===== Amazon标签 =====
F
.
col
(
'amazon_label'
),
F
.
col
(
'amazon_label'
),
# ===== 品牌徽章 =====
F
.
col
(
'brand_badge_reason'
),
)
.
cache
()
)
.
cache
()
count
=
df_save
.
count
()
count
=
df_save
.
count
()
...
@@ -663,7 +662,7 @@ def main(site_name, date_info, result_type='formal'):
...
@@ -663,7 +662,7 @@ def main(site_name, date_info, result_type='formal'):
"asin_bought_month, asin_lqs_rating, asin_lqs_rating_detail, asin_lob_info, is_contains_lob_info, "
"asin_bought_month, asin_lqs_rating, asin_lqs_rating_detail, asin_lob_info, is_contains_lob_info, "
"is_package_quantity_abnormal, zr_flow_proportion, matrix_flow_proportion, matrix_ao_val, "
"is_package_quantity_abnormal, zr_flow_proportion, matrix_flow_proportion, matrix_ao_val, "
"product_features, img_info, collapse_asin, follow_sellers_count, asin_describe, fbm_price, describe_len, "
"product_features, img_info, collapse_asin, follow_sellers_count, asin_describe, fbm_price, describe_len, "
"title_matching_degree, multi_color_flag, multi_color_str, amazon_label
, brand_badge_reason
"
"title_matching_degree, multi_color_flag, multi_color_str, amazon_label"
)
)
print
(
f
"[Step 3] 写入 Doris {DORIS_DB}.{doris_table}"
)
print
(
f
"[Step 3] 写入 Doris {DORIS_DB}.{doris_table}"
)
DorisHelper
.
spark_export_with_columns
(
DorisHelper
.
spark_export_with_columns
(
...
...
Pyspark_job/dwt/dwt_flow_asin.py
View file @
d10e99bf
...
@@ -80,7 +80,6 @@ class DwtFlowAsin(Templates):
...
@@ -80,7 +80,6 @@ class DwtFlowAsin(Templates):
self
.
df_keepa_asin
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_keepa_asin
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_source_flag
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_asin_source_flag
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_parent_asin_variat_agg
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_parent_asin_variat_agg
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
df_brand_badge
=
self
.
spark
.
sql
(
f
"select 1+1;"
)
self
.
color_set
=
set
()
# 颜色词表,read_data 阶段填充
self
.
color_set
=
set
()
# 颜色词表,read_data 阶段填充
@staticmethod
@staticmethod
...
@@ -303,19 +302,7 @@ class DwtFlowAsin(Templates):
...
@@ -303,19 +302,7 @@ class DwtFlowAsin(Templates):
self
.
df_asin_source_flag
=
self
.
df_asin_source_flag
.
repartition
(
60
)
.
persist
(
StorageLevel
.
DISK_ONLY
)
self
.
df_asin_source_flag
=
self
.
df_asin_source_flag
.
repartition
(
60
)
.
persist
(
StorageLevel
.
DISK_ONLY
)
self
.
df_asin_source_flag
.
show
(
10
,
truncate
=
False
)
self
.
df_asin_source_flag
.
show
(
10
,
truncate
=
False
)
print
(
"11.读取当月品牌徽章数据 dwd_st_brand_badge"
)
print
(
"11.读取颜色词表 dim_asin_color_info"
)
sql
=
f
"""
select brand, brand_badge_reason
from dwd_st_brand_badge
where site_name = '{self.site_name}'
and date_type = '{self.date_type}'
and date_info = '{self.date_info}'
"""
print
(
"sql:"
+
sql
)
self
.
df_brand_badge
=
F
.
broadcast
(
self
.
spark
.
sql
(
sqlQuery
=
sql
))
self
.
df_brand_badge
.
show
(
10
,
truncate
=
False
)
print
(
"12.读取颜色词表 dim_asin_color_info"
)
color_rows
=
self
.
spark
.
sql
(
color_rows
=
self
.
spark
.
sql
(
f
"SELECT lower(en_name) as en_name FROM dim_asin_color_info WHERE site_name='{self.site_name}'"
f
"SELECT lower(en_name) as en_name FROM dim_asin_color_info WHERE site_name='{self.site_name}'"
)
.
collect
()
)
.
collect
()
...
@@ -656,11 +643,6 @@ class DwtFlowAsin(Templates):
...
@@ -656,11 +643,6 @@ class DwtFlowAsin(Templates):
self
.
df_flow_asin_last
.
unpersist
()
self
.
df_flow_asin_last
.
unpersist
()
self
.
df_flow_asin_last_year
.
unpersist
()
self
.
df_flow_asin_last_year
.
unpersist
()
def
handle_brand_badge
(
self
):
self
.
df_asin_detail
=
self
.
df_asin_detail
.
join
(
self
.
df_brand_badge
,
self
.
df_asin_detail
[
'asin_brand_name'
]
==
self
.
df_brand_badge
[
'brand'
],
how
=
'left'
)
.
drop
(
'brand'
)
def
handle_asin_different_source
(
self
):
def
handle_asin_different_source
(
self
):
self
.
df_asin_detail
=
self
.
df_asin_detail
.
join
(
self
.
df_asin_detail
=
self
.
df_asin_detail
.
join
(
self
.
df_asin_source_flag
,
on
=
[
'asin'
],
how
=
'left'
self
.
df_asin_source_flag
,
on
=
[
'asin'
],
how
=
'left'
...
@@ -827,8 +809,7 @@ class DwtFlowAsin(Templates):
...
@@ -827,8 +809,7 @@ class DwtFlowAsin(Templates):
"matrix_ao_val"
,
"follow_sellers_count"
,
"seller_json"
,
"asin_describe"
,
"asin_fbm_price"
,
"matrix_ao_val"
,
"follow_sellers_count"
,
"seller_json"
,
"asin_describe"
,
"asin_fbm_price"
,
"asin_bought_mom"
,
"asin_bought_yoy"
,
"describe_len"
,
"tracking_since"
,
"tracking_since_type"
,
"asin_bought_mom"
,
"asin_bought_yoy"
,
"describe_len"
,
"tracking_since"
,
"tracking_since_type"
,
"asin_source_flag"
,
"bsr_last_seen_at"
,
"bsr_seen_count_30d"
,
"nsr_last_seen_at"
,
"nsr_seen_count_30d"
,
"asin_source_flag"
,
"bsr_last_seen_at"
,
"bsr_seen_count_30d"
,
"nsr_last_seen_at"
,
"nsr_seen_count_30d"
,
"multi_color_flag"
,
"multi_color_str"
,
"amazon_label"
,
"asin_weight_str"
,
"best_sellers_herf"
,
"best_sellers_rank"
,
"multi_color_flag"
,
"multi_color_str"
,
"amazon_label"
,
"asin_weight_str"
,
"best_sellers_herf"
,
"best_sellers_rank"
,
"brand_badge_reason"
,
F
.
lit
(
self
.
site_name
)
.
alias
(
"site_name"
),
F
.
lit
(
self
.
date_type
)
.
alias
(
"date_type"
),
F
.
lit
(
self
.
site_name
)
.
alias
(
"site_name"
),
F
.
lit
(
self
.
date_type
)
.
alias
(
"date_type"
),
F
.
lit
(
self
.
date_info
)
.
alias
(
"date_info"
))
F
.
lit
(
self
.
date_info
)
.
alias
(
"date_info"
))
self
.
df_save
=
self
.
df_save
.
na
.
fill
(
self
.
df_save
=
self
.
df_save
.
na
.
fill
(
...
@@ -950,7 +931,6 @@ class DwtFlowAsin(Templates):
...
@@ -950,7 +931,6 @@ class DwtFlowAsin(Templates):
self
.
handle_other_new_col
()
self
.
handle_other_new_col
()
self
.
handle_multi_color_flag
()
self
.
handle_multi_color_flag
()
self
.
handle_asin_different_source
()
self
.
handle_asin_different_source
()
self
.
handle_brand_badge
()
self
.
handle_column
()
self
.
handle_column
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment