Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
81ba7508
Commit
81ba7508
authored
Mar 13, 2026
by
chenyuanjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix
parent
0cd7f499
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
34 additions
and
16 deletions
+34
-16
es_flow_asin.py
Pyspark_job/export_es/es_flow_asin.py
+8
-3
kafka_flow_asin_detail.py
Pyspark_job/my_kafka/kafka_flow_asin_detail.py
+11
-6
kafka_rank_asin_detail.py
Pyspark_job/my_kafka/kafka_rank_asin_detail.py
+11
-6
es_util.py
Pyspark_job/utils/es_util.py
+4
-1
No files found.
Pyspark_job/export_es/es_flow_asin.py
View file @
81ba7508
...
@@ -114,9 +114,14 @@ class EsStDetail(TemplatesMysql):
...
@@ -114,9 +114,14 @@ class EsStDetail(TemplatesMysql):
print
(
"sql:"
,
sql
)
print
(
"sql:"
,
sql
)
self
.
df_profit_rate
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
.
repartition
(
40
,
'asin'
)
.
withColumn
(
self
.
df_profit_rate
=
self
.
spark
.
sql
(
sqlQuery
=
sql
)
.
repartition
(
40
,
'asin'
)
.
withColumn
(
"profit_rate_extra"
,
"profit_rate_extra"
,
F
.
struct
(
F
.
when
(
F
.
col
(
"ocean_profit"
)
.
alias
(
"ocean_profit"
),
F
.
col
(
"ocean_profit"
)
.
isNull
()
&
F
.
col
(
"air_profit"
)
.
isNull
(),
F
.
col
(
"air_profit"
)
.
alias
(
"air_profit"
)
F
.
lit
(
None
)
)
.
otherwise
(
F
.
struct
(
F
.
col
(
"ocean_profit"
)
.
alias
(
"ocean_profit"
),
F
.
col
(
"air_profit"
)
.
alias
(
"air_profit"
)
)
)
)
)
.
drop
(
"ocean_profit"
,
"air_profit"
)
)
.
drop
(
"ocean_profit"
,
"air_profit"
)
...
...
Pyspark_job/my_kafka/kafka_flow_asin_detail.py
View file @
81ba7508
...
@@ -657,9 +657,14 @@ class KafkaFlowAsinDetail(Templates):
...
@@ -657,9 +657,14 @@ class KafkaFlowAsinDetail(Templates):
'profit_key'
,
F
.
concat_ws
(
"_"
,
F
.
col
(
"asin"
),
F
.
col
(
"price"
))
'profit_key'
,
F
.
concat_ws
(
"_"
,
F
.
col
(
"asin"
),
F
.
col
(
"price"
))
)
.
withColumn
(
)
.
withColumn
(
"profit_rate_extra"
,
"profit_rate_extra"
,
F
.
struct
(
F
.
when
(
F
.
col
(
"ocean_profit"
)
.
alias
(
"ocean_profit"
),
F
.
col
(
"ocean_profit"
)
.
isNull
()
&
F
.
col
(
"air_profit"
)
.
isNull
(),
F
.
col
(
"air_profit"
)
.
alias
(
"air_profit"
)
F
.
lit
(
None
)
)
.
otherwise
(
F
.
struct
(
F
.
col
(
"ocean_profit"
)
.
alias
(
"ocean_profit"
),
F
.
col
(
"air_profit"
)
.
alias
(
"air_profit"
)
)
)
)
)
.
drop
(
'ocean_profit'
,
'air_profit'
)
)
.
drop
(
'ocean_profit'
,
'air_profit'
)
return
df
return
df
...
@@ -676,8 +681,8 @@ class KafkaFlowAsinDetail(Templates):
...
@@ -676,8 +681,8 @@ class KafkaFlowAsinDetail(Templates):
.
withColumnRenamed
(
"customer_reviews_json"
,
"product_features"
)
\
.
withColumnRenamed
(
"customer_reviews_json"
,
"product_features"
)
\
.
withColumn
(
"collapse_asin"
,
F
.
coalesce
(
F
.
col
(
"parent_asin"
),
F
.
col
(
"asin"
)))
\
.
withColumn
(
"collapse_asin"
,
F
.
coalesce
(
F
.
col
(
"parent_asin"
),
F
.
col
(
"asin"
)))
\
.
withColumn
(
"bsr_best_orders_type"
,
F
.
lit
(
-
1
))
\
.
withColumn
(
"bsr_best_orders_type"
,
F
.
lit
(
-
1
))
\
.
withColumn
(
"img_type"
,
F
.
split
(
F
.
col
(
"img_type"
),
","
))
\
.
withColumn
(
"img_type
_arr
"
,
F
.
split
(
F
.
col
(
"img_type"
),
","
))
\
.
withColumn
(
"img_type
"
,
F
.
expr
(
"transform(img_type
, x -> cast(x as int))"
))
.
withColumn
(
"img_type
_arr"
,
F
.
expr
(
"transform(img_type_arr
, x -> cast(x as int))"
))
df_save
=
df
.
select
(
"asin"
,
"ao_val"
,
"zr_counts"
,
"sp_counts"
,
"sb_counts"
,
"vi_counts"
,
"bs_counts"
,
"ac_counts"
,
df_save
=
df
.
select
(
"asin"
,
"ao_val"
,
"zr_counts"
,
"sp_counts"
,
"sb_counts"
,
"vi_counts"
,
"bs_counts"
,
"ac_counts"
,
"tr_counts"
,
"er_counts"
,
"bsr_orders"
,
"bsr_orders_sale"
,
"title"
,
"title_len"
,
"price"
,
"tr_counts"
,
"er_counts"
,
"bsr_orders"
,
"bsr_orders_sale"
,
"title"
,
"title_len"
,
"price"
,
"rating"
,
"total_comments"
,
"buy_box_seller_type"
,
"page_inventory"
,
"volume"
,
"weight"
,
"color"
,
"rating"
,
"total_comments"
,
"buy_box_seller_type"
,
"page_inventory"
,
"volume"
,
"weight"
,
"color"
,
...
@@ -698,7 +703,7 @@ class KafkaFlowAsinDetail(Templates):
...
@@ -698,7 +703,7 @@ class KafkaFlowAsinDetail(Templates):
"collapse_asin"
,
F
.
col
(
"follow_sellers"
)
.
alias
(
"follow_sellers_count"
),
"seller_json"
,
"collapse_asin"
,
F
.
col
(
"follow_sellers"
)
.
alias
(
"follow_sellers_count"
),
"seller_json"
,
F
.
col
(
"describe"
)
.
alias
(
"asin_describe"
),
F
.
round
(
"fbm_delivery_price"
,
2
)
.
alias
(
"fbm_price"
),
F
.
col
(
"describe"
)
.
alias
(
"asin_describe"
),
F
.
round
(
"fbm_delivery_price"
,
2
)
.
alias
(
"fbm_price"
),
"asin_source_flag"
,
"bsr_last_seen_at"
,
"bsr_seen_count_30d"
,
"nsr_last_seen_at"
,
"nsr_seen_count_30d"
,
"asin_source_flag"
,
"bsr_last_seen_at"
,
"bsr_seen_count_30d"
,
"nsr_last_seen_at"
,
"nsr_seen_count_30d"
,
"describe_len"
,
"tracking_since"
,
"tracking_since_type"
,
"profit_key"
,
"profit_rate_extra"
)
"describe_len"
,
"tracking_since"
,
"tracking_since_type"
,
"profit_key"
,
"profit_rate_extra"
,
"img_type_arr"
)
df_save
=
df_save
.
na
.
fill
(
df_save
=
df_save
.
na
.
fill
(
{
"zr_counts"
:
0
,
"sp_counts"
:
0
,
"sb_counts"
:
0
,
"vi_counts"
:
0
,
"bs_counts"
:
0
,
"ac_counts"
:
0
,
{
"zr_counts"
:
0
,
"sp_counts"
:
0
,
"sb_counts"
:
0
,
"vi_counts"
:
0
,
"bs_counts"
:
0
,
"ac_counts"
:
0
,
"tr_counts"
:
0
,
"er_counts"
:
0
,
"title_len"
:
0
,
"total_comments"
:
0
,
"variation_num"
:
0
,
"img_num"
:
0
,
"tr_counts"
:
0
,
"er_counts"
:
0
,
"title_len"
:
0
,
"total_comments"
:
0
,
"variation_num"
:
0
,
"img_num"
:
0
,
...
...
Pyspark_job/my_kafka/kafka_rank_asin_detail.py
View file @
81ba7508
...
@@ -656,9 +656,14 @@ class KafkaRankAsinDetail(Templates):
...
@@ -656,9 +656,14 @@ class KafkaRankAsinDetail(Templates):
'profit_key'
,
F
.
concat_ws
(
"_"
,
F
.
col
(
"asin"
),
F
.
col
(
"price"
))
'profit_key'
,
F
.
concat_ws
(
"_"
,
F
.
col
(
"asin"
),
F
.
col
(
"price"
))
)
.
withColumn
(
)
.
withColumn
(
"profit_rate_extra"
,
"profit_rate_extra"
,
F
.
struct
(
F
.
when
(
F
.
col
(
"ocean_profit"
)
.
alias
(
"ocean_profit"
),
F
.
col
(
"ocean_profit"
)
.
isNull
()
&
F
.
col
(
"air_profit"
)
.
isNull
(),
F
.
col
(
"air_profit"
)
.
alias
(
"air_profit"
)
F
.
lit
(
None
)
)
.
otherwise
(
F
.
struct
(
F
.
col
(
"ocean_profit"
)
.
alias
(
"ocean_profit"
),
F
.
col
(
"air_profit"
)
.
alias
(
"air_profit"
)
)
)
)
)
.
drop
(
'ocean_profit'
,
'air_profit'
)
)
.
drop
(
'ocean_profit'
,
'air_profit'
)
return
df
return
df
...
@@ -675,8 +680,8 @@ class KafkaRankAsinDetail(Templates):
...
@@ -675,8 +680,8 @@ class KafkaRankAsinDetail(Templates):
.
withColumnRenamed
(
"customer_reviews_json"
,
"product_features"
)
\
.
withColumnRenamed
(
"customer_reviews_json"
,
"product_features"
)
\
.
withColumn
(
"collapse_asin"
,
F
.
coalesce
(
F
.
col
(
"parent_asin"
),
F
.
col
(
"asin"
)))
\
.
withColumn
(
"collapse_asin"
,
F
.
coalesce
(
F
.
col
(
"parent_asin"
),
F
.
col
(
"asin"
)))
\
.
withColumn
(
"bsr_best_orders_type"
,
F
.
lit
(
-
1
))
\
.
withColumn
(
"bsr_best_orders_type"
,
F
.
lit
(
-
1
))
\
.
withColumn
(
"img_type"
,
F
.
split
(
F
.
col
(
"img_type"
),
","
))
\
.
withColumn
(
"img_type
_arr
"
,
F
.
split
(
F
.
col
(
"img_type"
),
","
))
\
.
withColumn
(
"img_type
"
,
F
.
expr
(
"transform(img_type
, x -> cast(x as int))"
))
.
withColumn
(
"img_type
_arr"
,
F
.
expr
(
"transform(img_type_arr
, x -> cast(x as int))"
))
df_save
=
df
.
select
(
"asin"
,
"ao_val"
,
"zr_counts"
,
"sp_counts"
,
"sb_counts"
,
"vi_counts"
,
"bs_counts"
,
"ac_counts"
,
df_save
=
df
.
select
(
"asin"
,
"ao_val"
,
"zr_counts"
,
"sp_counts"
,
"sb_counts"
,
"vi_counts"
,
"bs_counts"
,
"ac_counts"
,
"tr_counts"
,
"er_counts"
,
"bsr_orders"
,
"bsr_orders_sale"
,
"title"
,
"title_len"
,
"price"
,
"tr_counts"
,
"er_counts"
,
"bsr_orders"
,
"bsr_orders_sale"
,
"title"
,
"title_len"
,
"price"
,
"rating"
,
"total_comments"
,
"buy_box_seller_type"
,
"page_inventory"
,
"volume"
,
"weight"
,
"color"
,
"rating"
,
"total_comments"
,
"buy_box_seller_type"
,
"page_inventory"
,
"volume"
,
"weight"
,
"color"
,
...
@@ -697,7 +702,7 @@ class KafkaRankAsinDetail(Templates):
...
@@ -697,7 +702,7 @@ class KafkaRankAsinDetail(Templates):
"collapse_asin"
,
F
.
col
(
"follow_sellers"
)
.
alias
(
"follow_sellers_count"
),
"seller_json"
,
"collapse_asin"
,
F
.
col
(
"follow_sellers"
)
.
alias
(
"follow_sellers_count"
),
"seller_json"
,
F
.
col
(
"describe"
)
.
alias
(
"asin_describe"
),
F
.
round
(
"fbm_delivery_price"
,
2
)
.
alias
(
"fbm_price"
),
F
.
col
(
"describe"
)
.
alias
(
"asin_describe"
),
F
.
round
(
"fbm_delivery_price"
,
2
)
.
alias
(
"fbm_price"
),
"asin_source_flag"
,
"bsr_last_seen_at"
,
"bsr_seen_count_30d"
,
"nsr_last_seen_at"
,
"nsr_seen_count_30d"
,
"asin_source_flag"
,
"bsr_last_seen_at"
,
"bsr_seen_count_30d"
,
"nsr_last_seen_at"
,
"nsr_seen_count_30d"
,
"describe_len"
,
"tracking_since"
,
"tracking_since_type"
,
"profit_key"
,
"profit_rate_extra"
)
"describe_len"
,
"tracking_since"
,
"tracking_since_type"
,
"profit_key"
,
"profit_rate_extra"
,
"img_type_arr"
)
df_save
=
df_save
.
na
.
fill
(
df_save
=
df_save
.
na
.
fill
(
{
"zr_counts"
:
0
,
"sp_counts"
:
0
,
"sb_counts"
:
0
,
"vi_counts"
:
0
,
"bs_counts"
:
0
,
"ac_counts"
:
0
,
{
"zr_counts"
:
0
,
"sp_counts"
:
0
,
"sb_counts"
:
0
,
"vi_counts"
:
0
,
"bs_counts"
:
0
,
"ac_counts"
:
0
,
"tr_counts"
:
0
,
"er_counts"
:
0
,
"title_len"
:
0
,
"total_comments"
:
0
,
"variation_num"
:
0
,
"img_num"
:
0
,
"tr_counts"
:
0
,
"er_counts"
:
0
,
"title_len"
:
0
,
"total_comments"
:
0
,
"variation_num"
:
0
,
"img_num"
:
0
,
...
...
Pyspark_job/utils/es_util.py
View file @
81ba7508
...
@@ -731,7 +731,7 @@ class EsUtils(object):
...
@@ -731,7 +731,7 @@ class EsUtils(object):
"type"
:
"short"
"type"
:
"short"
},
},
"img_type"
:
{
"img_type"
:
{
"type"
:
"
integer
"
"type"
:
"
keyword
"
},
},
"activity_type"
:
{
"activity_type"
:
{
"type"
:
"keyword"
"type"
:
"keyword"
...
@@ -1041,6 +1041,9 @@ class EsUtils(object):
...
@@ -1041,6 +1041,9 @@ class EsUtils(object):
},
},
"bought_month_yoy"
:
{
"bought_month_yoy"
:
{
"type"
:
"float"
"type"
:
"float"
},
"img_type_arr"
:
{
"type"
:
"integer"
}
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment