Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
Amazon-Selection-Data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
abel_cjy
Amazon-Selection-Data
Commits
4d2af0c5
Commit
4d2af0c5
authored
Jun 09, 2025
by
fangxingjun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改排名解析规则+新增imgUrl字段到插件
parent
42821c6d
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
8 deletions
+9
-8
kafka_asin_detail.py
Pyspark_job/my_kafka/kafka_asin_detail.py
+9
-8
No files found.
Pyspark_job/my_kafka/kafka_asin_detail.py
View file @
4d2af0c5
...
...
@@ -95,12 +95,12 @@ class DimStAsinInfo(Templates):
"it"
:
"(
\
d+).*?Visualizza i Top 100 nella categoria "
}
# 匹配一级分类的排名
self
.
pattern_str
=
{
"us"
:
"(
\
d+ in [
\
w&' ]+)"
,
"uk"
:
"(
\
d+ in [
\
w&' ]+)"
,
"de"
:
"Nr. (
\
d+ in [
\
w&' ]+)"
,
"es"
:
"nº(
\
d+ en [
\
w&' ]+)"
,
"fr"
:
"(
\
d+ en [
\
w&' ]+)"
,
"it"
:
"n. (
\
d+ in [
\
w&' ]+)"
,
"us"
:
"(
\
d+ in [
\
w&'
-
]+)"
,
"uk"
:
"(
\
d+ in [
\
w&'
-
]+)"
,
"de"
:
"Nr. (
\
d+ in [
\
w&'
-
]+)"
,
"es"
:
"nº(
\
d+ en [
\
w&'
-
]+)"
,
"fr"
:
"(
\
d+ en [
\
w&'
-
]+)"
,
"it"
:
"n. (
\
d+ in [
\
w&'
-
]+)"
,
}
# 匹配排名和分类
self
.
replace_str
=
{
"us"
:
"See Top 100 in "
,
...
...
@@ -373,7 +373,7 @@ class DimStAsinInfo(Templates):
def
clean_kafka_df
(
df
):
df
=
df
.
withColumnRenamed
(
"seller_id"
,
"account_id"
)
# |asin_zr_flow_proportion|asin_ao_val|asin_amazon_orders|variant_info|matrix_flow_proportion|matrix_ao_val|
df
=
df
.
select
(
"asin"
,
"parentAsin"
,
"title"
,
"variat_num"
,
"best_sellers_rank"
,
"best_sellers_herf"
,
"price"
,
"rating"
,
df
=
df
.
select
(
"asin"
,
"parentAsin"
,
"title"
,
"
img_url"
,
"
variat_num"
,
"best_sellers_rank"
,
"best_sellers_herf"
,
"price"
,
"rating"
,
"brand"
,
"account_id"
,
"account_name"
,
"account_url"
,
"buy_box_seller_type"
,
"volume"
,
"weight"
,
"weight_str"
,
"launch_time"
,
"total_comments"
,
"page_inventory"
,
"asinUpdateTime"
,
"site_name"
,
"node_id"
,
"buy_sales"
,
'asin_amazon_orders'
,
'asin_ao_val'
,
'matrix_ao_val'
,
"asin_zr_flow_proportion"
,
'matrix_flow_proportion'
)
...
...
@@ -389,6 +389,7 @@ class DimStAsinInfo(Templates):
)
df
.
show
(
5
,
truncate
=
False
)
df
=
df
.
withColumnRenamed
(
"img_url"
,
"imgUrl"
)
df
=
df
.
withColumnRenamed
(
"variat_num"
,
"asinVarNum"
)
df
=
df
.
withColumnRenamed
(
"asin_bs_cate_1_rank"
,
"oneCategoryRank"
)
df
=
df
.
withColumnRenamed
(
"rank_and_category"
,
"bestSellersRank"
)
# 解析后的
...
...
@@ -411,7 +412,7 @@ class DimStAsinInfo(Templates):
df
=
df
.
withColumnRenamed
(
"asin_zr_flow_proportion"
,
"asinZrFlowProportion"
)
df
=
df
.
withColumnRenamed
(
"matrix_flow_proportion"
,
"asinZrFlowProportionMatrix"
)
# df = df.withColumnRenamed("fd_country_name", "fdCountryName")
df
=
df
.
select
(
'asin'
,
'parentAsin'
,
'title'
,
'asinVarNum'
,
'oneCategoryRank'
,
'bestSellersRank'
,
'lastHerf'
,
'aoVal'
,
'matrixAoVal'
,
'price'
,
'rating'
,
df
=
df
.
select
(
'asin'
,
'parentAsin'
,
'title'
,
"imgUrl"
,
'asinVarNum'
,
'oneCategoryRank'
,
'bestSellersRank'
,
'lastHerf'
,
'aoVal'
,
'matrixAoVal'
,
'price'
,
'rating'
,
'bsrOrders'
,
'bsrOrdersSale'
,
'brandName'
,
'accountId'
,
'accountName'
,
'accountUrl'
,
'buyBoxSellerType'
,
'volume'
,
'weight'
,
'launchTime'
,
'totalComments'
,
'pageInventory'
,
'asinUpdateTime'
,
'asinBoughtMonth'
,
"asinAmazonOrders"
,
"fdCountryName"
,
"key_outer"
,
"key_inner"
,
"volumeFormat"
,
"weightFormat"
,
"isSelfAsin"
,
"auctionsNum"
,
"skusNumCreat"
,
"asinZrFlowProportion"
,
"asinZrFlowProportionMatrix"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment