SELECT MAX(batch_id) as initial_batch_id from {self.doris_db}.{self.max_bought_month_table} WHERE date_info='{self.date_info}' AND consumer_type='{self.consumer_type}'
CASE WHEN weight > 0 AND weight * 16 <= 16 AND asin_length > 0 AND asin_length <= 15 AND asin_width > 0 AND asin_width <= 12 AND asin_height > 0 AND asin_height <= 0.75 THEN 1
WHEN weight > 0 AND weight <= 20 AND asin_length > 0 AND asin_length <= 18 AND asin_width > 0 AND asin_width <= 14 AND asin_height > 0 AND asin_height <= 8 THEN 2
WHEN weight > 0 AND weight <= 70 AND asin_length > 0 AND asin_length <= 60 AND asin_width > 0 AND asin_width <= 30 AND asin_length + asin_length + (asin_width + asin_height) * 2 <= 130 THEN 3
WHEN weight > 0 AND weight <= 150 AND asin_length > 0 AND asin_length <= 108 AND asin_length + asin_length + (asin_width + asin_height) * 2 <= 130 THEN 4
WHEN weight > 0 AND weight <= 150 AND asin_length > 0 AND asin_length <= 108 AND asin_length + asin_length + (asin_width + asin_height) * 2 <= 165 THEN 5
WHEN weight > 150 AND asin_length > 108 AND asin_length + asin_length + (asin_width + asin_height) * 2 > 165 THEN 6 ELSE 0 END"""
else:
expr_str=f"""
CASE WHEN weight > 0 AND weight <= 100 AND asin_length > 0 AND asin_length <= 20 AND asin_width > 0 AND asin_width <= 15 AND asin_height > 0 AND asin_height <= 1 THEN 1
WHEN weight > 0 AND weight <= 500 AND asin_length > 0 AND asin_length <= 33 AND asin_width > 0 AND asin_width <= 23 AND asin_height > 0 AND asin_height <= 2.5 THEN 2
WHEN weight > 0 AND weight <= 1000 AND asin_length > 0 AND asin_length <= 33 AND asin_width > 0 AND asin_width <= 23 AND asin_height > 0 AND asin_height <= 5 THEN 3
WHEN weight > 0 AND weight <= 12000 AND asin_length > 0 AND asin_length <= 45 AND asin_width > 0 AND asin_width <= 34 AND asin_height > 0 AND asin_height <= 26 THEN 4
WHEN weight > 0 AND weight <= 2000 AND asin_length > 0 AND asin_length <= 61 AND asin_width > 0 AND asin_width <= 46 AND asin_height > 0 AND asin_height <= 46 THEN 5
WHEN asin_length > 0 AND asin_length <= 150 AND asin_length + asin_length + (asin_width + asin_height) <= 300 THEN 6
WHEN asin_length > 150 AND asin_length + asin_length + (asin_width + asin_height) > 300 THEN 7 ELSE 0 END"""
F.expr(f"""CASE WHEN asin_bs_cate_current_id is not null THEN 1 ELSE 0 END""")) \
.withColumn("zr_rating",F.expr(f"""CASE WHEN zr_counts > 0 THEN 0.5 ELSE 0 END""")) \
.withColumn("sp_rating",F.expr(f"""CASE WHEN sp_counts > 0 THEN 1 ELSE 0 END""")) \
.withColumn("a_add_rating",F.expr(f"""CASE WHEN img_type like '%3%' THEN 1 ELSE 0 END""")) \
.withColumn("video_rating",F.expr(f"""CASE WHEN img_type like '%2%' THEN 0.5 ELSE 0 END""")) \
.withColumn("brand_rating",F.expr(f"""CASE WHEN is_brand_label = 1 THEN 0.2 ELSE 0 END""")) \
.withColumn("product_describe_rating",
F.expr(f"""CASE WHEN product_description is not null THEN 0.2 ELSE 0 END""")) \
.withColumn("highlight_rating",F.expr(f"""
CASE WHEN describe is not null AND size(split(describe, '\\|-\\|')) <= 4 THEN size(split(describe, '\\|-\\|')) * 0.4
WHEN describe is not null AND size(split(describe, '\\|-\\|')) > 4 THEN 1.6 ELSE 0 END""")) \
.withColumn("title_len_rating",F.expr(f"""CASE WHEN title_len >= 50 AND title_len <=200 THEN 0.5 ELSE 0 END""")) \
.withColumn("title_brand_rating",F.expr(f"""
CASE WHEN brand is not null AND lower(regexp_replace(title, '[^a-zA-Z0-9\\s]', '')) LIKE lower(regexp_replace(brand, '[^a-zA-Z0-9\\s]', '')) || '%' THEN 0.5
ELSE 0 END""")) \
.withColumn("img_num_rating",F.expr(f"""
CASE WHEN img_num <= 4 THEN img_num * 0.5 WHEN img_num >4 THEN 2 ELSE 0 END""")) \
.withColumn("img_enlarge_rating",F.expr(f"""CASE WHEN image_view = 1 THEN 0.5 ELSE 0 END"""))
CASE WHEN limit_rank is null and asin_bs_cate_1_rank <= 500000 THEN 1 WHEN limit_rank is not null and asin_bs_cate_1_rank <= limit_rank THEN 1 ELSE 0 END"""
)).drop("limit_rank")
# 5. 是否必需ASIN
df=df.withColumn("is_need_asin",F.expr("""
CASE WHEN asin_bs_cate_1_id in ('mobile-apps', 'audible', 'books', 'music', 'dmusic', 'digital-text', 'magazines', 'movies-tv', 'software', 'videogames', 'amazon-devices', 'boost', 'us-live-explorations', 'amazon-renewed') THEN 1
WHEN asin NOT LIKE 'B0%' THEN 1
ELSE 0 END"""))
# 6. asin_type
df=df.withColumn("asin_type",F.expr("""
CASE WHEN is_self_asin=1 THEN 1 WHEN is_need_asin=1 THEN 2 WHEN is_hide_asin=1 THEN 3 ELSE 0 END"""
SELECT MAX(batch_id) as initial_batch_id from {self.doris_db}.{self.max_bought_month_table} WHERE date_info='{self.date_info}' AND consumer_type='{self.consumer_type}'
CASE WHEN weight > 0 AND weight * 16 <= 16 AND asin_length > 0 AND asin_length <= 15 AND asin_width > 0 AND asin_width <= 12 AND asin_height > 0 AND asin_height <= 0.75 THEN 1
WHEN weight > 0 AND weight <= 20 AND asin_length > 0 AND asin_length <= 18 AND asin_width > 0 AND asin_width <= 14 AND asin_height > 0 AND asin_height <= 8 THEN 2
WHEN weight > 0 AND weight <= 70 AND asin_length > 0 AND asin_length <= 60 AND asin_width > 0 AND asin_width <= 30 AND asin_length + asin_length + (asin_width + asin_height) * 2 <= 130 THEN 3
WHEN weight > 0 AND weight <= 150 AND asin_length > 0 AND asin_length <= 108 AND asin_length + asin_length + (asin_width + asin_height) * 2 <= 130 THEN 4
WHEN weight > 0 AND weight <= 150 AND asin_length > 0 AND asin_length <= 108 AND asin_length + asin_length + (asin_width + asin_height) * 2 <= 165 THEN 5
WHEN weight > 150 AND asin_length > 108 AND asin_length + asin_length + (asin_width + asin_height) * 2 > 165 THEN 6 ELSE 0 END"""
else:
expr_str=f"""
CASE WHEN weight > 0 AND weight <= 100 AND asin_length > 0 AND asin_length <= 20 AND asin_width > 0 AND asin_width <= 15 AND asin_height > 0 AND asin_height <= 1 THEN 1
WHEN weight > 0 AND weight <= 500 AND asin_length > 0 AND asin_length <= 33 AND asin_width > 0 AND asin_width <= 23 AND asin_height > 0 AND asin_height <= 2.5 THEN 2
WHEN weight > 0 AND weight <= 1000 AND asin_length > 0 AND asin_length <= 33 AND asin_width > 0 AND asin_width <= 23 AND asin_height > 0 AND asin_height <= 5 THEN 3
WHEN weight > 0 AND weight <= 12000 AND asin_length > 0 AND asin_length <= 45 AND asin_width > 0 AND asin_width <= 34 AND asin_height > 0 AND asin_height <= 26 THEN 4
WHEN weight > 0 AND weight <= 2000 AND asin_length > 0 AND asin_length <= 61 AND asin_width > 0 AND asin_width <= 46 AND asin_height > 0 AND asin_height <= 46 THEN 5
WHEN asin_length > 0 AND asin_length <= 150 AND asin_length + asin_length + (asin_width + asin_height) <= 300 THEN 6
WHEN asin_length > 150 AND asin_length + asin_length + (asin_width + asin_height) > 300 THEN 7 ELSE 0 END"""
F.expr(f"""CASE WHEN asin_bs_cate_current_id is not null THEN 1 ELSE 0 END""")) \
.withColumn("zr_rating",F.expr(f"""CASE WHEN zr_counts > 0 THEN 0.5 ELSE 0 END""")) \
.withColumn("sp_rating",F.expr(f"""CASE WHEN sp_counts > 0 THEN 1 ELSE 0 END""")) \
.withColumn("a_add_rating",F.expr(f"""CASE WHEN img_type like '%3%' THEN 1 ELSE 0 END""")) \
.withColumn("video_rating",F.expr(f"""CASE WHEN img_type like '%2%' THEN 0.5 ELSE 0 END""")) \
.withColumn("brand_rating",F.expr(f"""CASE WHEN is_brand_label = 1 THEN 0.2 ELSE 0 END""")) \
.withColumn("product_describe_rating",
F.expr(f"""CASE WHEN product_description is not null THEN 0.2 ELSE 0 END""")) \
.withColumn("highlight_rating",F.expr(f"""
CASE WHEN describe is not null AND size(split(describe, '\\|-\\|')) <= 4 THEN size(split(describe, '\\|-\\|')) * 0.4
WHEN describe is not null AND size(split(describe, '\\|-\\|')) > 4 THEN 1.6 ELSE 0 END""")) \
.withColumn("title_len_rating",F.expr(f"""CASE WHEN title_len >= 50 AND title_len <=200 THEN 0.5 ELSE 0 END""")) \
.withColumn("title_brand_rating",F.expr(f"""
CASE WHEN brand is not null AND lower(regexp_replace(title, '[^a-zA-Z0-9\\s]', '')) LIKE lower(regexp_replace(brand, '[^a-zA-Z0-9\\s]', '')) || '%' THEN 0.5
ELSE 0 END""")) \
.withColumn("img_num_rating",F.expr(f"""
CASE WHEN img_num <= 4 THEN img_num * 0.5 WHEN img_num >4 THEN 2 ELSE 0 END""")) \
.withColumn("img_enlarge_rating",F.expr(f"""CASE WHEN image_view = 1 THEN 0.5 ELSE 0 END"""))
CASE WHEN limit_rank is null and asin_bs_cate_1_rank <= 500000 THEN 1 WHEN limit_rank is not null and asin_bs_cate_1_rank <= limit_rank THEN 1 ELSE 0 END"""
)).drop("limit_rank")
# 5. 是否必需ASIN
df=df.withColumn("is_need_asin",F.expr("""
CASE WHEN asin_bs_cate_1_id in ('mobile-apps', 'audible', 'books', 'music', 'dmusic', 'digital-text', 'magazines', 'movies-tv', 'software', 'videogames', 'amazon-devices', 'boost', 'us-live-explorations', 'amazon-renewed') THEN 1
WHEN asin NOT LIKE 'B0%' THEN 1
ELSE 0 END"""))
# 6. asin_type
df=df.withColumn("asin_type",F.expr("""
CASE WHEN is_self_asin=1 THEN 1 WHEN is_need_asin=1 THEN 2 WHEN is_hide_asin=1 THEN 3 ELSE 0 END"""
(select asin, lower(label) as label, created_time,row_number() over(partition by asin,label order by updated_time desc) as crank
from ods_other_search_term_data where site_name='{self.site_name}' and date_type='30day' and trim(label) not in ('null','') and label is not null) t where t.crank=1