# Build the UPDATE that marks the weekly '反查搜索词' (reverse search-term
# lookup) row of `site` as finished: status_val=3, status='抓取结束'
# ("crawl finished"). The row is selected by date_info='2026-{week}'.
# NOTE(review): the year 2026 is hard-coded in the SQL text while only the
# week part comes from `week` — confirm this is intended.
# NOTE(review): the identical statement is assigned twice in a row; the
# surrounding code is not visible in this excerpt, so it is unclear whether
# the two lines belong to different branches — confirm before removing one.
update_workflow_progress=f"update workflow_progress set status_val=3,status='抓取结束' where page='反查搜索词' and date_info='2026-{week}' and site_name='{site}' and date_type='week'"
update_workflow_progress=f"update workflow_progress set status_val=3,status='抓取结束' where page='反查搜索词' and date_info='2026-{week}' and site_name='{site}' and date_type='week'"
# Polling fragment for the daily 'ASIN详情' (ASIN detail) workflow row.
# NOTE(review): indentation and the enclosing loop/try headers are missing
# from this excerpt, so the nesting of the statements below cannot be
# determined here — confirm against the full file.
#
# Query for the day row with kafka_flow_state=1 and spider_state=1, with the
# site hard-coded to 'us'. This string is not referenced by any other line
# visible in this excerpt.
select_day_status_val=f"select status_val from workflow_progress where date_type='day' and page='ASIN详情' and site_name='us' and date_info='{next_date}' and kafka_flow_state=1 and spider_state=1"
# Same query, but filtered by `site_name` instead of the literal 'us'.
sql_select_=f"select status_val from workflow_progress where date_type='day' and page='ASIN详情' and site_name='{site_name}' and date_info='{next_date}' and kafka_flow_state=1 and spider_state=1"
print('sql_select 2222222:',sql_select_)
# Run the query; the result is used below via `.empty` and `.status_val[0]`.
df_status_dict=engine_mysql.read_sql(sql_select_)
ifnotdf_status_dict.empty:
# Log message translates to "check whether kafka is started".
print('查询kafka是否开启',df_status_dict.status_val[0])
# Only proceed when the first row's status_val is 1 or 2.
ifdf_status_dict.status_val[0]in(1,2):
# UPDATE text that sets spider_state=2 and status_val=2 on the day row.
# NOTE(review): no statement executing this string is visible in this excerpt.
update_month_spider_state=f"update workflow_progress set spider_state=2,status_val=2 WHERE site_name='{site_name}' and date_type='day' and date_info='{next_date}' and page='ASIN详情'"
# Look for the day row once it reports kafka_flow_state=3 and spider_state=2.
sql_read=f"select id from workflow_progress where date_type='day' and page='ASIN详情' and site_name='{site_name}' and date_info='{next_date}' and kafka_flow_state=3 and spider_state=2"
# Log message translates to "waiting for ES to start".
print('等待es启动::',sql_read)
df_report_date=engine_mysql.read_sql(sql_read)
# A non-empty result ends the wait ("crawl day asin" is logged first).
ifnotdf_report_date.empty:
print('抓取 day asin')
break
else:
# Otherwise count the attempt, sleep 120 seconds, and stop once n exceeds 25.
n+=1
time.sleep(120)
ifn>25:
break
# NOTE(review): bare `except:` catches every exception (including
# KeyboardInterrupt/SystemExit) and only sleeps 10 seconds; nothing is logged.
except:
time.sleep(10)
# Completion fragment: runs when `num_state` equals 3.
# NOTE(review): indentation is missing from this excerpt, so the nesting of
# the statements below is not recoverable here — confirm against the full file.
ifnum_state==3:
# Log message translates to "crawl finished, crawl finished".
print('抓取完成 抓取完成')
# Blocking acquire of `lock_state` (presumably a threading lock — confirm).
# NOTE(review): no matching release is visible in this excerpt.
iflock_state.acquire(blocking=True):
# Look up the day 'ASIN详情' row that has kafka_flow_state=3 and spider_state=2.
sql_read=f"select id from workflow_progress where date_type='day' and page='ASIN详情' and site_name='{site_name}' and date_info='{next_date}' and kafka_flow_state=3 and spider_state=2"
# Log message translates to "crawl finished, modify state".
print(sql_read,'抓取完成 修改状态 333333444444')
df_report_id=engine_mysql.read_sql(sql_read)
ifnotdf_report_id.empty:
# UPDATE text that sets spider_state=3 and status_val=3 on that day row.
# NOTE(review): no statement executing this string is visible in this excerpt.
update_month_spider_state=f"update workflow_progress set spider_state=3,status_val=3 WHERE site_name='{site_name}' and date_type='day' and date_info='{next_date}' and page='ASIN详情'"
# SQL strings for the weekly and monthly workflow_progress rows of `site`.
#
# Read status_val of the weekly '反查搜索词' (reverse search-term lookup) row.
select_state1_sql=f"select status_val from workflow_progress where site_name='{site}' and date_info='{year_week}' and date_type='week' and page='反查搜索词'"
# Read status_val of the weekly 'ASIN详情' (ASIN detail) row.
select_sate_sql=f"select status_val from workflow_progress where site_name='{site}' and date_info='{year_week}' and date_type='week' and page='ASIN详情'"
# Mark the weekly 'ASIN详情' row as in progress (status_val=2, status='抓取中',
# "crawling"), but only when its current status_val is 1 or 0.
# NOTE(review): the year is hard-coded as 2025 here, whereas the
# '反查搜索词' update at the top of this excerpt hard-codes 2026, and the
# SELECTs above use `year_week` — confirm which is intended.
update_workflow_progress=f"update workflow_progress set status_val=2,status='抓取中' where page='ASIN详情' and date_info='2025-{week}' and site_name='{site}' and date_type='week' and status_val in(1,0)"
# SELECT * from workflow_progress WHERE site_name='us' and page='asin详情' and date_type='month' and status_val=1 and status='月ASIN导出完成 and date_info='
# Mark the monthly row as status_val=3 / '月ASIN抓取完成' ("monthly ASIN crawl
# finished"), restricted to rows currently at status_val=1 with status
# '月ASIN导出完成' ("monthly ASIN export finished") for `year_month`.
# NOTE(review): page is spelled 'asin详情' (lowercase) here but 'ASIN详情'
# elsewhere; whether both match the same rows depends on the column's
# collation — confirm.
# NOTE(review): the identical statement is assigned twice in a row; surrounding
# code is not visible in this excerpt, so confirm before removing one.
update_month_asin_state=f"update workflow_progress set status_val=3,status='月ASIN抓取完成' WHERE site_name='{site}' and page='asin详情' and date_type='month' and status_val=1 and status='月ASIN导出完成' and date_info='{year_month}'"
update_month_asin_state=f"update workflow_progress set status_val=3,status='月ASIN抓取完成' WHERE site_name='{site}' and page='asin详情' and date_type='month' and status_val=1 and status='月ASIN导出完成' and date_info='{year_month}'"