# Build the query that walks the {self.site_name}_bs_category hierarchy below
# the category whose id is id[0].
# How the SQL works: the session variable @pids is seeded with id[0]; rows are
# scanned ordered by (p_id, id), and whenever a row's parent is already listed
# in @pids (find_in_set > 0) the row's own id is appended to @pids and the row
# is flagged via a non-zero `ischild`. Flagged rows are then LEFT JOINed back
# to the same table to return (id, en_name).
# NOTE(review): relies on user-variable assignment inside SELECT and on parents
# being scanned before their children (looks like MySQL) - confirm.
# NOTE(review): self.site_name and id[0] are interpolated directly into the SQL
# text; verify neither can carry untrusted input.
select_p_id=f"select t3.id,t4.en_name from (select t1.id,t1.parent_id,if(find_in_set(parent_id, @pids) > 0, @pids := concat(@pids, ',',id), 0) as ischild from (select id,p_id as parent_id from {self.site_name}_bs_category t order by p_id,id) t1,(select @pids := {id[0]}) t2) t3 LEFT JOIN {self.site_name}_bs_category t4 on t3.id = t4.id where ischild != 0;"
# Set category_id, category_parent_id and category_first_id on the row of
# {site}_bs_category whose id equals items['id'].
# NOTE(review): the values from `items` are formatted straight into the SQL
# string; a value containing a single quote would break the statement.
# Consider bound parameters if conn_6 supports them - confirm its API.
ai_sql1=f"update {site}_bs_category set category_id = '{items['category_id']}',category_parent_id='{items['category_parent_id']}',category_first_id='{items['category_first_id']}' where id={items['id']}"
# Echo the final statement to stdout before running it.
print(ai_sql1)
conn_6.execute(ai_sql1)
defdele_self_real_spider(self):
print('每天晚上定时删除贺哲的抓取表。用户已经取消收藏店铺')
select_sql='select data_id from user_collection_syn where data_type =2'
# with open(rf'{self.site_name}_22_{asin}.html', 'w', encoding='utf-8')as f:
# f.write(resp.text)
ifself.reuests_para_val.check_amazon_yzm(resp):
print('出现验证码,。asin---> ',asin)
ifself.spider_state=='竞品asin':
...
...
@@ -331,41 +328,31 @@ class ai_async_asin_pg():
defread_ai_asin(self):
self.pg_connect()
self.spider_type=True
self.spider_type=True
formodulein['Amazon:asin','Amazon:asinList']:
ifmodule=='Amazon:asin':
# pass
sql=f"SELECT elem->>'asin' AS asin,task_id,site_name FROM ai_asin_analyze_log,LATERAL json_array_elements(input_params) elem WHERE module='{module}' and spider_status='未开始' for update;"
else:
sql=f"""SELECT elem->>'asin' AS asin,task_id,site_name FROM ai_asin_analyze_log,LATERAL json_array_elements(input_params) elem WHERE module = '{module}' and spider_status='未开始' for update;"""
# sql = f"""SELECT elem->>'asin' AS asin,task_id,site_name FROM ai_asin_analyze_log,LATERAL json_array_elements(input_params) elem WHERE module = '{module}' and task_id=39 for update;"""
print(sql)
df_read=self.engine_pg.read_then_update(
select_sql=sql,
update_table='ai_asin_analyze_log',
set_values={"spider_status":'爬取中'},# 把库存清零
where_keys=["task_id"],# WHERE sku = :sku
)
whileTrue:
try:
ifmodule=='Amazon:asin':
sql=f"SELECT elem->>'asin' AS asin,task_id,site_name FROM ai_asin_analyze_log,LATERAL json_array_elements(input_params) elem WHERE module='{module}' and spider_status='未开始' for update;"
else:
sql=f"""SELECT elem->>'asin' AS asin,task_id,site_name FROM ai_asin_analyze_log,LATERAL json_array_elements(input_params) elem WHERE module = '{module}' and spider_status='未开始' for update;"""
# sql = f"""SELECT elem->>'asin' AS asin,task_id,site_name FROM ai_asin_analyze_log,LATERAL json_array_elements(input_params) elem WHERE module = '{module}' and task_id=39 for update;"""
select_sql=f"""select id, site_name, task_id, unique_key as asin,sub_step from ai_asin_analyze_spider where sub_step = 'AsinInfoRepository:详情' and status = '未开始' and site_name='{site}' order by task_id"""
select_sql=f"""select id, site_name, task_id, unique_key as asin,sub_step from ai_asin_analyze_spider where sub_step = 'AsinInfoRepository:详情' and status = '未开始' and site_name='{site}' order by task_id"""