Commit 44057a7b by Peng

no message

parent 6360879f
......@@ -41,7 +41,6 @@ class dow_category_Product:
self.engine_us_mysql = None
self.engine_pg = None
self.num = 0
week = time.strftime("%W")
yaer = time.strftime('%Y', time.localtime(time.time()))
self.y_w = f"{yaer}-{week}"
......@@ -52,26 +51,14 @@ class dow_category_Product:
self.engine_us_mysql = get_remote_engine(site_name='us', db_type='mysql')
self.engine_pg = get_remote_engine(site_name=site, db_type='postgresql_15_outer')
self.num = 0
week = time.strftime("%W")
yaer = time.strftime('%Y', time.localtime(time.time()))
self.y_w = f"{yaer}-{week}"
# ---------------------- Driver 管理 & 稳定层 ----------------------
def _kill_edge_process(self):
# 保持你原行为:强制关闭 msedge,减少 profile 被锁导致的奇怪崩溃
try:
os.system("taskkill /F /IM msedge.exe")
except Exception as e:
print("强制关闭msedge.exe失败:", e)
def _build_driver(self):
print('初始化')
try:
pr_name = "msedge.exe"
os.system('%s%s' % ("taskkill /F /IM ", pr_name))
except Exception as e:
print("强制关闭chrome.exe失败:", e)
os.system("taskkill /F /IM msedge.exe")
time.sleep(2)
edge_options = Options()
......@@ -82,7 +69,7 @@ class dow_category_Product:
edge_options.add_argument(r'--user-data-dir=C:\Users\FLA主账号客服维权使用\AppData\Local\Microsoft\Edge\User Data')
edge_options.add_argument('--profile-directory=Default')
# 降低“首次运行/恢复弹窗/扩展”对启动的干扰(不影响登录态)
# 降低"首次运行/恢复弹窗/扩展"对启动的干扰(不影响登录态)
edge_options.add_argument("--no-first-run")
edge_options.add_argument("--no-default-browser-check")
edge_options.add_argument("--disable-extensions")
......@@ -128,6 +115,16 @@ class dow_category_Product:
def _jitter(self, a=0.6, b=1.6):
time.sleep(random.uniform(a, b))
def _check_login_redirect(self, driver):
"""检测是否被重定向到登录页,是则直接退出程序。"""
try:
url = driver.current_url.lower()
if 'signin' in url or 'sign-in' in url or 'ap/signin' in url:
print('[退出] 检测到登录页,session 已过期,请重新登录后再运行')
sys.exit(1)
except Exception:
pass
def _safe_action(self, action_name, fn, driver, site=None):
"""
统一的稳定执行器:
......@@ -148,25 +145,28 @@ class dow_category_Product:
msg = str(e).lower()
print(f"[{action_name}] 第{attempt}次失败:{e}")
# 常见“页面崩溃/断连/渲染挂掉”关键词
# 常见"页面崩溃/断连/渲染挂掉"关键词
crash_like = any(x in msg for x in [
"page crash", "crash", "renderer", "disconnected",
"not connected to devtools", "session deleted",
"cannot determine loading status", "target window already closed"
])
# Timeout(页面未加载/JS超时)也值得先 refresh 一次,避免直接重启整个浏览器
should_retry = crash_like or isinstance(e, TimeoutException)
# 先尝试 refresh(刷新后能继续跑)
if attempt < self.MAX_ACTION_RETRY and crash_like:
if attempt < self.MAX_ACTION_RETRY and should_retry:
try:
print(f"[{action_name}] 尝试 refresh 恢复...")
driver.refresh()
self._jitter(3, 6)
# 恢复后重新定位到 category-insights
# 恢复后重新定位到 category-insights
if site:
self._ensure_category_insights(driver, site)
continue
except Exception as e2:
print(f"[{action_name}] refresh 也失败:{e2}")
print(f"[{action_name}] refresh 也失败,触发重启:{e2}")
raise NeedRestart(f"[{action_name}] refresh 失败:{e2}")
# 走到这里:说明需要重启 driver
break
except Exception as e:
......@@ -203,6 +203,7 @@ class dow_category_Product:
return True
self._safe_action("open_category_insights", _open, driver, site=site)
self._check_login_redirect(driver) # 导航完成后检测是否被重定向到登录页
def _click_site():
self._select_site_radio(driver, site)
......@@ -219,7 +220,6 @@ class dow_category_Product:
time.sleep(1)
html = etree.HTML(driver.page_source)
self.save_category(html)
print(333333333333333333333333)
global syn_state
syn_state = True
......@@ -239,7 +239,7 @@ class dow_category_Product:
for Category in Category_list:
try:
# 关键:每个大循环都做一次“崩溃检测+必要时恢复”
# 关键:每个大循环都做一次"崩溃检测+必要时恢复"
self._safe_action("loop_healthcheck", lambda: True, driver, site=site)
self.cilik_site(driver)
......@@ -343,8 +343,10 @@ class dow_category_Product:
'//div[@class="big-text-section-name"][1]/div[@class="big-text"]/text()'
)
if ratio_list:
search_ratio = re.findall(r'(.*?)‰', ratio_list[0])[0]
return_ratio = re.findall(r'(.*?)%', ratio_list[1])[0]
_sr = re.findall(r'(.*?)‰', ratio_list[0])
search_ratio = _sr[0] if _sr else None
_rr = re.findall(r'(.*?)%', ratio_list[1]) if len(ratio_list) > 1 else []
return_ratio = _rr[0] if _rr else None
else:
search_ratio = None
return_ratio = None
......@@ -353,8 +355,10 @@ class dow_category_Product:
'//div[@class="big-text-section-name"][1]/div[@class="sub-text"]/text()'
)
if product_ratio_list:
product_average = re.findall(r'(.*?)‰', product_ratio_list[0])[0]
return_product_average = re.findall(r'(.*?)%', product_ratio_list[1])[0]
_pa = re.findall(r'(.*?)‰', product_ratio_list[0])
product_average = _pa[0] if _pa else None
_rpa = re.findall(r'(.*?)%', product_ratio_list[1]) if len(product_ratio_list) > 1 else []
return_product_average = _rpa[0] if _rpa else None
else:
product_average = None
return_product_average = None
......@@ -403,11 +407,11 @@ class dow_category_Product:
total = 0.0
for num_str in sta_list:
total += float(num_str)
results = [float(num) / total if float(num) != 0 else 0 for num in sta_list]
five_star = round(results[0], 2)
three_star = round(results[1], 2)
two_star = round(results[2], 2)
one_star = round(results[3], 2)
results = [float(num) / total if total > 0 and float(num) != 0 else 0 for num in sta_list]
five_star = round(results[0], 2) if len(results) > 0 else 0
three_star = round(results[1], 2) if len(results) > 1 else 0
two_star = round(results[2], 2) if len(results) > 2 else 0
one_star = round(results[3], 2) if len(results) > 3 else 0
else:
five_star = 0
three_star = 0
......@@ -425,7 +429,7 @@ class dow_category_Product:
pattern = ''
matches_list = re.findall(pattern, big_text_Advertisement)
ad_spend = matches_list[0] if matches_list else None
majority_spend = matches_list[1] if matches_list else None
majority_spend = matches_list[1] if len(matches_list) > 1 else None
else:
ad_spend = 0
majority_spend = 0
......@@ -487,7 +491,10 @@ class dow_category_Product:
save_Category_list.append([
Category_name, Product_name[0], Keyword[0],
float(search_ratio), float(product_average), float(return_ratio), float(return_product_average),
float(search_ratio) if search_ratio is not None else None,
float(product_average) if product_average is not None else None,
float(return_ratio) if return_ratio is not None else None,
float(return_product_average) if return_product_average is not None else None,
self.y_w, big_text_sller, big_text_brand, big_text_asin, big_text_new_asin,
big_text_per_asin, big_text_Advertisement, big_text_star, big_brand_int,
big_asin_int, big_new_asin_int, big_per_asin_int, five_star, three_star, two_star,
......@@ -533,6 +540,7 @@ class dow_category_Product:
'news_adv_spend', 'news_majority_spend'
])
df = df.where(pd.notnull(df), None)
self.engine_pg.to_sql(df, f'{site}_aba_profit_category_insights', if_exists="append")
print('存储成功 pg')
break
......@@ -571,7 +579,6 @@ class dow_category_Product:
driver.close()
driver.quit()
print('重新启动 浏览器,')
self.run()
raise NeedRestart("num>1 trigger restart")
except NeedRestart as e:
......@@ -606,7 +613,8 @@ class dow_category_Product:
majority_spend = 0
if input:
products_aggregate_sales = input.get('products_aggregate_sales', [])[0]
_agg = input.get('products_aggregate_sales', [])
products_aggregate_sales = _agg[0] if _agg else None
if products_aggregate_sales:
split = products_aggregate_sales.split("|")
sales_amount_str = self.safe_get(split, 1, '').partition("$")[-1]
......@@ -683,8 +691,10 @@ class dow_category_Product:
most_popular_keyword_list = []
if most_popular_list:
for most_popular in most_popular_list:
most_popular_keyword = most_popular.xpath('.//div[2]/text()')[0]
most_popular_b_nums = most_popular.xpath('.//div/b/text()')[0]
_kw_list = most_popular.xpath('.//div[2]/text()')
most_popular_keyword = _kw_list[0] if _kw_list else None
_bn_list = most_popular.xpath('.//div/b/text()')
most_popular_b_nums = _bn_list[0] if _bn_list else None
most_popular_keyword_list.append({
"most_popular_keywords": most_popular_keyword,
'most_popular_search_nums': most_popular_b_nums
......@@ -692,8 +702,10 @@ class dow_category_Product:
top_ratio_list = html_top.xpath('//div[@class="big-text-section-name"][1]/div[@class="big-text"]/text()')
if top_ratio_list:
search_ratio = re.findall(r'(.*?)‰', top_ratio_list[0])[0]
return_ratio = re.findall(r'(.*?)%', top_ratio_list[1])[0]
_sr = re.findall(r'(.*?)‰', top_ratio_list[0])
search_ratio = _sr[0] if _sr else None
_rr = re.findall(r'(.*?)%', top_ratio_list[1]) if len(top_ratio_list) > 1 else []
return_ratio = _rr[0] if _rr else None
else:
search_ratio = None
return_ratio = None
......@@ -739,6 +751,8 @@ class dow_category_Product:
time.sleep(1.5)
return self.analysis_top_Newly_html(driver)
return self._safe_action("click_newly_launched", _do, driver, site=self.site_name)
except NeedRestart:
raise
except Exception:
return "{}"
......@@ -747,19 +761,31 @@ class dow_category_Product:
self.mysql_connect(site=self.site_name)
select_sql = 'select category from seller_category_insights_syn where state =1'
df = self.engine_pg.read_sql(select_sql)
category_list = list(df.category)
print(category_list)
if category_list:
return category_list
if df.shape[0] > 0:
category_list = list(df.category)
print(category_list)
if category_list:
return category_list
else:
return None
else:
self.mysql_connect()
workflow_everyday_list = [
[self.site_name, self.y_w, '类目分析抓取完成', 3, f'{self.site_name}_aba_profit_category_insights', 'week', '类目分析', '是']
]
df_seller_asin_account = pd.DataFrame(data=workflow_everyday_list, columns=[
'site_name', 'date_info', 'status', 'status_val', 'table_name', 'date_type', 'page', 'is_end'
])
self.engine_us_mysql.to_sql(df_seller_asin_account, 'workflow_progress', if_exists='append')
for i in range(5):
try:
self.mysql_connect()
workflow_everyday_list = [
[self.site_name, self.y_w, '类目分析抓取完成', 3, f'{self.site_name}_aba_profit_category_insights', 'week', '类目分析', '是']
]
print(workflow_everyday_list)
df_seller_asin_account = pd.DataFrame(data=workflow_everyday_list, columns=[
'site_name', 'date_info', 'status', 'status_val', 'table_name', 'date_type', 'page', 'is_end'
])
self.engine_us_mysql.to_sql(df_seller_asin_account, 'workflow_progress', if_exists='append')
break
except:
print('存储报错 类目分析抓取完成')
self.mysql_connect(site=self.site_name)
time.sleep(10)
return None
def save_category(self, html):
Category_list = html.xpath('//h2[contains(text(),"Category")]/following-sibling::div/div')
......@@ -769,6 +795,8 @@ class dow_category_Product:
for Category in Category_list:
Category_name = Category.xpath('./@id')
Category_label = Category.xpath('.//@label')
if not Category_name or not Category_label:
continue
self.category_item[Category_label[0]] = Category_name[0]
Categorys_list.append(Category_name[0])
Categorys_list_syn.append([Category_label[0]])
......@@ -779,7 +807,7 @@ class dow_category_Product:
if syn_state is False:
now = datetime.datetime.now()
is_monday = (now.weekday() == 0)
is_9_am = (now.hour == 11) # 保持你原判断
is_9_am = (now.hour == 11)
if is_monday and is_9_am:
TRUNCATE_SQL = 'TRUNCATE seller_category_insights_syn'
conn.execute(TRUNCATE_SQL)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment