Commit d4dde086 by Peng

本次主要解决：页面加载完成但未显示详情时，先进行判断，不再继续往下执行而导致报错。新增：若 15 秒内页面源码中没有该 id，则重新请求页面。

parent 2a634fbb
...@@ -66,6 +66,38 @@ class H10(): ...@@ -66,6 +66,38 @@ class H10():
s.connect(('baidu.com', 0)) s.connect(('baidu.com', 0))
ip = s.getsockname()[0] ip = s.getsockname()[0]
# You are viewing a demo of Cerebro # You are viewing a demo of Cerebro
"""
H10测试账号
账号:yswg006@hotmail.com # 124 126 共用
密码:Chianbugye@8346148
yswg304@outlook.com # 122
Chinabuye@467138
YSWGHF422023@outlook.com # 244
soundasia422023@
CherryY2023@outlook.com # 245
20230322Yy@
H10961961@outlook.com # 246
soundasia961961@
X18756082657@outlook.com # 247
Zyx13075039897@
wretyu2023@outlook.com # 127
Sffgserter@1
a18756082657@outlook.com # 121
12345678Ll@
账号:yashengweige678@outlook.com # 120
密码:987654321yswg@
账号:yswg12345678@outlook.com # 126 信用卡有问题
密码:yswg654321@
"""
user_pw_dict = { user_pw_dict = {
'192.168.10.244': [r'C:\Users\win10-244\Downloads', 'YSWGHF422023@outlook.com', 'soundasia422023@'], '192.168.10.244': [r'C:\Users\win10-244\Downloads', 'YSWGHF422023@outlook.com', 'soundasia422023@'],
'192.168.10.245': [r'C:\Users\win10-245\Downloads', 'CherryY2023@outlook.com', '20230322Yy@'], '192.168.10.245': [r'C:\Users\win10-245\Downloads', 'CherryY2023@outlook.com', '20230322Yy@'],
...@@ -75,7 +107,7 @@ class H10(): ...@@ -75,7 +107,7 @@ class H10():
'192.168.0.121': [r'C:\Users\1\Downloads', 'a18756082657@outlook.com', '12345678Ll@'], '192.168.0.121': [r'C:\Users\1\Downloads', 'a18756082657@outlook.com', '12345678Ll@'],
'192.168.0.126': [r'C:\Users\Administrator\Downloads', 'yswg12345678@outlook.com', 'yswg654321@'], '192.168.0.126': [r'C:\Users\Administrator\Downloads', 'yswg12345678@outlook.com', 'yswg654321@'],
'192.168.0.127': [r'C:\Users\1\Downloads', 'wretyu2023@outlook.com', 'Sffgserter@1'], '192.168.0.127': [r'C:\Users\1\Downloads', 'wretyu2023@outlook.com', 'Sffgserter@1'],
'192.168.0.122': [r'C:\Users\1\Downloads', 'yashengweige678@outlook.com', '987654321yswg@'], '192.168.0.122': [r'C:\Users\1\Downloads', 'yswg304@outlook.com', 'Chinabuye@467138'],
'192.168.0.124': [r'C:\Users\1\Downloads', 'yswg006@hotmail.com', 'Chianbugye@8346148'], '192.168.0.124': [r'C:\Users\1\Downloads', 'yswg006@hotmail.com', 'Chianbugye@8346148'],
} }
user_pw_list = user_pw_dict.get(ip) user_pw_list = user_pw_dict.get(ip)
...@@ -395,9 +427,19 @@ class H10(): ...@@ -395,9 +427,19 @@ class H10():
except: except:
pass pass
def wait_page(self, timeout=30):
    """Block until the page's input box is clickable, or give up.

    Waits up to ``timeout`` seconds for an ``<input>`` located under the
    element with id ``re-container`` to become clickable, printing the
    elapsed time either way.

    Args:
        timeout: Maximum number of seconds to wait (default 30).

    Returns:
        True if the input became clickable in time, False if the wait
        raised ``TimeoutException``.
    """
    began = time.time()
    input_locator = (By.XPATH, '//*[@id="re-container"]//input')
    waiter = WebDriverWait(self.driver, timeout)
    try:
        waiter.until(EC.element_to_be_clickable(input_locator))
    except TimeoutException:
        # Not ready in time; the caller decides whether to retry.
        print("wait_page timeout, used:", time.time() - began)
        return False
    print("wait_page ok, used:", time.time() - began)
    return True
def webdrvier_html(self, asin, asinstype): def webdrvier_html(self, asin, asinstype):
# 点击选择站点 # 点击选择站点
for i in range(4): for i in range(5):
try: try:
_url = self.driver.current_url _url = self.driver.current_url
if "concurrent-sessions" in _url or 'signin' in _url: if "concurrent-sessions" in _url or 'signin' in _url:
...@@ -405,7 +447,10 @@ class H10(): ...@@ -405,7 +447,10 @@ class H10():
if asin not in self.err_asin_list and self.useremail_state: if asin not in self.err_asin_list and self.useremail_state:
print('cerebro界面', self.site_name_url) print('cerebro界面', self.site_name_url)
self.driver.get(f'https://members.helium10.com/cerebro?accountId={self.account_id}') self.driver.get(f'https://members.helium10.com/cerebro?accountId={self.account_id}')
time.sleep(10) if not self.wait_page(timeout=15):
print('页面未加载出来')
continue
time.sleep(2)
if 'You are viewing a demo of Cerebro' in self.driver.page_source: if 'You are viewing a demo of Cerebro' in self.driver.page_source:
print(self.email_name, '账号过期') print(self.email_name, '账号过期')
self.driver.refresh() self.driver.refresh()
...@@ -448,11 +493,11 @@ class H10(): ...@@ -448,11 +493,11 @@ class H10():
try: try:
self.driver.execute_script( self.driver.execute_script(
f"""document.querySelector("img[loading='lazy']").click()""") f"""document.querySelector("img[loading='lazy']").click()""")
time.sleep(1.5) time.sleep(1)
except: except:
self.driver.execute_script( self.driver.execute_script(
f"""document.querySelector("img[alt='{alt}']").click()""") f"""document.querySelector("img[alt='{alt}']").click()""")
time.sleep(1.5) time.sleep(1)
self.verify() self.verify()
# 切换站点 # 切换站点
self.driver.execute_script(f"""document.querySelector("div[data-value='{host}']").click()""") self.driver.execute_script(f"""document.querySelector("div[data-value='{host}']").click()""")
...@@ -472,7 +517,7 @@ class H10(): ...@@ -472,7 +517,7 @@ class H10():
# 点击 get keyword # 点击 get keyword
time.sleep(1) time.sleep(1)
self.driver.execute_script('document.querySelector("#CerebroSearchButtons > button").click()') self.driver.execute_script('document.querySelector("#CerebroSearchButtons > button").click()')
time.sleep(2) time.sleep(1)
html = self.driver.page_source html = self.driver.page_source
if 'You have reached the limit of the uses' in html: if 'You have reached the limit of the uses' in html:
self.useremail_state = False self.useremail_state = False
...@@ -493,7 +538,7 @@ class H10(): ...@@ -493,7 +538,7 @@ class H10():
time.sleep(2) time.sleep(2)
try: try:
if 'searched this product before' in html or '先前已搜索过此产品' in html: if 'searched this product before' in html or '先前已搜索过此产品' in html:
print('33333333333') print('33333333333444444')
self.driver.execute_script( self.driver.execute_script(
"""document.querySelector("button[data-testid='runnewsearch']").click()""") """document.querySelector("button[data-testid='runnewsearch']").click()""")
sleep(randint(3, 8)) sleep(randint(3, 8))
...@@ -502,7 +547,7 @@ class H10(): ...@@ -502,7 +547,7 @@ class H10():
print('点击 run 报错') print('点击 run 报错')
# 点击下载 # 点击下载
self.driver.execute_script('window.scrollBy(0, 300);') self.driver.execute_script('window.scrollBy(0, 300);')
time.sleep(2) time.sleep(1)
html = self.driver.page_source html = self.driver.page_source
if 'You have reached the limit of the uses' in html: if 'You have reached the limit of the uses' in html:
self.useremail_state = False self.useremail_state = False
...@@ -518,10 +563,9 @@ class H10(): ...@@ -518,10 +563,9 @@ class H10():
break break
elif 'errorCodes.undefined' in html: elif 'errorCodes.undefined' in html:
continue continue
sleep(randint(13, 28)) sleep(randint(15, 30))
time.sleep(5)
self.verify() self.verify()
time.sleep(2.5) time.sleep(2)
if 'Wrong entered data or no results' in html: if 'Wrong entered data or no results' in html:
print('没有报告可下载2222', asin) print('没有报告可下载2222', asin)
self.err_asin_list.append(asin) self.err_asin_list.append(asin)
...@@ -532,17 +576,26 @@ class H10(): ...@@ -532,17 +576,26 @@ class H10():
break break
elif 'errorCodes.undefined' in html: elif 'errorCodes.undefined' in html:
continue continue
time.sleep(5) elif 'errors.common.502' in html:
html = self.driver.page_source print('没有报告可下载333', asin)
resp = etree.HTML(html) self.err_asin_list.append(asin)
try: break
div_class = resp.xpath(
'''//div[contains(text(),"Amazon Choice")]/parent::div/following-sibling::div/@class|//div[contains(text(),"Amazon's Choice")]/parent::div/following-sibling::div/@class''')
except:
print('报错22222222222222')
if asinstype: if asinstype:
time.sleep(2) try:
print('点击显示下拉框')
button_js = 'document.querySelector("#CerebroFilter > div > div.sc-dzXNMW.dufncf > div.sc-hFCjLd.igMWUF > div > button").click()'
self.driver.execute_script(button_js)
time.sleep(2)
html = self.driver.page_source
resp = etree.HTML(html)
print('Amazons Choice获取元素')
time.sleep(2)
div_class = resp.xpath(
'''//div[contains(text(),"Amazon Choice")]/parent::div/following-sibling::div/@class|//div[contains(text(),"Amazon's Choice")]/parent::div/following-sibling::div/@class''')
except:
print('报错22222222222222')
print('点击选择亚马逊精选 勾选') print('点击选择亚马逊精选 勾选')
time.sleep(2)
try: try:
script = f""" script = f"""
const elements = document.querySelectorAll("div[class='{div_class[0]}']>div"); const elements = document.querySelectorAll("div[class='{div_class[0]}']>div");
...@@ -553,7 +606,7 @@ class H10(): ...@@ -553,7 +606,7 @@ class H10():
if i == 2: if i == 2:
self.err_asins_adv_list.append(asin) self.err_asins_adv_list.append(asin)
self.driver.execute_script(script) self.driver.execute_script(script)
time.sleep(2) time.sleep(1)
html1 = self.driver.page_source html1 = self.driver.page_source
resp1 = etree.HTML(html1) resp1 = etree.HTML(html1)
span_class = resp1.xpath( span_class = resp1.xpath(
...@@ -561,15 +614,15 @@ class H10(): ...@@ -561,15 +614,15 @@ class H10():
# 选择亚马逊精选参数1 # 选择亚马逊精选参数1
self.driver.execute_script( self.driver.execute_script(
f"""document.querySelector("div[class='{span_class}']").click()""") f"""document.querySelector("div[class='{span_class}']").click()""")
time.sleep(2) time.sleep(1)
# 选择亚马逊精选参数2 # 选择亚马逊精选参数2
self.driver.execute_script( self.driver.execute_script(
f"""document.querySelector("div[class='{span_class}']").click()""") f"""document.querySelector("div[class='{span_class}']").click()""")
time.sleep(2) time.sleep(1)
# 点击添加 # 点击添加
self.driver.execute_script( self.driver.execute_script(
"""document.querySelector("button[data-testid='applyfilters']").click()""") """document.querySelector("button[data-testid='applyfilters']").click()""")
time.sleep(6.5) time.sleep(3)
# 下载报告 # 下载报告
# 点击下载csv按钮 # 点击下载csv按钮
self.driver.execute_script( self.driver.execute_script(
...@@ -742,57 +795,6 @@ class H10(): ...@@ -742,57 +795,6 @@ class H10():
print('重新下载文件222:', asin, path) print('重新下载文件222:', asin, path)
self.webdrvier_html(asin, None) self.webdrvier_html(asin, None)
self.if_csv_path(file_path) self.if_csv_path(file_path)
# columns = pd.read_csv(file_path, nrows=0).columns.tolist()
#
# def contains_chinese(text):
# return bool(re.search(r'[\u4e00-\u9fff]', text))
# is_chinese_header = any(contains_chinese(col) for col in columns)
# if is_chinese_header:
# print("表头是中文")
# columns_to_include_zh = ['关键词词组', 'Cerebro IQ 得分', '搜索量', '搜索量趋势',
# '广告推广ASIN 数',
# '竞品数', 'CPR', '标题密度', '亚马逊推荐', '自然',
# '亚马逊推荐排名', '广告排名', '自然排名']
# df = pd.read_csv(file_path, usecols=columns_to_include_zh)
# # 中文 -> 英文映射
# df.rename(columns={
# '关键词词组': 'keyword',
# 'Cerebro IQ 得分': 'cerebro_iq_score',
# '搜索量': 'search_volume',
# '搜索量趋势': 'search_volume_trend',
# '广告推广ASIN 数': 'sponsored_asins',
# '竞品数': 'competing_product',
# 'CPR': 'cpr',
# '标题密度': 'title_desity',
# '亚马逊推荐': 'amazon_recommended',
# '自然': 'organic',
# '亚马逊推荐排名': 'amazon_recommended_rank',
# '广告排名': 'sponsored_rank',
# '自然排名': 'organic_rank'
# }, inplace=True)
# else:
# print("表头是英文")
# columns_to_include_en = ['Keyword Phrase', 'Cerebro IQ Score', 'Search Volume', 'Search Volume Trend',
# 'Sponsored ASINs',
# 'Competing Products', 'CPR', 'Title Density', 'Amazon Recommended', 'Organic',
# 'Amazon Rec. Rank', 'Sponsored Rank', 'Organic Rank']
# df = pd.read_csv(file_path, usecols=columns_to_include_en)
# df.rename(columns={
# 'Keyword Phrase': 'keyword',
# 'Cerebro IQ Score': 'cerebro_iq_score',
# 'Search Volume': 'search_volume',
# 'Search Volume Trend': 'search_volume_trend',
# 'Sponsored ASINs': 'sponsored_asins',
# 'Competing Products': 'competing_product',
# 'CPR': 'cpr',
# 'Title Density': 'title_desity',
# 'Amazon Recommended': 'amazon_recommended',
# 'Organic': 'organic',
# 'Amazon Rec. Rank': 'amazon_recommended_rank',
# 'Sponsored Rank': 'sponsored_rank',
# 'Organic Rank': 'organic_rank'
# }, inplace=True)
header_config = { header_config = {
"chinese": { "chinese": {
"columns": ['关键词词组', 'Cerebro IQ 得分', '搜索量', '搜索量趋势', "columns": ['关键词词组', 'Cerebro IQ 得分', '搜索量', '搜索量趋势',
...@@ -973,6 +975,15 @@ class H10(): ...@@ -973,6 +975,15 @@ class H10():
previous_date_str = previous_date.strftime("%Y-%m-%d") previous_date_str = previous_date.strftime("%Y-%m-%d")
file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin_list[0]}_{previous_date_str}.csv' file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin_list[0]}_{previous_date_str}.csv'
print('file_pathsave_competition2222', file_path) print('file_pathsave_competition2222', file_path)
state = self.if_csv_path(file_path)
if state == False:
time.sleep(3)
file_path = fr'{path}\{self.site_name_csv.upper()}_AMAZON_cerebro_{asin_list[0]}_{time_strftime}.csv'
print('file_pathsave_competition3333', file_path)
state = self.if_csv_path(file_path)
if state == False:
self.nex_page(self.asin_list, asinstype=1)
# 创建一个字典来映射原始列名和新的列名 # 创建一个字典来映射原始列名和新的列名
columns = pd.read_csv(file_path, nrows=0).columns.tolist() columns = pd.read_csv(file_path, nrows=0).columns.tolist()
def contains_chinese(text): def contains_chinese(text):
...@@ -1079,8 +1090,8 @@ class H10(): ...@@ -1079,8 +1090,8 @@ class H10():
else: else:
path = r'C:\Users\ASUS\Downloads' path = r'C:\Users\ASUS\Downloads'
print('当前路径:', path) print('当前路径:', path)
self.email_name = 'yashengweige678@outlook.com' self.email_name = 'yswg006@hotmail.com'
self.pw = '987654321yswg@' # 'yashengweige678@outlook.com', '987654321yswg@' self.pw = 'Chianbugye@8346148' # 'yashengweige678@outlook.com', '987654321yswg@'
self.web_drver() self.web_drver()
while True: while True:
self.data = {} self.data = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment