Commit 80347e49 by Peng

no message

parent 68e0f97e
......@@ -54,21 +54,14 @@ class dow_category_Product():
print("强制关闭chrome.exe失败:", e)
port = 9222
params_ = ""
# params_ = "--blink-settings=imagesEnabled=false"
os.system(f'start Chrome {params_} --remote-debugging-port={port}')
chrome_options = Options()
# 禁止加载图片
chrome_options.add_argument('--blink-settings=imagesEnabled=false')
chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{port}")
driver = webdriver.Chrome(r'chromedriver.exe', options=chrome_options)
# 无界面模式
# chrome_options.add_argument('--headless')
# 禁用 GPU,加快在 headless 下的渲染
chrome_options.add_argument('--disable-gpu')
# 禁用沙箱,规避权限问题
chrome_options.add_argument('--no‑sandbox')
# 改用 /tmp 而不是 /dev/shm(避免共享内存不足)
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(r'chromedriver.exe', options=chrome_options)
self.get_category(site, driver)
def get_category(self, site, driver):
......@@ -138,21 +131,6 @@ class dow_category_Product():
if Product_name[0] in self.click_product_name_list:
print(product_nums, "已经抓取::", Product_name[0].upper())
continue
driver.execute_script("localStorage.clear();") # 清除本地存储
time.sleep(0.5)
driver.execute_script(
"caches.keys().then(function(names) { for (let name of names) { caches.delete(name); } });")
driver.execute_script("window.performance.clearResourceTimings();")
time.sleep(0.5)
# 假设你已经有了 driver
# 先 enable heap profiler
driver.execute_cdp_cmd('HeapProfiler.enable', {})
# 然后强制 GC
driver.execute_cdp_cmd('HeapProfiler.collectGarbage', {})
# 最后可选地 disable 掉
driver.execute_cdp_cmd('HeapProfiler.disable', {})
time.sleep(0.5)
self.click_product_name_list.append(Product_name[0])
self.update_cagetory_state = False
driver.execute_script(f"document.querySelector('#{Product_name[0]} > kat-radiobutton').click()")
......@@ -395,36 +373,6 @@ class dow_category_Product():
while True:
try:
if save_Category_list:
# with self.engine_mysql.begin() as conn_mysql:
# for i in save_Category_list:
# dele_sql = f"DELETE from {site}_aba_profit_category_insights where category='{i[0]}' and product_type='{i[1]}' and item_type_keyword='{i[2]}' and year_week='{self.y_w}'"
# print('删除删除mysql:', dele_sql)
# conn_mysql.execute(dele_sql)
# df = pd.DataFrame(data=save_Category_list,
# columns=['category', "product_type", "item_type_keyword",
# "search_ratio", "product_average", "return_ratio",
# "return_product_average", "year_week", 'sellers',
# 'new_brands',
# 'asin', 'new_asin', 'per_asin', 'advertisement_spend',
# 'star_ratings', 'new_brands_int', 'asin_int',
# 'new_asin_int', 'per_asin_int', 'five_star',
# 'three_star', 'two_star', 'one_star', 'ad_spend',
# 'majority_spend', 'most_popular_keywords_item',
# 'reasons_returns_json', 'top_data_json',
# 'news_data_json',
# 'top_sales_amount', 'top_sales_volume',
# 'top_search_ratio',
# 'top_return_ratio', 'top_adv_spend',
# 'top_majority_spend',
# 'news_sales_amount',
# 'news_sales_volume',
# 'news_search_ratio', 'news_return_ratio',
# 'news_adv_spend',
# 'news_majority_spend'
# ])
# self.engine_mysql.to_sql(df, f'{site}_aba_profit_category_insights',
# if_exists="append")
# print('存储成功 mysql')
with self.engine_pg.begin() as conn_pg:
for i in save_Category_list:
dele_sql = f"DELETE from {site}_aba_profit_category_insights where category='{i[0]}' and product_type='{i[1]}' and item_type_keyword='{i[2]}' and year_week='{self.y_w}'"
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment