Commit 849cc1d0 by Peng

修改了店铺类目分析,新增2个板块。

修改了数据库连接密码
补充数据召回商品数据
tk修改连接密码。重新打包
parent b03eefc8
......@@ -155,11 +155,13 @@ class dow_category_Product():
def get_category_data(self, Category_list, driver, site):
print('Category_list:::', Category_list)
num = 0
print('click_product_name_list::',click_product_name_list)
for Category in Category_list:
self.cilik_site(driver)
product_nums = 0
click_product_name_list=[]
print(Category, ' 22222222222222222222222222222222222222')
try:
num += 1
Category_name = Category
# _Category = Category.replace('&', '\\\&')
......@@ -171,7 +173,6 @@ class dow_category_Product():
'//h2[contains(text(),"Product Type")]/following-sibling::div/div')
product_nums = 0
for Product_Type in Product_Type_list:
product_nums+=1
time.sleep(0.5)
driver.execute_script("localStorage.clear();") # 清除本地存储
time.sleep(0.5)
......@@ -193,10 +194,10 @@ class dow_category_Product():
save_Category_list = []
Product_name = Product_Type.xpath('./@id')
print("Product_name3222222222::", Product_name[0].upper())
print('click_product_name_list::', click_product_name_list)
if Product_name[0] in click_product_name_list:
continue
click_product_name_list.append(Product_name[0])
# print('click_product_name_list::', click_product_name_list)
# if Product_name[0] in click_product_name_list:
# continue
# click_product_name_list.append(Product_name[0])
driver.execute_script(f"document.querySelector('#{Product_name[0]} > kat-radiobutton').click()")
time.sleep(2)
html = etree.HTML(driver.page_source)
......@@ -381,18 +382,18 @@ class dow_category_Product():
print('转成int')
print([big_brand_int, big_asin_int,
big_new_asin_int, big_per_asin_int])
top_data_json = self.new_top_grossing(driver, 'Top')
time.sleep(1)
news_data_json = self.new_top_grossing(driver, 'News')
time.sleep(1)
# top_data_json = self.new_top_grossing(driver, 'Top')
# time.sleep(1)
# news_data_json = self.new_top_grossing(driver, 'News')
# time.sleep(1)
save_Category_list.append(
[Category_name, Product_name[0], Keyword[0], float(search_ratio),
float(product_average), float(return_ratio), float(return_product_average),
self.y_w, big_text_sller, big_text_brand, big_text_asin, big_text_new_asin,
big_text_per_asin, big_text_Advertisement, big_text_star, big_brand_int,
big_asin_int, big_new_asin_int, big_per_asin_int, five_star, three_star, two_star,
one_star, ad_spend, majority_spend, most_popular_json_dict, reasons_returns_json,
top_data_json, news_data_json])
one_star, ad_spend, majority_spend, most_popular_json_dict, reasons_returns_json
])
print('数据:', save_Category_list)
except:
print('============ 下标。超出 。 ==========')
......@@ -416,7 +417,7 @@ class dow_category_Product():
'new_asin_int', 'per_asin_int', 'five_star',
'three_star', 'two_star', 'one_star', 'ad_spend',
'majority_spend', 'most_popular_keywords_item',
'reasons_returns_json', 'top_data_json', 'news_data_json'
'reasons_returns_json'
])
df.to_sql(f'{site}_aba_profit_category_insights', con=self.engine_mysql,
if_exists="append", index=False)
......@@ -436,8 +437,7 @@ class dow_category_Product():
'new_asin_int', 'per_asin_int', 'five_star',
'three_star', 'two_star', 'one_star', 'ad_spend',
'majority_spend', 'most_popular_keywords_item',
'reasons_returns_json', 'top_data_json',
'news_data_json'])
'reasons_returns_json'])
df.to_sql(f'{site}_aba_profit_category_insights', con=self.engine_pg,
if_exists="append", index=False)
print(save_Category_list)
......@@ -451,13 +451,15 @@ class dow_category_Product():
print('============ 产品分类 下标。超出 。无数据 ==========', f"\n{traceback.format_exc()}")
time.sleep(2)
continue
if product_nums>15:
time.sleep(2)
print('product_nums 重新启动 浏览器,')
driver.close()
driver.quit()
time.sleep(2)
self.run()
# product_nums+=1
# if product_nums>10:
# time.sleep(2)
# print(product_nums, 'product_nums 重新启动 浏览器,')
# driver.close()
# driver.quit()
# time.sleep(2)
# product_nums = 0
# self.run()
except Exception as e:
print(e, '执行错误')
time.sleep(random.uniform(10, 20))
......
......@@ -41,7 +41,7 @@ class One688LoginSpider(object):
self.engine_us_mysql = create_engine( url)
self.engine_pg = create_engine(
f"postgresql+psycopg2://postgres:T#4$4%qPbR7mJx@113.100.143.162:5432/{db}",
f"postgresql+psycopg2://postgres:F9kL2sXe81rZq@113.100.143.162:5432/{db}",
encoding='utf-8')
return self.engine_us_mysql
......@@ -156,7 +156,7 @@ class One688LoginSpider(object):
'商机探测器', '是']]
df_seller_asin_account = pd.DataFrame(data=workflow_everyday_list,
columns=['site_name', 'date_info', 'status', 'status_val',
'table_name', 'date_type', 'page', 'is_end'])
'table_name', 'report_date', 'page', 'is_end'])
df_seller_asin_account.to_sql('workflow_everyday', con=self.engine_us_mysql, if_exists='append',
index=False)
......
......@@ -155,10 +155,10 @@ class count_all_syn_st_id(BaseUtils):
if __name__ == '__main__':
import time
# 根据 engine 选择那个库。爬虫库 14, 抓取me搜索词是6,爬虫一般使用14,根据情况调整
month = 6
month = 7
engine_db_num = 14
# for site in ['de','uk']:
for site in ['de']:
for site in ['uk']:
time.sleep(0)
count_all_syn_st_id(site_name=site,month=month).get_minid_maxid()
# count_all_syn_st_id(site_name=site,month=month,engine_db_num=engine_db_num).search_term_syn()
......
......@@ -98,7 +98,7 @@ def long_time_task(site, proxy_name, month):
if __name__ == '__main__':
pppoe_ip()
site_list = ['us','de', 'uk']
site_list = ['us','de','uk']
month = int(sys.argv[1])
week = int(sys.argv[2])
proxy_name = None
......
......@@ -477,7 +477,8 @@ class ParseSearchTermUs(object):
def parse_ac(self):
try:
asin_list = self.etree_html.xpath(
'//span[@data-a-badge-color="sx-gulfstream" and @aria-hidden="true" ]//@id|//span[@data-a-badge-color="sx-gulfstream"]//@id|//span[contains(@id,"-amazons-choice-label")]/@id')
'//span[@data-a-badge-color="sx-gulfstream" and @aria-hidden="true" ]//@id|//span[@data-a-badge-color="sx-gulfstream"]//@id|//span[contains(@id,"-amazons-choice-label")]/@id|//span[contains(@id,"-amazons-choice")]/@id')
print('ac_asin_list:::',asin_list)
if len(asin_list):
asin_list = [asin.split("-")[0] for asin in asin_list if len(asin.split("-")[0]) >= 9] # 有重复
self.ac_list.extend(self.parse_type_common(asin_list=asin_list, cate_type='ac'))
......
......@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
# self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
# self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
# self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -327,7 +327,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
# self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -333,7 +333,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_edge.quit()
# self.page_edge.quit()
if __name__ == '__main__':
TkVideo().run()
......
......@@ -330,7 +330,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_edge.quit()
# self.page_edge.quit()
if __name__ == '__main__':
TkVideo().run()
......
......@@ -324,11 +324,7 @@ class TkVideo():
self.connect_redis()
self.get_datetime()
self.get_day()
print('完成关闭浏览器')
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
if __name__ == '__main__':
......
......@@ -344,7 +344,7 @@ class TkVideo():
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_edge.quit()
# self.page_edge.quit()
if __name__ == '__main__':
TkVideo().run()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment