Commit 849cc1d0 by Peng

修改了店铺类目分析中新增的2个板块。

修改了数据库连接密码
补充数据召回商品数据
tk 修改连接密码,并重新打包。
parent b03eefc8
...@@ -155,11 +155,13 @@ class dow_category_Product(): ...@@ -155,11 +155,13 @@ class dow_category_Product():
def get_category_data(self, Category_list, driver, site): def get_category_data(self, Category_list, driver, site):
print('Category_list:::', Category_list) print('Category_list:::', Category_list)
num = 0 num = 0
print('click_product_name_list::',click_product_name_list)
for Category in Category_list: for Category in Category_list:
self.cilik_site(driver) self.cilik_site(driver)
product_nums = 0
click_product_name_list=[]
print(Category, ' 22222222222222222222222222222222222222') print(Category, ' 22222222222222222222222222222222222222')
try: try:
num += 1 num += 1
Category_name = Category Category_name = Category
# _Category = Category.replace('&', '\\\&') # _Category = Category.replace('&', '\\\&')
...@@ -171,7 +173,6 @@ class dow_category_Product(): ...@@ -171,7 +173,6 @@ class dow_category_Product():
'//h2[contains(text(),"Product Type")]/following-sibling::div/div') '//h2[contains(text(),"Product Type")]/following-sibling::div/div')
product_nums = 0 product_nums = 0
for Product_Type in Product_Type_list: for Product_Type in Product_Type_list:
product_nums+=1
time.sleep(0.5) time.sleep(0.5)
driver.execute_script("localStorage.clear();") # 清除本地存储 driver.execute_script("localStorage.clear();") # 清除本地存储
time.sleep(0.5) time.sleep(0.5)
...@@ -193,10 +194,10 @@ class dow_category_Product(): ...@@ -193,10 +194,10 @@ class dow_category_Product():
save_Category_list = [] save_Category_list = []
Product_name = Product_Type.xpath('./@id') Product_name = Product_Type.xpath('./@id')
print("Product_name3222222222::", Product_name[0].upper()) print("Product_name3222222222::", Product_name[0].upper())
print('click_product_name_list::', click_product_name_list) # print('click_product_name_list::', click_product_name_list)
if Product_name[0] in click_product_name_list: # if Product_name[0] in click_product_name_list:
continue # continue
click_product_name_list.append(Product_name[0]) # click_product_name_list.append(Product_name[0])
driver.execute_script(f"document.querySelector('#{Product_name[0]} > kat-radiobutton').click()") driver.execute_script(f"document.querySelector('#{Product_name[0]} > kat-radiobutton').click()")
time.sleep(2) time.sleep(2)
html = etree.HTML(driver.page_source) html = etree.HTML(driver.page_source)
...@@ -381,18 +382,18 @@ class dow_category_Product(): ...@@ -381,18 +382,18 @@ class dow_category_Product():
print('转成int') print('转成int')
print([big_brand_int, big_asin_int, print([big_brand_int, big_asin_int,
big_new_asin_int, big_per_asin_int]) big_new_asin_int, big_per_asin_int])
top_data_json = self.new_top_grossing(driver, 'Top') # top_data_json = self.new_top_grossing(driver, 'Top')
time.sleep(1) # time.sleep(1)
news_data_json = self.new_top_grossing(driver, 'News') # news_data_json = self.new_top_grossing(driver, 'News')
time.sleep(1) # time.sleep(1)
save_Category_list.append( save_Category_list.append(
[Category_name, Product_name[0], Keyword[0], float(search_ratio), [Category_name, Product_name[0], Keyword[0], float(search_ratio),
float(product_average), float(return_ratio), float(return_product_average), float(product_average), float(return_ratio), float(return_product_average),
self.y_w, big_text_sller, big_text_brand, big_text_asin, big_text_new_asin, self.y_w, big_text_sller, big_text_brand, big_text_asin, big_text_new_asin,
big_text_per_asin, big_text_Advertisement, big_text_star, big_brand_int, big_text_per_asin, big_text_Advertisement, big_text_star, big_brand_int,
big_asin_int, big_new_asin_int, big_per_asin_int, five_star, three_star, two_star, big_asin_int, big_new_asin_int, big_per_asin_int, five_star, three_star, two_star,
one_star, ad_spend, majority_spend, most_popular_json_dict, reasons_returns_json, one_star, ad_spend, majority_spend, most_popular_json_dict, reasons_returns_json
top_data_json, news_data_json]) ])
print('数据:', save_Category_list) print('数据:', save_Category_list)
except: except:
print('============ 下标。超出 。 ==========') print('============ 下标。超出 。 ==========')
...@@ -416,7 +417,7 @@ class dow_category_Product(): ...@@ -416,7 +417,7 @@ class dow_category_Product():
'new_asin_int', 'per_asin_int', 'five_star', 'new_asin_int', 'per_asin_int', 'five_star',
'three_star', 'two_star', 'one_star', 'ad_spend', 'three_star', 'two_star', 'one_star', 'ad_spend',
'majority_spend', 'most_popular_keywords_item', 'majority_spend', 'most_popular_keywords_item',
'reasons_returns_json', 'top_data_json', 'news_data_json' 'reasons_returns_json'
]) ])
df.to_sql(f'{site}_aba_profit_category_insights', con=self.engine_mysql, df.to_sql(f'{site}_aba_profit_category_insights', con=self.engine_mysql,
if_exists="append", index=False) if_exists="append", index=False)
...@@ -436,8 +437,7 @@ class dow_category_Product(): ...@@ -436,8 +437,7 @@ class dow_category_Product():
'new_asin_int', 'per_asin_int', 'five_star', 'new_asin_int', 'per_asin_int', 'five_star',
'three_star', 'two_star', 'one_star', 'ad_spend', 'three_star', 'two_star', 'one_star', 'ad_spend',
'majority_spend', 'most_popular_keywords_item', 'majority_spend', 'most_popular_keywords_item',
'reasons_returns_json', 'top_data_json', 'reasons_returns_json'])
'news_data_json'])
df.to_sql(f'{site}_aba_profit_category_insights', con=self.engine_pg, df.to_sql(f'{site}_aba_profit_category_insights', con=self.engine_pg,
if_exists="append", index=False) if_exists="append", index=False)
print(save_Category_list) print(save_Category_list)
...@@ -451,13 +451,15 @@ class dow_category_Product(): ...@@ -451,13 +451,15 @@ class dow_category_Product():
print('============ 产品分类 下标。超出 。无数据 ==========', f"\n{traceback.format_exc()}") print('============ 产品分类 下标。超出 。无数据 ==========', f"\n{traceback.format_exc()}")
time.sleep(2) time.sleep(2)
continue continue
if product_nums>15: # product_nums+=1
time.sleep(2) # if product_nums>10:
print('product_nums 重新启动 浏览器,') # time.sleep(2)
driver.close() # print(product_nums, 'product_nums 重新启动 浏览器,')
driver.quit() # driver.close()
time.sleep(2) # driver.quit()
self.run() # time.sleep(2)
# product_nums = 0
# self.run()
except Exception as e: except Exception as e:
print(e, '执行错误') print(e, '执行错误')
time.sleep(random.uniform(10, 20)) time.sleep(random.uniform(10, 20))
......
...@@ -41,7 +41,7 @@ class One688LoginSpider(object): ...@@ -41,7 +41,7 @@ class One688LoginSpider(object):
self.engine_us_mysql = create_engine( url) self.engine_us_mysql = create_engine( url)
self.engine_pg = create_engine( self.engine_pg = create_engine(
f"postgresql+psycopg2://postgres:T#4$4%qPbR7mJx@113.100.143.162:5432/{db}", f"postgresql+psycopg2://postgres:F9kL2sXe81rZq@113.100.143.162:5432/{db}",
encoding='utf-8') encoding='utf-8')
return self.engine_us_mysql return self.engine_us_mysql
...@@ -156,7 +156,7 @@ class One688LoginSpider(object): ...@@ -156,7 +156,7 @@ class One688LoginSpider(object):
'商机探测器', '是']] '商机探测器', '是']]
df_seller_asin_account = pd.DataFrame(data=workflow_everyday_list, df_seller_asin_account = pd.DataFrame(data=workflow_everyday_list,
columns=['site_name', 'date_info', 'status', 'status_val', columns=['site_name', 'date_info', 'status', 'status_val',
'table_name', 'date_type', 'page', 'is_end']) 'table_name', 'report_date', 'page', 'is_end'])
df_seller_asin_account.to_sql('workflow_everyday', con=self.engine_us_mysql, if_exists='append', df_seller_asin_account.to_sql('workflow_everyday', con=self.engine_us_mysql, if_exists='append',
index=False) index=False)
......
...@@ -155,10 +155,10 @@ class count_all_syn_st_id(BaseUtils): ...@@ -155,10 +155,10 @@ class count_all_syn_st_id(BaseUtils):
if __name__ == '__main__': if __name__ == '__main__':
import time import time
# 根据 engine 选择那个库。爬虫库 14, 抓取me搜索词是6,爬虫一般使用14,根据情况调整 # 根据 engine 选择那个库。爬虫库 14, 抓取me搜索词是6,爬虫一般使用14,根据情况调整
month = 6 month = 7
engine_db_num = 14 engine_db_num = 14
# for site in ['de','uk']: # for site in ['de','uk']:
for site in ['de']: for site in ['uk']:
time.sleep(0) time.sleep(0)
count_all_syn_st_id(site_name=site,month=month).get_minid_maxid() count_all_syn_st_id(site_name=site,month=month).get_minid_maxid()
# count_all_syn_st_id(site_name=site,month=month,engine_db_num=engine_db_num).search_term_syn() # count_all_syn_st_id(site_name=site,month=month,engine_db_num=engine_db_num).search_term_syn()
......
...@@ -98,7 +98,7 @@ def long_time_task(site, proxy_name, month): ...@@ -98,7 +98,7 @@ def long_time_task(site, proxy_name, month):
if __name__ == '__main__': if __name__ == '__main__':
pppoe_ip() pppoe_ip()
site_list = ['us','de', 'uk'] site_list = ['us','de','uk']
month = int(sys.argv[1]) month = int(sys.argv[1])
week = int(sys.argv[2]) week = int(sys.argv[2])
proxy_name = None proxy_name = None
......
...@@ -477,7 +477,8 @@ class ParseSearchTermUs(object): ...@@ -477,7 +477,8 @@ class ParseSearchTermUs(object):
def parse_ac(self): def parse_ac(self):
try: try:
asin_list = self.etree_html.xpath( asin_list = self.etree_html.xpath(
'//span[@data-a-badge-color="sx-gulfstream" and @aria-hidden="true" ]//@id|//span[@data-a-badge-color="sx-gulfstream"]//@id|//span[contains(@id,"-amazons-choice-label")]/@id') '//span[@data-a-badge-color="sx-gulfstream" and @aria-hidden="true" ]//@id|//span[@data-a-badge-color="sx-gulfstream"]//@id|//span[contains(@id,"-amazons-choice-label")]/@id|//span[contains(@id,"-amazons-choice")]/@id')
print('ac_asin_list:::',asin_list)
if len(asin_list): if len(asin_list):
asin_list = [asin.split("-")[0] for asin in asin_list if len(asin.split("-")[0]) >= 9] # 有重复 asin_list = [asin.split("-")[0] for asin in asin_list if len(asin.split("-")[0]) >= 9] # 有重复
self.ac_list.extend(self.parse_type_common(asin_list=asin_list, cate_type='ac')) self.ac_list.extend(self.parse_type_common(asin_list=asin_list, cate_type='ac'))
......
...@@ -326,7 +326,7 @@ class TkVideo(): ...@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3) time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象 # 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver # 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit() # self.page_chrome.quit()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -326,7 +326,7 @@ class TkVideo(): ...@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3) time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象 # 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver # 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit() # self.page_chrome.quit()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -326,7 +326,7 @@ class TkVideo(): ...@@ -326,7 +326,7 @@ class TkVideo():
time.sleep(3) time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象 # 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver # 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit() # self.page_chrome.quit()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -327,7 +327,7 @@ class TkVideo(): ...@@ -327,7 +327,7 @@ class TkVideo():
time.sleep(3) time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象 # 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver # 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit() # self.page_chrome.quit()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -333,7 +333,7 @@ class TkVideo(): ...@@ -333,7 +333,7 @@ class TkVideo():
time.sleep(3) time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象 # 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver # 或者如果它是基于 Selenium WebDriver
self.page_edge.quit() # self.page_edge.quit()
if __name__ == '__main__': if __name__ == '__main__':
TkVideo().run() TkVideo().run()
......
...@@ -330,7 +330,7 @@ class TkVideo(): ...@@ -330,7 +330,7 @@ class TkVideo():
time.sleep(3) time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象 # 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver # 或者如果它是基于 Selenium WebDriver
self.page_edge.quit() # self.page_edge.quit()
if __name__ == '__main__': if __name__ == '__main__':
TkVideo().run() TkVideo().run()
......
...@@ -324,11 +324,7 @@ class TkVideo(): ...@@ -324,11 +324,7 @@ class TkVideo():
self.connect_redis() self.connect_redis()
self.get_datetime() self.get_datetime()
self.get_day() self.get_day()
print('完成关闭浏览器')
time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver
self.page_chrome.quit()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -344,7 +344,7 @@ class TkVideo(): ...@@ -344,7 +344,7 @@ class TkVideo():
time.sleep(3) time.sleep(3)
# 如果 ChromiumPage 底层保存了 browser 对象 # 如果 ChromiumPage 底层保存了 browser 对象
# 或者如果它是基于 Selenium WebDriver # 或者如果它是基于 Selenium WebDriver
self.page_edge.quit() # self.page_edge.quit()
if __name__ == '__main__': if __name__ == '__main__':
TkVideo().run() TkVideo().run()
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment