import os
import sys

# Extend the import path with the parent of sys.path[0] (the script's own
# directory when this file is run as a script). This must happen before the
# project-local imports below -- presumably they live in that parent directory.
sys.path.append(os.path.dirname(sys.path[0]))  # parent directory
from secure_db_client import get_remote_engine

from curl_cffi import requests
from utils.db_connect import BaseUtils
import re
from lxml import etree

# Ask proxy-aware HTTP clients to bypass the proxy for amazon.com.
# NOTE(review): only the .com domain is listed, while the scraper class in this
# file also targets the co.uk/de/fr/es/it storefronts -- confirm whether those
# should bypass the proxy as well.
os.environ['NO_PROXY'] = 'amazon.com'
import json
from urllib.parse import urlparse




class Amazon_reviewer():
    """Scrape the first "most recent" review page of an Amazon product.

    Typical use is ``Amazon_reviewer(site_name).run()``, which loads request
    cookies from Redis and then fetches and parses the review page.

    Attributes:
        site_name: Marketplace code passed to the constructor.
        host: Host name of the selected storefront, e.g. ``www.amazon.de``.
        site_url: ``https://`` URL of the selected storefront.
        cookeis_dict: Cookies sent with every request; only set after
            :meth:`redis_db` has been called.
    """

    # Marketplace code -> host name of the corresponding storefront.
    _SITE_HOSTS = {
        'us': 'www.amazon.com',
        'uk': 'www.amazon.co.uk',
        'de': 'www.amazon.de',
        'fr': 'www.amazon.fr',
        'es': 'www.amazon.es',
        'it': 'www.amazon.it',
    }

    def __init__(self, site_name='us'):
        """Select the storefront to scrape.

        Args:
            site_name: One of ``us``, ``uk``, ``de``, ``fr``, ``es``, ``it``.

        Raises:
            ValueError: If ``site_name`` is not a supported marketplace code.
        """
        try:
            self.host = self._SITE_HOSTS[site_name]
        except KeyError:
            # Fail here rather than with an AttributeError on the first request.
            supported = ', '.join(sorted(self._SITE_HOSTS))
            raise ValueError(
                f'Unsupported site_name {site_name!r}; expected one of: {supported}'
            ) from None
        self.site_name = site_name
        self.site_url = f'https://{self.host}'  # storefront base URL

    def pg_connect(self):
        """Return the engine object produced by ``get_remote_engine``.

        The site is fixed to ``'us'`` regardless of the storefront this
        instance was created for.
        """
        engine_pg15 = get_remote_engine(
            site_name='us',  # -> database "selection"
            # Original note: -> server-side alias "mysql".
            # NOTE(review): that alias looks inconsistent with a PostgreSQL
            # db_type -- confirm against the secure_db_client configuration.
            db_type='postgresql_15_outer',
        )
        return engine_pg15

    def redis_db(self):
        """Load the request cookies from Redis into ``self.cookeis_dict``.

        Reads the JSON value stored under the key ``amaozn_login_dict``
        (presumably the cookies of a logged-in session).

        Raises:
            RuntimeError: If the key is absent from Redis.
        """
        redis14_ = BaseUtils().redis_db()
        try:
            headers_json = redis14_.get('amaozn_login_dict')
        finally:
            # Release the connection even when the lookup fails.
            redis14_.close()
        if headers_json is None:
            raise RuntimeError("Redis key 'amaozn_login_dict' is missing; no cookies available")
        self.cookeis_dict = json.loads(headers_json)

    def get_asin_reviewer(self, asin='0740303090', dump_path=None):
        """Fetch and parse the first "most recent" review page for ``asin``.

        Requires :meth:`redis_db` to have been called first so that
        ``self.cookeis_dict`` exists.

        Args:
            asin: Product identifier whose reviews are requested.
            dump_path: Optional file path; when given, the raw response text
                is written there (UTF-8) for debugging.

        Returns:
            A list with one dict per review found on the page. Each dict is
            also printed as it is produced.
        """
        headers = {
            'authority': urlparse(self.site_url).hostname,
            'host': self.host,
            "x-requested-with": "XMLHttpRequest",
            "accept": "text/html,*/*",
            "content-type": "application/x-www-form-urlencoded;charset=UTF-8",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
            "origin": self.site_url,
            "accept-language": "zh-CN,zh;q=0.9",
        }
        url = self._review_page_url(asin)
        response = requests.get(url, headers=headers, cookies=self.cookeis_dict)
        if dump_path is not None:
            with open(dump_path, 'w', encoding='utf-8') as f:
                f.write(response.text)

        resp = etree.HTML(response.text)
        reviews = []
        for node in resp.xpath("//div[@id='cm_cr-review_list']/ul/li"):
            items = self._parse_review(node)
            print(items)
            reviews.append(items)
        return reviews

    def _review_page_url(self, asin):
        """Build the URL of the first review page, newest first, for ``asin``."""
        return (f'{self.site_url}/product-reviews/{asin}'
                f'/ref=cm_cr_getr_d_paging_btm_next_1?sortBy=recent&pageNumber=1')

    def _absolute_url(self, href):
        """Resolve ``href`` against the storefront; absolute URLs pass through."""
        from urllib.parse import urljoin

        # Plain concatenation would corrupt values that are already absolute.
        return urljoin(self.site_url, href)

    @staticmethod
    def _first(values):
        """Return the first element of ``values``, or None when it is empty."""
        return values[0] if values else None

    def _parse_review(self, node):
        """Extract the fields of one review ``<li>`` element into a dict.

        Any field whose markup is not found is set to None.
        """
        author_row = ".//div[@class='a-row a-spacing-mini']"
        format_strip = ".//div[@class='a-row a-spacing-mini review-data review-format-strip']"

        user_href = self._first(node.xpath(author_row + "/a/@href"))
        user_img = self._first(node.xpath(author_row + "//img/@data-src"))
        review_href = self._first(node.xpath(".//div[@class='a-row']//a/@href"))

        # The variant ASIN is taken from the first link of the format strip.
        var_asin = None
        variant_hrefs = node.xpath(format_strip + "//a/@href")
        if variant_hrefs:
            var_asin = self._first(re.findall(r'reviews/(.*)/ref', variant_hrefs[0]))

        variant_labels = node.xpath(format_strip + "//a/text()")
        body_text = ''.join(node.xpath(
            ".//div[@class='a-row a-spacing-small review-data']/span[@data-hook='review-body']//text()"
        )).strip()
        image_urls = node.xpath(".//img[@data-hook='review-image-tile']/@src")

        return {
            'user_name': self._first(
                node.xpath(author_row + "//span[@class='a-profile-name']/text()")),
            'user_img': self._absolute_url(user_img) if user_img else None,
            "user_href": self._absolute_url(user_href) if user_href else None,
            'review_star_rating': self._first(
                node.xpath(".//div[@class='a-row']//i[@data-hook='review-star-rating']//text()")),
            'review_title': self._first(node.xpath(".//div[@class='a-row']//a/span/text()")),
            "review_date": self._first(node.xpath(".//span[@data-hook='review-date']/text()")),
            "review_href": self._absolute_url(review_href) if review_href else None,
            "var_data": '||'.join(variant_labels) if variant_labels else None,
            'var_asin': var_asin,
            "is_vp": self._first(
                node.xpath(".//a[contains(@aria-label,'Verified Purchase')]//span/text()")),
            "review_data": body_text if body_text else None,
            "review_data_img": ','.join(image_urls).strip() if image_urls else None,
        }

    def run(self):
        """Load cookies from Redis, then scrape the default ASIN's reviews.

        Returns:
            The list of review dicts produced by :meth:`get_asin_reviewer`.
        """
        self.redis_db()
        return self.get_asin_reviewer()


if __name__ == '__main__':
    # Script entry point: scrape with the default storefront settings.
    reviewer = Amazon_reviewer()
    reviewer.run()
