import os

os.environ['NO_PROXY'] = 'stackoverflow.com'
import logging

logging.captureWarnings(True)
from DrissionPage import ChromiumPage, ChromiumOptions
import time
from datetime import datetime, timedelta
from time import sleep
from random import randint
import requests
import math
import pandas as pd
import redis
import json
from pathlib import Path
import re
from bs4 import BeautifulSoup
import difflib
import os


class Amazon_dif():
    """Report text differences between two snapshots of an Amazon help page.

    The "current" snapshot is read from ``amazon_current.html`` and the
    previous one from ``amazon.html``. Both are reduced to plain text with
    BeautifulSoup and compared with ``difflib.SequenceMatcher``; every
    replaced, deleted or inserted span is printed.
    """

    def __init__(self):
        """Create the browser page object and store the request settings."""
        # Creating the page object here means run() is responsible for
        # closing it again, whatever path it takes.
        self.page = ChromiumPage()
        # Browser-like request headers.
        # NOTE(review): no method of this class reads self.headers — confirm
        # whether it is still needed.
        self.headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'priority': 'u=0, i',
            'sec-ch-ua': '"Google Chrome";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'document',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-site': 'none',
            'sec-fetch-user': '?1',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
        }
        # Page whose content is being monitored for changes.
        self.url = "https://sellercentral.amazon.com/help/hub/reference/external/GVACXTSVATE36M4M?locale=zh-CN&ref=as_cn_ags_policy_na_product&initialSessionID=000-9225692-1269734&ld=NSBing&pageName=CN%3AAS%3AGS-policy"

    def get_html(self):
        """Load ``self.url`` in the browser and return the page HTML.

        Returns:
            The HTML of the loaded page, or ``None`` if any step raised
            (the exception is printed, not propagated).
        """
        try:
            self.page.get(self.url)
            # NOTE(review): maximize_window / page_source look like
            # Selenium-style names — confirm they exist on ChromiumPage.
            self.page.maximize_window()
            # Random 5-10 second pause before reading the page.
            sleep(randint(5, 10))
            current_html = self.page.page_source

            return current_html

        except Exception as e:
            print(e)
            return None

    def read_file(self, file_path):
        """Return the UTF-8 text of ``file_path``.

        Args:
            file_path: Path of the file to read.

        Returns:
            The file contents, or ``""`` (after printing a message) when the
            path does not exist.
        """
        if not os.path.exists(file_path):
            print(f"文件 {file_path} 不存在")
            return ""
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()

    def extract_text(self, html_content):
        """Return the text of ``html_content`` with outer whitespace stripped.

        Args:
            html_content: HTML markup to parse with ``html.parser``.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        return soup.get_text().strip()

    def compare_texts(self, text1, text2):
        """Describe how ``text2`` differs from ``text1``.

        The inputs are compared element by element (character by character
        for strings) using ``difflib.SequenceMatcher``.

        Args:
            text1: The original text.
            text2: The new text.

        Returns:
            A list with one message per non-equal opcode, in order:
            replacements, deletions and insertions. Empty when the inputs
            are identical.
        """
        matcher = difflib.SequenceMatcher(None, text1, text2)
        differences = []

        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == 'replace':
                differences.append(f"[修改] 原文: '{text1[i1:i2]}' → 新文: '{text2[j1:j2]}'")
            elif tag == 'delete':
                differences.append(f"[删除] 内容: '{text1[i1:i2]}'")
            elif tag == 'insert':
                differences.append(f"[新增] 内容: '{text2[j1:j2]}'")

        return differences

    def run(self):
        """Compare the two saved snapshots, print the result, close the browser.

        Returns early, without comparing, when ``amazon_current.html`` is
        missing or empty. The browser opened in ``__init__`` is closed on
        every exit path.
        """
        try:
            # The live fetch via get_html() is not used here; the current
            # page comes from a previously saved local snapshot. Going
            # through read_file() reports a missing file instead of raising.
            current_html = self.read_file('amazon_current.html')
            if not current_html:
                return

            old_html = self.read_file('amazon.html')
            current_text = self.extract_text(current_html)
            old_text = self.extract_text(old_html)

            print("开始对比文本内容...")
            differences = self.compare_texts(old_text, current_text)

            if differences:
                print("发现以下差异：")
                for line in differences:
                    print(line)
            else:
                print("文本内容一致，没有变化。")
        finally:
            # Always release the browser, including on early return or error.
            self.page.quit()

# Script entry point: build the checker and run one comparison.
if __name__ == '__main__':
    checker = Amazon_dif()
    checker.run()




















