

# -*- coding: utf-8 -*-
import os
os.environ['NO_PROXY'] = 'stackoverflow.com'
import logging
logging.captureWarnings(True)
from DrissionPage import ChromiumPage, ChromiumOptions
import time
from datetime import datetime, timedelta
from time import sleep
from random import randint
import requests
import math
import pandas as pd
import redis
import json
from pathlib import Path
import re
from sqlalchemy import create_engine
import random

class TkVideo():
    def __init__(self):
        """Prepare request headers, account settings, Redis settings and the Edge browser.

        The Redis connection settings can be overridden through the
        ``TK_REDIS_HOST``, ``TK_REDIS_PORT``, ``TK_REDIS_PASSWORD`` and
        ``TK_REDIS_DB`` environment variables; when a variable is absent the
        built-in value is used.

        Raises:
            ValueError: if ``TK_REDIS_PORT`` or ``TK_REDIS_DB`` is set to a
                non-integer value.
        """
        # Browser-like request headers.
        self.headers = {
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'cache-control': 'no-cache',
            'content-type': 'application/json; charset=UTF-8',
            'origin': 'https://www.tiktok.com',
            'pragma': 'no-cache',
            'priority': 'u=1, i',
            'referer': 'https://www.tiktok.com/',
            'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'cross-site',
            'sec-fetch-storage-access': 'active',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
        }
        # Account key; used in the Redis key name and in error notifications.
        self.key = "positiveliliya"
        # Folder the browser saves downloaded files to.
        self.download_folder = r"D:\Downloads"
        # Recipient account for error notifications.
        self.receiver_name = 'pengyanbing'
        # Redis connection settings.
        # NOTE(review): the fallback values below keep a secret in source
        # control; prefer supplying them through the environment variables.
        self.REDIS_CONFIG = {
            'host': os.environ.get('TK_REDIS_HOST', '120.79.147.190'),
            'port': int(os.environ.get('TK_REDIS_PORT', 6379)),
            'password': os.environ.get('TK_REDIS_PASSWORD', 'fG7#vT6kQ1pX'),
            'db': int(os.environ.get('TK_REDIS_DB', 13)),
            'decode_responses': True
        }
        # Filled in later by connect_redis() and get_data() respectively.
        self.r = None
        self.shop_name = None

        # Configure the Edge browser; the debugging port is defined once so
        # the option and the log line below cannot drift apart.
        debug_port = 9220
        edge_options = ChromiumOptions()
        edge_options.set_browser_path(r'C:\Program Files\Edge\App\msedge.exe')
        edge_options.set_local_port(debug_port)

        # Create the browser page object from the options above.
        self.page_edge = ChromiumPage(addr_or_opts=edge_options)

        print(f"Edge 浏览器运行在端口: {debug_port}")

    def get_datetime(self):
        """获取当前日期，并计算前2天的完整日期（年-月-日），并按照指定格式输出"""
        # 获取今天的日期
        today = datetime.today().date()
        # 开始日期：去年的今天
        self.start_date = today.replace(year=today.year - 1)
        self.start_year = self.start_date.year
        self.start_month = self.start_date.month
        self.start_day = self.start_date.day
        # 结束日期：今天的前一天
        self.end_date = today - timedelta(days=3)
        self.end_year = self.end_date.year
        self.end_month = self.end_date.month
        self.end_day = self.end_date.day


    def get_day(self):
        """Open the video-insight page, choose the custom date range and export.

        The start date is clicked once. The end date is then attempted up to
        ten times; after every failed attempt the end date (``end_date`` and
        ``end_year`` / ``end_month`` / ``end_day``) is moved back by one day so
        that the next attempt targets an earlier day. Once an end date has been
        clicked, ``get_data()`` is called.

        Returns:
            bool: True when an end date was clicked and ``get_data()`` was
            invoked; False when no end date could be clicked or an unexpected
            error occurred (in which case an error notification is sent).
        """
        try:
            self.page_edge.get("https://www.tiktok.com/business-suite/insight/video")
            self.page_edge.set.window.max()
            # Wait for the initial page load.
            time.sleep(random.randint(6, 10))

            export_orders = self.page_edge.ele('xpath://span[text()="自定义"]', timeout=13)
            export_orders.click()
            print('点击自定义')
            time.sleep(random.randint(5, 10))

            # Click the start date first.
            self.page_edge.ele(
                f"xpath=//div[@class='tiktok-datepicker-month-title' and contains(text(), '{self.start_year} {self.start_month} 月')]"
                f"/following-sibling::div[@class='tiktok-datepicker-day-wrapper']"
                f"//div[@class='tiktok-datepicker-day valid in-this-month']"
                f"//span[text()='{self.start_day}']/parent::div"
            ).click()
            print(f'已输入开始时间{self.start_year} {self.start_month} 月 {self.start_day} 日')
            time.sleep(random.randint(3, 5))

            first_day = datetime(self.start_year, self.start_month, self.start_day).date()

            for _ in range(10):
                xpath = (
                    f"//div[@class='tiktok-datepicker-month-title' and contains(text(), '{self.end_year} {self.end_month} 月')]"
                    f"/following-sibling::div[@class='tiktok-datepicker-day-wrapper']"
                    f"//span[text()='{self.end_day}']/parent::div"
                )
                print('结束日期 xpath::', xpath)
                try:
                    ele = self.page_edge.ele(f"xpath={xpath}", timeout=8)
                    ele.click()
                except Exception as e:
                    print(f'❌ 无法点击 {self.end_year}-{self.end_month}-{self.end_day}，错误：{e}')
                    # Move the end date back one day for the next attempt.
                    previous_day = (
                        datetime(self.end_year, self.end_month, self.end_day).date()
                        - timedelta(days=1)
                    )
                    if previous_day < first_day:
                        # Never let the range end before it starts.
                        break
                    self.end_date = previous_day
                    self.end_year = previous_day.year
                    self.end_month = previous_day.month
                    self.end_day = previous_day.day
                    time.sleep(random.randint(5, 15))
                    continue

                # Only the click is retried; the export itself runs once.
                print(f'✅ 成功点击日期：{self.end_year}-{self.end_month}-{self.end_day}')
                self.get_data()
                time.sleep(random.randint(3, 5))
                return True

            print('⛔ 连续尝试失败，未找到可点击的日期，请检查页面状态或网络连接。')
            return False

        except Exception as e:
            print(f"get_day出现错误: {e}")
            self.send_error_notification_via_wechat(e)
            return False

    def get_data(self):
        """Trigger the XLSX export, read the shop name and push the data to Redis.

        Clicks, in order: update, download data, Xlsx, the download button and
        the home entry, pausing a random 5-10 seconds after each click. The
        shop name is then read from the home page into ``self.shop_name``,
        ``save_to_redis()`` is called and the browser is closed. Any exception
        is printed and reported through the error notification; it is not
        re-raised.
        """
        # (locator, message printed after the click), in execution order.
        click_steps = (
            ('xpath://div[text()="更新"]', '已点击更新'),
            ('xpath://span[text()="下载数据"]', '已点击下载数据'),
            ('xpath://span[text()="Xlsx"]', '已点击Xlsx'),
            ('xpath://button[text()="下载数据"]', '已点击下载数据'),
            # Going to the home page is what exposes the shop name below.
            ('xpath://span[text()="首页"]', '已点击首页'),
        )

        try:
            for locator, message in click_steps:
                self.page_edge.ele(locator, timeout=13).click()
                print(message)
                sleep(randint(5, 10))

            shop_title = self.page_edge.ele('xpath://div[@class="text-H6-Bold"]')
            self.shop_name = shop_title.text
            print(f'已获取店铺名: {self.shop_name}')
            sleep(randint(5, 10))

            self.save_to_redis()
            time.sleep(5)
            self.page_edge.quit()

        except Exception as err:
            print(f"get_data出现错误: {err}")
            self.send_error_notification_via_wechat(err)


    def connect_redis(self):
        """Create the Redis client from ``REDIS_CONFIG`` and verify it with a PING.

        The client is stored on ``self.r``. When the PING fails with
        ``redis.exceptions.ConnectionError`` the error is printed and re-raised.
        """
        client = redis.StrictRedis(**self.REDIS_CONFIG)
        self.r = client
        try:
            client.ping()  # round trip to confirm the server is reachable
        except redis.exceptions.ConnectionError as exc:
            print(f"❌ 无法连接到 Redis: {exc}")
            raise
        else:
            print("✅ 成功连接到 Redis")

    def read_excel(self, file_path):
        """Load the exported workbook and return its rows as a list of dicts.

        Every column is read as text so that long numeric values are not
        rendered in scientific notation. Known Chinese column headers are
        renamed to English field names; other columns keep their header.

        Args:
            file_path: path of the Excel file to read.

        Returns:
            list[dict]: one dict per row, keyed by column name.
        """
        print(f"📄 正在读取文件：{file_path}")

        # Export header (Chinese) -> field name (English).
        header_to_field = {
            '视频标题': 'video_title',
            '视频链接': 'video_url',
            '发布时间': 'publish_date',
            '视频观看次数': 'views',
            '点赞数': 'likes',
            '评论数': 'comments',
            '分享次数': 'shares',
            '添加到收藏': 'favorites',
        }

        # dtype=str forces text for all columns; rename returns a new frame.
        frame = pd.read_excel(file_path, dtype=str).rename(columns=header_to_field)

        rows = frame.to_dict(orient='records')
        print(f"📊 已读取 {len(rows)} 条记录")
        return rows

    def process_data(self, data, account):
        """Enrich raw rows with account, content id and update time, and blank out NaN.

        For every record the content id is taken from the last path segment of
        ``video_url`` (surrounding whitespace, a trailing slash and any query
        string are ignored). A missing, empty or non-string ``video_url``
        (for example a NaN from an empty Excel cell) yields an empty content
        id instead of raising.

        Fields of the original record are merged last, so a record that
        already carries ``account``, ``content_id`` or ``update_time`` keeps
        its own value. Afterwards every value that is None, NaN or the text
        "nan" (case-insensitive) is replaced by an empty string.

        Args:
            data: iterable of row dicts (as returned by ``read_excel``).
            account: account / shop name stored on every row.

        Returns:
            list[dict]: the processed rows, in input order.
        """
        processed_data = []
        current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        for record in data:
            video_link = record.get('video_url', '')
            # NaN is a truthy float, so check the type before using str methods.
            if isinstance(video_link, str) and video_link.strip():
                path_part = video_link.strip().split('?', 1)[0].rstrip('/')
                content_id = path_part.rsplit('/', 1)[-1]
            else:
                content_id = ''

            processed_record = {
                'account': account,
                'content_id': content_id,
                'update_time': current_time,
                **record,  # original fields win on key clashes
            }

            cleaned_record = {
                key: ("" if pd.isna(value) or value is None or str(value).strip().lower() == "nan" else value)
                for key, value in processed_record.items()
            }

            processed_data.append(cleaned_record)

        return processed_data

    def store_data_in_redis(self, r, data):
        """Replace the Redis list for this account with ``data``.

        The list key is ``tk_video_data:<self.key>:order:list``. The old list
        is deleted and the new records are pushed inside a single
        transactional pipeline, so readers never observe a half-written list
        and a failure before execution leaves the previous data untouched.

        Args:
            r: Redis client exposing ``pipeline()``.
            data: sized iterable of JSON-serialisable records; each record is
                stored as one JSON string (non-ASCII characters kept as-is).
        """
        key = f"tk_video_data:{self.key}:order:list"

        pipe = r.pipeline(transaction=True)
        # Queue the delete first so the overwrite happens atomically.
        pipe.delete(key)
        for record in data:
            pipe.rpush(key, json.dumps(record, ensure_ascii=False))

        results = pipe.execute()

        # The first reply is the number of keys removed by the delete.
        if results and results[0]:
            print(f"🗑️ 已清除旧数据: {key}")

        print(f"💾 已写入新数据到键: {key}，共 {len(data)} 条记录")

    def find_specific_file(self):
        """Locate the exported report for the current date range in the download folder.

        A matching file name starts with
        ``视频(YYYY_MM_DD-YYYY_MM_DD)`` built from ``start_*`` / ``end_*``
        (month and day zero-padded to two digits) and may carry any suffix.
        Files that look like unfinished downloads (``.crdownload``, ``.tmp``,
        ``.part``) are ignored. When several files match, the most recently
        modified one is returned.

        Returns:
            str: path of the selected file.

        Raises:
            FileNotFoundError: if no finished file matches (also raised by the
                directory listing when the download folder does not exist).
        """
        download_path = Path(self.download_folder)

        base_prefix = (
            f"视频({self.start_year}_{self.start_month:02d}_{self.start_day:02d}"
            f"-{self.end_year}_{self.end_month:02d}_{self.end_day:02d})"
        )

        # The prefix is matched literally, followed by anything.
        pattern = re.escape(base_prefix) + r'.*$'
        print("匹配模式:", pattern)

        partial_suffixes = ('.crdownload', '.tmp', '.part')
        candidates = [
            file
            for file in download_path.iterdir()
            if file.is_file()
            and re.fullmatch(pattern, file.name)
            and not file.name.lower().endswith(partial_suffixes)
        ]

        if not candidates:
            raise FileNotFoundError(f"未找到匹配 {base_prefix} 的文件")

        # Directory order is arbitrary; prefer the newest download.
        newest = max(candidates, key=lambda file: file.stat().st_mtime)
        return str(newest)

    def save_to_redis(self):
        """Load the downloaded report into Redis and delete the local file.

        Finds the export via ``find_specific_file()``, reads it with
        ``read_excel()``, enriches the rows with ``process_data()`` using
        ``self.shop_name`` and stores them through ``store_data_in_redis()``
        on ``self.r``. Removing the file afterwards is best-effort: a failure
        is printed and does not abort the run.
        """
        excel_file = self.find_specific_file()
        print(f'保存文件：{excel_file}')

        data = self.read_excel(excel_file)
        processed_data = self.process_data(data, self.shop_name)
        self.store_data_in_redis(self.r, processed_data)

        print('删除下载文件', excel_file)
        try:
            os.remove(excel_file)
        except OSError as e:
            # Only file-system errors are expected here; report the cause.
            print('删除数据失败', e)

    def send_error_notification_via_wechat(self,error_message):
        """Post an error notification for this account to the message webhook.

        This method is called from exception handlers, so it never raises on
        a delivery problem: request errors (timeout, connection failure, ...)
        and non-200 responses are printed instead.

        Args:
            error_message: the error (or text) to include in the notification.
        """
        webhook_url = 'http://47.112.96.71:8082/selection/sendMessage'
        data = {
            "account": self.receiver_name,
            'title':'【TK视频数据下载异常提醒】',
            'content':f'账号：{self.key}，错误信息：{error_message}, 时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}'
        }

        try:
            response = requests.post(url=webhook_url, data=data,timeout=15)
        except requests.RequestException as exc:
            # A failed notification must not mask the error being reported.
            print(f"发送错误通知失败: {exc}")
            return

        if response.status_code == 200:
            print("已成功发送错误通知到企业微信")
        else:
            print(f"发送错误通知失败: {response.text}")
    def run(self):
        """Run the whole job: connect to Redis, compute the date range, export.

        The browser is always closed at the end, even when one of the steps
        raises. ``get_data()`` may already have closed it, so an error while
        closing is printed rather than propagated.
        """
        try:
            self.connect_redis()
            self.get_datetime()
            self.get_day()
        finally:
            print('完成关闭浏览器')
            time.sleep(5)
            try:
                self.page_edge.quit()
            except Exception as e:
                # The browser may already be closed at this point.
                print(f"关闭浏览器时出现错误: {e}")

if __name__ == "__main__":
    # Script entry point: build the scraper and run the full export job.
    scraper = TkVideo()
    scraper.run()











