no message

badb8f44 · Peng · a8be8ef1 · badb8f44 · badb8f44
Commit badb8f44 authored Nov 20, 2025 by Peng
Show whitespace changes
Inline Side-by-side

Showing with 956 additions and 0 deletions

all_connect.py ...s/picture_material/stock_summery/new_stock/all_connect.py +242 -0

xnj_url.py ...jects/picture_material/stock_summery/new_stock/xnj_url.py +714 -0

No files found.
--- a/wangjing_projects/projects/picture_material/stock_summery/new_stock/all_connect.py
+++ b/wangjing_projects/projects/picture_material/stock_summery/new_stock/all_connect.py
+import json
+import pandas as pd
+from secure_db_client import get_remote_engine
+import time
+from sqlalchemy import create_engine
+class ConnectSpider:
+    def __init__(self):
+        """Create the remote database engines used by the methods of this class.
+        Both attributes are built with identical arguments (site ``us``, database type
+        ``postgresql_14_outer``); the methods visible in this class use ``db_engine192``.
+        """
+        # NOTE(review): a commented-out direct-connection block used to live here and
+        # contained a plaintext database password. It has been removed; that credential
+        # should be rotated and kept out of version control.
+        self.db_engine = get_remote_engine(
+            site_name='us',
+            db_type="postgresql_14_outer",
+        )
+        self.db_engine192 = get_remote_engine(
+            site_name='us',
+            db_type="postgresql_14_outer",
+        )
+    def mysql(self):
+        """Build and return a new remote engine for site ``us`` with database type ``mysql``."""
+        return get_remote_engine(site_name='us', db_type="mysql")
+    def save_stock_img_id(self, items):
+        """Upsert a batch of image-id rows into ``stock_image_id_wj``.
+        Each element of ``items`` is a dict with the keys ``account_id``, ``image_id``,
+        ``state`` and ``created_at``; ``title`` and ``sizes`` are optional and default to
+        ``''`` and ``{}``. Rows that collide on ``(account_id, image_id)`` are updated.
+        The write is attempted up to five times, 30 seconds apart. Returns ``None`` in
+        every case; a final failure is only reported on stdout.
+        """
+        if not items:
+            # Nothing to write: do not send an INSERT with an empty parameter list.
+            return
+        sql = """
+        INSERT INTO stock_image_id_wj
+          (account_id, image_id, state, created_at, image_title, image_size_info)
+        VALUES (%s, %s, %s, %s, %s, %s::jsonb)
+        ON CONFLICT (account_id, image_id) DO UPDATE SET
+          state = EXCLUDED.state,
+          created_at = EXCLUDED.created_at,
+          image_title = EXCLUDED.image_title,
+          image_size_info = EXCLUDED.image_size_info;
+        """
+        params = [
+            (
+                item['account_id'],
+                item['image_id'],
+                item['state'],
+                item['created_at'],
+                item.get('title', ''),
+                # Serialised to JSON text; the statement casts it with ::jsonb.
+                json.dumps(item.get('sizes', {})),
+            )
+            for item in items
+        ]
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                with self.db_engine192.begin() as conn:
+                    conn.execute(sql, params)
+                print('存储更新成功')
+                return
+            except Exception as e:
+                # Report the error immediately, then wait only if another attempt follows.
+                print('save_stock_img_id 报错。', e)
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'save_stock_img_id 重试{max_attempts}次后仍然失败')
+    def update_id_to_3(self, account_id):
+        """Set ``state = 3`` on the ``stock_image_summary_wj`` rows of ``account_id``.
+        The update is attempted up to five times, 30 seconds apart. Returns ``None``;
+        success and failure are only reported on stdout.
+        """
+        sql_update = "UPDATE stock_image_summary_wj SET state = 3 WHERE account_id = %s"
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                with self.db_engine192.begin() as connection:
+                    # Bound parameter instead of string interpolation, using the same
+                    # %s placeholder / list-of-tuples form as save_stock_img_id, so an
+                    # account id containing a quote cannot break the statement.
+                    connection.execute(sql_update, [(account_id,)])
+                # Logged only after the transaction block has completed.
+                print(sql_update, account_id, '成功更新为3')
+                return
+            except Exception as e:
+                print('update_id_to_3 报错。', e)
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'update_id_to_3 重试{max_attempts}次后仍然失败')
+    def update_all_states_to_1(self, state=1, item_id=None):
+        """Write ``state`` into ``stock_image_summary_wj``.
+        When ``state == 3`` only the row whose ``id`` equals ``item_id`` is updated.
+        For any other value EVERY row of the table receives ``state`` (there is no
+        WHERE clause). The update is attempted up to five times, 30 seconds apart.
+        Returns ``None``.
+        """
+        if state == 3 and item_id is None:
+            # The statement would compare id with None and could never succeed, so
+            # retrying it for several minutes is pointless.
+            print('更新状态失败：state=3 时必须提供 item_id')
+            return
+        if state == 3:
+            sql_update = "UPDATE stock_image_summary_wj SET state = %s where id = %s"
+            params = [(state, item_id)]
+        else:
+            sql_update = "UPDATE stock_image_summary_wj SET state = %s"
+            params = [(state,)]
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                # begin() manages the transaction for the duration of the block.
+                with self.db_engine192.begin() as connection:
+                    print(sql_update, params)
+                    # Same %s placeholder / list-of-tuples form as save_stock_img_id.
+                    connection.execute(sql_update, params)
+                return
+            except Exception as e:
+                print(f'更新状态失败：{e}')
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'update_all_states_to_1 重试{max_attempts}次后仍然失败')
+    def save_stock_detail(self, item):
+        """Append one detail record to ``stock_image_detail_wj``.
+        ``item`` is wrapped in a one-element list and turned into a DataFrame with the
+        columns ``account_id``, ``image_id``, ``image_size_info``, ``image_title``,
+        ``image_type``, ``image_url``, ``state`` and ``created_time``. The write is
+        attempted up to five times, 30 seconds apart. Returns ``None``.
+        """
+        table_name = "stock_image_detail_wj"
+        columns = ['account_id', 'image_id', 'image_size_info', 'image_title', 'image_type', 'image_url', 'state',
+                   'created_time']
+        df = pd.DataFrame([item], columns=columns)
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                self.db_engine192.to_sql(df, table_name, if_exists='append')
+                print("保存成功！")
+                return
+            except Exception as e:
+                # Report first, then wait only when another attempt will follow.
+                print(f'save_stock_detail 报错：{e}')
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'save_stock_detail 重试{max_attempts}次后仍然失败')
+    def get_stock_images_id(self, account_id):
+        """Return the pending (``state = 1``) image ids of ``account_id``.
+        Each element of the returned list is
+        ``image_id||-||id||-||image_title||-||image_size_info`` built from one row of
+        ``stock_image_id_wj``. Returns ``False`` when the rows cannot be joined into
+        strings, and ``None`` when the query failed on all five attempts.
+        """
+        table_name = "stock_image_id_wj"
+        # NOTE(review): account_id is interpolated into the SQL text because read_sql is
+        # only seen here being called with a finished query string; confirm whether it
+        # accepts bound parameters and switch to them if so.
+        query = f""" SELECT image_id,id,image_title,image_size_info FROM {table_name} where account_id ='{account_id}' and state = 1"""
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                print(query)
+                df_status = self.db_engine192.read_sql(query)
+            except Exception as e:
+                print(f'get_stock_images_id 报错：{e}')
+                if attempt < max_attempts:
+                    time.sleep(30)
+                continue
+            try:
+                df_status['id'] = df_status['id'].astype(str)
+                image_id_id_pairs = list(
+                    df_status['image_id'] + '||-||' + df_status['id'] + '||-||' + df_status['image_title'] + '||-||' +
+                    df_status['image_size_info'])
+                print(f'账号：{account_id}需爬取{len(image_id_id_pairs)}张')
+                return image_id_id_pairs
+            except Exception as e:
+                # Formatting problems are not transient, so they are not retried.
+                print(e)
+                return False
+        print(f'get_stock_images_id 重试{max_attempts}次后仍然失败')
+        return None
+    def update_image_id_to_3(self, item_id):
+        """Set ``state = 3`` on the ``stock_image_id_wj`` row whose ``id`` is ``item_id``.
+        The update is attempted up to five times, 30 seconds apart. Returns ``None``.
+        """
+        sql_update = "UPDATE stock_image_id_wj SET state = 3 WHERE id = %s"
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                with self.db_engine192.begin() as connection:
+                    # Bound parameter, same %s / list-of-tuples form as save_stock_img_id.
+                    connection.execute(sql_update, [(item_id,)])
+                return
+            except Exception as e:
+                print(f'update_image_id_to_3 报错：{e}')
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'update_image_id_to_3 重试{max_attempts}次后仍然失败')
+    def update_image_id_to_4(self, item_id):
+        """Set ``state = 4`` on the ``stock_image_id_wj`` row whose ``id`` is ``item_id``.
+        The update is attempted up to five times, 30 seconds apart. Returns ``None``.
+        """
+        sql_update = "UPDATE stock_image_id_wj SET state = 4 WHERE id = %s"
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                with self.db_engine192.begin() as connection:
+                    # Bound parameter, same %s / list-of-tuples form as save_stock_img_id.
+                    connection.execute(sql_update, [(item_id,)])
+                return
+            except Exception as e:
+                print(f'update_image_id_to_4 报错：{e}')
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'update_image_id_to_4 重试{max_attempts}次后仍然失败')
+    def get_cookie_account(self, item_id):
+        """Fetch the login pair stored in ``stock_image_summary_wj`` row ``item_id``.
+        Only a row with ``state = 1`` is considered. Returns ``[account_id,
+        account_secret]`` for the first matching row, or ``None`` when no row matches
+        or when the query failed on all five attempts.
+        """
+        table_name = "stock_image_summary_wj"
+        query = f"""SELECT account_id,account_secret FROM {table_name} where id = {item_id} and state= 1;"""
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                print(query)
+                df_status = self.db_engine192.read_sql(query)
+                if len(df_status) == 0:
+                    return None
+                account_id = df_status.account_id.iloc[0]
+                account_secret = df_status.account_secret.iloc[0]
+                # Only the account id is logged; the secret must not reach stdout/logs.
+                print(f'get_cookie_account 已获取账号：{account_id}')
+                return [account_id, account_secret]
+            except Exception as e:
+                print(f'get_cookie_account 报错：{e}')
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'get_cookie_account 重试{max_attempts}次后仍然失败')
+        return None
+    def get_all_image_id(self):
+        """Return every ``image_id`` in ``stock_image_detail_wj`` as a list of strings.
+        The query is attempted up to five times, 30 seconds apart; ``None`` is returned
+        when every attempt failed.
+        """
+        sql_query = "SELECT image_id FROM stock_image_detail_wj "
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                df_status = self.db_engine192.read_sql(sql_query)
+                return list(df_status['image_id'].astype(str))
+            except Exception as e:
+                # Report first, then wait only when another attempt will follow.
+                print(f'get_all_image_id 报错：{e}')
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'get_all_image_id 重试{max_attempts}次后仍然失败')
+        return None
+    def update_url_state_to_3(self, image_id):
+        """Move the ``stock_image_detail_wj`` rows of ``image_id`` from state 1 to state 3.
+        The update is attempted up to five times, 30 seconds apart. Returns ``None``.
+        """
+        sql_update = "UPDATE stock_image_detail_wj SET state = 3 WHERE image_id = %s and state = 1"
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                with self.db_engine192.begin() as connection:
+                    # The original statement compared against a quoted literal, so the
+                    # value is bound as text; same %s / list-of-tuples form as
+                    # save_stock_img_id. NOTE(review): confirm the driver accepts a text
+                    # parameter for the image_id column type.
+                    connection.execute(sql_update, [(str(image_id),)])
+                return
+            except Exception as e:
+                print(f'update_url_state_to_3 报错：{e}')
+                if attempt < max_attempts:
+                    time.sleep(30)
+        print(f'update_url_state_to_3 重试{max_attempts}次后仍然失败')
+    def get_pic_urls(self, account_id):
+        """Return the pending (``state = 1``) pictures of ``account_id``.
+        Each element is ``image_url||image_id||image_title`` built from one row of
+        ``stock_image_detail_wj``, e.g.
+        ``https://download.shutterstock.com/.../shutterstock_2466029425.jpg||2466029425||<title>``.
+        Returns ``False`` when there are no rows or when the query raises.
+        """
+        query = f"""select image_url, image_id, image_title from stock_image_detail_wj where account_id = '{account_id}' and state = 1"""
+        try:
+            rows = self.db_engine192.read_sql(query).values.tolist()
+            formatted = [
+                f"{row[0]}||{row[1]}||{row[2]}"
+                for row in rows
+                if row is not None
+            ]
+            return formatted if formatted else False
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return False
+if __name__ == '__main__':
+    # Manual smoke test: look up the login pair stored under summary-row id 10.
+    ConnectSpider().get_cookie_account(10)
--- a/wangjing_projects/projects/picture_material/stock_summery/new_stock/xnj_url.py
+++ b/wangjing_projects/projects/picture_material/stock_summery/new_stock/xnj_url.py
+# -*- coding: utf-8 -*-
+import os
+import sys
+sys.path.append(os.path.dirname(sys.path[0]))  # make the parent directory importable
+from time import sleep
+from random import randint
+from all_connect import ConnectSpider
+import traceback
+# Module-level database helper shared by the classes below; it is constructed at import time.
+Con = ConnectSpider()
+import imaplib
+import email
+import os
+import time
+import requests
+import hashlib
+# NOTE(review): only stackoverflow.com is excluded from proxying here; confirm this is the intended host.
+os.environ['NO_PROXY'] = 'stackoverflow.com'
+import logging
+# Route warnings issued through the warnings module into the logging system.
+logging.captureWarnings(True)
+from DrissionPage import ChromiumPage,ChromiumOptions
+import json
+# NOTE: this rebinds the name `requests`; from here on it refers to curl_cffi's
+# module, not to the `requests` package imported a few lines above.
+from curl_cffi import requests
+import re
+import random
+import time
+from datetime import datetime, timedelta
+import calendar
+import sys
+class GetStockImgId(object):
+    def __init__(self):
+        """Prepare the fixed HTTP request headers sent with every licence-history query."""
+        self.headers = {
+            'accept': 'application/json',
+            'accept-language': 'zh-CN,zh;q=0.9',
+            'content-type': 'application/json',
+            'newrelic': 'eyJ2IjpbMCwxXSwiZCI6eyJ0eSI6IkJyb3dzZXIiLCJhYyI6Ijk2NzIzMiIsImFwIjoiMTU4ODYzMjc5MiIsImlkIjoiMjgzNzAxYzA5ODljNWI4YiIsInRyIjoiMDYwYTQwMzI4MjhiMGNlM2ZkZmJlYzAxNDU5NTVhZDUiLCJ0aSI6MTczNTg4NTk5ODcxOX19',
+            'origin': 'https://www.shutterstock.com',
+            'priority': 'u=1, i',
+            'referer': 'https://www.shutterstock.com/zh/catalog/licenses?startDate=2024-12-01&endDate=2024-12-31',
+            'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"Windows"',
+            'sec-fetch-dest': 'empty',
+            'sec-fetch-mode': 'cors',
+            'sec-fetch-site': 'same-origin',
+            'traceparent': '00-060a4032828b0ce3fdfbec0145955ad5-283701c0989c5b8b-01',
+            'tracestate': '967232@nr=0-1-967232-1588632792-283701c0989c5b8b----1735885998719',
+            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
+            'x-end-app-name': 'next-web',
+            'x-end-app-version': '837034fdc61',
+            'x-newrelic-id': 'XQAAU1VRGwIEVVhaBgYGUlI=',
+            'x-request-id': 'c3a36b63-ff03-4c2f-9a94-5381cd4485a7',
+        }
+    def random_ua(self):
+        """Store a randomly generated Chrome User-Agent string in ``self.headers['user-agent']``."""
+        major = random.randint(55, 62)
+        build = random.randint(0, 3200)
+        patch = random.randint(0, 140)
+        platforms = (
+            '(Windows NT 6.1; WOW64)',
+            '(Windows NT 10.0; WOW64)',
+            '(X11; Linux x86_64)',
+            '(Macintosh; Intel Mac OS X 10_12_6)',
+        )
+        platform = random.choice(platforms)
+        self.headers['user-agent'] = (
+            f'Mozilla/5.0 {platform} AppleWebKit/537.36 (KHTML, like Gecko) '
+            f'Chrome/{major}.0.{build}.{patch} Safari/537.36'
+        )
+    def get_url_month(self, page, cookie, start_date, last_date):
+        """Request one page of licensed holdings for a date range and return the response.
+        ``start_date`` and ``last_date`` are inserted into the ``licensedSince`` and
+        ``licensedUntil`` filters, ``page`` into ``page[number]``; the page size is 50.
+        ``cookie`` is passed as the request cookies. The status code is not checked here.
+        """
+        url = (
+            "https://www.shutterstock.com/napi/s/dam/holdings/search"
+            "?include=media-item%2Cmedia-item.track-assets%2Cmedia-item.cms-entry"
+            "&sort=-licensedAt&useMms=true&channel=shutterstock&page[size]=50"
+            f"&filter[licensedSince]={start_date}T00%3A00%3A00Z"
+            f"&filter[licensedUntil]={last_date}T23%3A59%3A59Z"
+            f"&page[number]={page}"
+            "&filter[assetStatus]=comped%2Clicensed&language=zh"
+        )
+        print('url:', url)
+        response = requests.get(url, headers=self.headers, cookies=cookie)
+        print(response)
+        print(response.url)
+        return response
+    def get_img_id(self, response, account_id, page):
+        """Save the image ids contained in one result page.
+        Reads the ``included`` list of ``response.json()``, turns every entry into a row
+        (``account_id``, ``image_id``, ``title``, ``sizes``, ``state = 1``,
+        ``created_at``) and hands the rows to ``Con.save_stock_img_id``.
+        Returns ``True`` when the page held entries (the caller should fetch the next
+        page) and ``False`` when ``included`` is missing or empty (last page).
+        Any other problem (unparseable body, malformed entry) is printed and re-raised:
+        returning a falsy value for it would make ``run`` treat the account as finished.
+        """
+        try:
+            payload = response.json()
+            try:
+                included = payload['included']
+            except KeyError:
+                # A page without the key is how the end of the listing shows up.
+                included = None
+            if not included:
+                print('最后一页，全部保存成功')
+                return False
+            data_list = [
+                {
+                    'account_id': account_id,
+                    'image_id': int(item['id']),
+                    'title': item['attributes']['title'],
+                    'sizes': item['attributes']['sizes'],
+                    'state': 1,
+                    'created_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+                }
+                for item in included
+            ]
+            print('准备保存：')
+            Con.save_stock_img_id(data_list)
+            print(f"{account_id}第{page}页保存id成功,")
+            return True
+        except Exception as e:
+            print(e)
+            raise
+    def get_last_month_start_end(self):
+        """Return ``(start_date, last_date)`` of the previous calendar month as ``YYYY-MM-DD`` strings."""
+        now = datetime.today()
+        # Step back one month, wrapping January to December of the previous year.
+        if now.month == 1:
+            year, month = now.year - 1, 12
+        else:
+            year, month = now.year, now.month - 1
+        days_in_month = calendar.monthrange(year, month)[1]
+        date_format = '%Y-%m-%d'
+        first_day = datetime(year, month, 1)
+        last_day = datetime(year, month, days_in_month)
+        return first_day.strftime(date_format), last_day.strftime(date_format)
+    def run(self, account_id, cookie):
+        """Collect last month's licensed image ids for one account, page by page.
+        Pages are requested starting at 1 until ``get_img_id`` returns a falsy value;
+        the account is then marked with ``Con.update_id_to_3``. Between pages the loop
+        sleeps 3 to 6 seconds.
+        Raises ``RuntimeError`` when a page answers with a status other than 200; any
+        exception from the helpers is printed and re-raised so the caller's loop stops.
+        """
+        page = 1
+        start_date, last_date = self.get_last_month_start_end()
+        print(f"Start Date: {start_date}")
+        print(f"Last Date: {last_date}")
+        while True:
+            try:
+                response = self.get_url_month(page, cookie, str(start_date), str(last_date))
+                if response.status_code != 200:
+                    # An explicit error replaces the bare `raise`, which only produced
+                    # "No active exception to reraise"; the type stays RuntimeError.
+                    raise RuntimeError(f'状态码为{response.status_code}, 请求失败')
+                is_continue = self.get_img_id(response, account_id, page)
+                print('is_continue:', is_continue)
+                if not is_continue:
+                    # Nothing left to fetch: mark the account as finished.
+                    Con.update_id_to_3(account_id)
+                    break
+                # Pause before the next request.
+                time.sleep(random.randint(3, 6))
+                page += 1
+            except Exception as e:
+                print(e)
+                # Propagate so the caller's outer loop stops.
+                raise
+class GetSS_details():
+    def __init__(self):
+        """Start the browser session and prepare headers, mailbox settings and the API token.
+        Side effects: attaches to / launches Chrome through DrissionPage on debugging
+        port 9333 and calls ``get_microservice_token``, which performs an HTTP request.
+        """
+        # Login credentials; empty here and expected to be filled in before login() is called.
+        self.account = ''
+        self.pwd = ''
+        # self.page = ChromiumPage()
+        # Configure the Chrome browser (debugging port 9333)
+        chrome_options = ChromiumOptions()
+        chrome_options.set_browser_path(r'C:\Program Files\Google\Chrome\Application\chrome.exe')
+        chrome_options.set_local_port(9333)  # Chrome debugging port
+        self.page = ChromiumPage(addr_or_opts=chrome_options)
+        print(f"Chrome 浏览器运行在端口: {9333}")
+        # Fixed request headers used by the HTTP calls of this class.
+        self.headers = {
+    'accept': 'application/json',
+    'accept-language': 'zh-CN,zh;q=0.9',
+    'content-type': 'application/json',
+    'newrelic': 'eyJ2IjpbMCwxXSwiZCI6eyJ0eSI6IkJyb3dzZXIiLCJhYyI6Ijk2NzIzMiIsImFwIjoiMTU4ODYzMjc5MiIsImlkIjoiMDdjNDZhYTI3ZTBlMTAyZiIsInRyIjoiOGI4ODQ3MzNiNjFjNDNlY2YxMGEzOTQ2MzQ4MDE2NzQiLCJ0aSI6MTczNTk5NzEzNjEyOH19',
+    'origin': 'https://www.shutterstock.com',
+    'priority': 'u=1, i',
+    'referer': 'https://www.shutterstock.com/zh/catalog/',
+    'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
+    'sec-ch-ua-mobile': '?0',
+    'sec-ch-ua-platform': '"Windows"',
+    'sec-fetch-dest': 'empty',
+    'sec-fetch-mode': 'cors',
+    'sec-fetch-site': 'same-origin',
+    'traceparent': '00-8b884733b61c43ecf10a394634801674-07c46aa27e0e102f-01',
+    'tracestate': '967232@nr=0-1-967232-1588632792-07c46aa27e0e102f----1735997136128',
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
+    'x-end-app-name': 'next-web',
+    'x-end-app-version': '5ca4a4c05d8',
+    'x-newrelic-id': 'XQAAU1VRGwIEVVhaBgYGUlI=',
+    'x-request-id': '15754a73-f152-4983-99b4-6af058379880',
+}
+        # Mailbox that receives the one-time login codes (read by fetch_verification_code).
+        # NOTE(review): the mailbox password is hard-coded in source; move it to
+        # configuration or an environment variable and rotate it.
+        self.email_value_config = {
+            'imap_server': 'imap.exmail.qq.com',
+            'username': 'pengyanbing@yswg.com.cn',
+            'password': 'Python3.8',
+        }
+        # Fetch the API token up front; transmission_api sends it as the authorization header.
+        self.get_microservice_token()
+    def get_ck(self):
+        """Open the licences page and return the browser cookies as a ``{name: value}`` dict.
+        All cookies are returned unfiltered. On any error the problem is printed and
+        ``None`` is returned implicitly.
+        """
+        print('获取登录后的cookie')
+        try:
+            self.page.get('https://www.shutterstock.com/zh/catalog/licenses')
+            sleep(randint(2, 4))
+            original_cookie_dict = {}
+            # The page reports cookies as a list of entries with 'name' and 'value'.
+            for entry in self.page.cookies():
+                original_cookie_dict[entry['name']] = entry['value']
+            print('original_cookie_dict::', original_cookie_dict)
+            return original_cookie_dict
+        except Exception as e:
+            print('获取cookie出错：', e)
+    def login_out(self):
+        """Log the current user out through the avatar menu, if an avatar can be found.
+        Three known avatar selectors are tried in order; the first one that matches is
+        clicked, followed by the logout entry. Nothing happens when none matches.
+        """
+        avatar_selectors = (
+            '.MuiAvatar-root MuiAvatar-circular MuiAvatar-colorDefault mui-9jj0tt-avatarSize',
+            '.MuiAvatar-root MuiAvatar-circular MuiAvatar-colorDefault mui-1jeofke',
+            '.MuiAvatar-root MuiAvatar-circular MuiAvatar-colorDefault mui-1ki7tcg',
+        )
+        for selector in avatar_selectors:
+            avatar = self.page.ele(selector)
+            if not avatar:
+                continue
+            avatar.click()
+            sleep(randint(2, 4))
+            self.page.ele('@text()=登出').click()
+            return
+    def decode_body(self, body):
+        """Decode a raw e-mail payload to text.
+        UTF-8 is tried first, then GB18030. ISO-8859-1 is the last resort: it maps every
+        byte to a character, so it cannot fail (which made the former ``latin1`` entry
+        and the ``errors='replace'`` fallback unreachable).
+        ``None`` (a part without payload) yields ``''``; a ``str`` is returned unchanged.
+        """
+        if body is None:
+            return ''
+        if isinstance(body, str):
+            return body
+        for encoding in ('utf-8', 'gb18030'):
+            try:
+                return body.decode(encoding)
+            except UnicodeDecodeError:
+                continue
+        return body.decode('iso-8859-1')
+    def extract_verification_code(self, text_body):
+        """Return the first six-digit code found by the known phrase patterns, or ``None``.
+        The patterns are tried in a fixed order and the first one that matches wins.
+        """
+        patterns = (
+            r'以验证您的身份：(\d{6})',
+            r'一次性密码：(\d{6})',
+            r'验证码：(\d{6})',
+            r'(\d{6})\s*此密码仅可使用一次',
+        )
+        matches = (re.search(pattern, text_body) for pattern in patterns)
+        hit = next((found for found in matches if found), None)
+        return hit.group(1) if hit else None
+    def fetch_verification_code(self, email_value_config):
+        """Read the newest verification code sent by ``noreply@shutterstock.com``.
+        ``email_value_config`` supplies ``imap_server``, ``username`` and ``password``.
+        Messages in ``inbox`` from that sender are scanned newest first. For a
+        multipart message only ``text/plain`` parts are inspected; otherwise the
+        message body itself is. A body is considered only if it contains
+        ``shutterstock``.
+        Returns the six-digit code as a string, or ``None`` when nothing matched or any
+        IMAP / parsing error occurred (errors are printed, not raised).
+        """
+        mail = None
+        try:
+            mail = imaplib.IMAP4_SSL(email_value_config['imap_server'])
+            mail.login(email_value_config["username"], email_value_config["password"])
+            mail.select('inbox')
+            result, data = mail.search(None, '(FROM "noreply@shutterstock.com")')
+            if result != 'OK':
+                print("没有找到邮件")
+                return None
+            for email_id in reversed(data[0].split()):  # newest message first
+                result, fetched = mail.fetch(email_id, "(RFC822)")
+                if result != 'OK' or not fetched or not isinstance(fetched[0], tuple):
+                    # Skip a message that could not be fetched instead of aborting the scan.
+                    continue
+                email_message = email.message_from_bytes(fetched[0][1])
+                if email_message.is_multipart():
+                    parts = [part for part in email_message.walk()
+                             if part.get_content_type() == 'text/plain']
+                else:
+                    parts = [email_message]
+                for part in parts:
+                    body = part.get_payload(decode=True)
+                    if body is None:
+                        continue
+                    text_body = self.decode_body(body)
+                    if 'shutterstock' not in text_body:
+                        continue
+                    verification_code = self.extract_verification_code(text_body)
+                    if verification_code:
+                        print("验证码是：", verification_code)
+                        return verification_code
+            print("没有找到符合条件的邮件")
+            return None
+        except imaplib.IMAP4.error as e:
+            print(f"IMAP4 error: {e}")
+        except Exception as e:
+            print(f"An unexpected error occurred: {e}")
+        finally:
+            # Single cleanup point. close() and logout() are attempted separately so a
+            # failing close() (e.g. no mailbox selected) does not skip the logout.
+            if mail is not None:
+                try:
+                    mail.close()
+                except Exception:
+                    pass
+                try:
+                    mail.logout()
+                except Exception:
+                    pass
+        return None
+    def yxyzm(self):
+        """Complete the e-mail verification step of the login dialog.
+        Waits 62 to 140 seconds for the mail to arrive, reads the code with
+        ``fetch_verification_code``, types it into the code field inside
+        ``#login-iframe`` (falling back to a class-based selector) and clicks the
+        verify button.
+        Raises ``RuntimeError`` when no code could be read, instead of typing ``None``
+        into the form and submitting it.
+        """
+        print('需要输入邮箱验证码 等待2分钟')
+        sleep(randint(62, 140))
+        iframe = self.page.get_frame('#login-iframe')
+        sleep(randint(2, 4))
+        yzm = self.fetch_verification_code(self.email_value_config)
+        if not yzm:
+            raise RuntimeError('未能从邮箱获取到验证码')
+        try:
+            print(('验证码输入'))
+            yzm_input = iframe.ele('@text()=输入代码')
+            sleep(randint(2, 4))
+            yzm_input.input(yzm)
+        except Exception:
+            # Fallback selector for the code field when the text lookup did not work.
+            yzm_input = iframe.ele(
+                '.MuiInputBase-input MuiInput-input MuiInputBase-inputSizeSmall css-186x7cf')
+            sleep(randint(2, 4))
+            yzm_input.input(yzm)
+        print('点击验证')
+        iframe.ele('@text()=验证').click()
+    def get_microservice_token(self):
+        """Request an API token and store it in ``self.token``.
+        The request is signed with ``md5(secret + timestamp)``. It is attempted up to
+        five times, 20 seconds apart. When every attempt fails ``self.token`` is left
+        as it was (unset on the first call) and the failure is reported on stdout.
+        """
+        url = "http://wx.yswg.com.cn:8000/microservice-system/system/admin/getToken"
+        # NOTE(review): the signing secret is hard-coded in source; move it to
+        # configuration or an environment variable and rotate it.
+        secret = "dafa17fb-0e97-4246-a6b3-d574e44d212d"
+        max_attempts = 5
+        for attempt in range(1, max_attempts + 1):
+            try:
+                # The timestamp is part of the signature, so it is rebuilt per attempt.
+                timestamp = str(int(time.time()))
+                md5_value = hashlib.md5((secret + timestamp).encode("utf-8")).hexdigest()
+                response = requests.post(url, json={
+                    "module": "spider",
+                    "weChatId": "pengyanbing",
+                    "secret": md5_value,
+                    "timestamp": timestamp
+                })
+                res = response.json()
+                print(res)
+                if res['code'] != 200:
+                    raise Exception(res['msg'])
+                userinfo = res['data']
+                self.token = userinfo['token']
+                print(self.token, userinfo['expireTime'])
+                return
+            except Exception as e:
+                print('get_microservice_token, 报错',e)
+                if attempt < max_attempts:
+                    time.sleep(20)
+        print(f'get_microservice_token 重试{max_attempts}次后仍然失败')
+    def login(self):
+        """Log in to the Shutterstock catalog in the browser and return the session cookies.
+        Phase 1 opens the catalog page, dismisses the optional "continue" link, opens
+        the login dialog and submits ``self.account`` / ``self.pwd`` inside
+        ``#login-iframe``. Any error in this phase is printed and ``False`` is returned.
+        Phase 2 checks whether the dialog asks for an e-mail code, runs ``yxyzm`` if so,
+        reloads the licences page and returns ``get_ck()``. If this phase raises, the
+        error is printed and ``get_ck()`` is returned anyway.
+        Returns a cookie dict, ``None`` (when ``get_ck`` failed) or ``False``.
+        """
+        try:
+            # Open the catalog page.
+            self.page.get('https://www.shutterstock.com/zh/catalog/')
+            sleep(randint(12, 24))
+            try:
+                print('click  No thanks')
+                login_button = self.page.ele('xpath://a[@id="continue"]', timeout=15)
+                login_button.click()
+            except Exception:
+                # The link is optional; its absence is not an error.
+                print('No thanks 错误')
+            print('开始登录。', self.account, self.pwd)
+            # Find and click the login button.
+            login_button = self.page.ele('xpath://a[@data-automation="loginButton"]', timeout=15)
+            login_button.click()
+            sleep(randint(12,24))
+            # Wait for the dialog to load, then switch into its iframe.
+            iframe = self.page.get_frame('#login-iframe')
+            print('已切换到 login-iframe')
+            # Fill in the account name.
+            print("正在等待邮箱输入框...")
+            sleep(15)
+            email_input = iframe.ele('xpath://input[@name="username"]')
+            email_input.clear()  # remove any pre-filled content
+            email_input.input(self.account)
+            print("已输入账号到邮箱输入框")
+            sleep(randint(2, 4))
+            # Fill in the password.
+            print("正在等待密码输入框...")
+            email_input = iframe.ele(
+                '.MuiInputBase-input MuiInput-input MuiInputBase-inputSizeSmall MuiInputBase-inputAdornedEnd css-186x7cf')
+            email_input.clear()  # remove any pre-filled content
+            email_input.input(self.pwd)
+            print("已输入密码到密码输入框")
+            sleep(randint(3, 5))
+            # Find and click the submit button.
+            print('查找并点击登录按钮')
+            try:
+                submit_button = iframe.ele('.LoginForm_bottomSpacingMd__e2Mnm')
+                submit_button.click()
+            except Exception:
+                # Fall back to the class-based selector of the submit button.
+                print('切换点击')
+                sleep(randint(3, 4))
+                iframe.ele('.MuiButtonBase-root MuiButton-root MuiButton-contained MuiButton-containedPrimary MuiButton-sizeMedium MuiButton-containedSizeMedium MuiButton-disableElevation MuiButton-fullWidth css-1is1osn').click()
+            print('已点击登录...')
+            sleep(randint(8, 15))
+        except Exception as e:
+            print(f"出现错误: {e}", f"\n{traceback.format_exc()}")
+            return False
+        try:
+            print(33333333333)
+            iframe = self.page.get_frame('#login-iframe')
+            sleep(randint(4, 8))
+            # Several independent probes for the "enter verification code" screen;
+            # any single hit is enough to start the e-mail code flow.
+            h3_element = iframe.ele(
+                '.FormHeader_root__fHtRy wrapper-component_center__zG6GW')
+            h3_ = iframe.ele('@text()=输入验证代码')
+            h3_1 = iframe.ele('xpath://h3[contains(text(),"输入验证代码")]')
+            P_text1 = iframe.ele('xpath://p[contains(text(),"未收到代码？单击")]', timeout=15)
+            P_text2 = iframe.ele('xpath://p[contains(text(),"电子邮件中")]', timeout=15)
+            if h3_element or h3_ or h3_1 or P_text1 or P_text2 or '输入验证代码' in iframe.html or '输入验证代码' in self.page.html:
+                self.yxyzm()
+            else:
+                print('不需要验证码')
+            sleep(10)
+            self.page.refresh()
+            sleep(randint(5, 10))
+            self.page.get('https://www.shutterstock.com/zh/catalog/licenses')
+            sleep(randint(4, 8))
+            ck = self.get_ck()
+            return ck
+        except Exception as e:
+            # Best effort: even if the verification probing failed, try to read cookies.
+            print(e)
+            print('不需要验证码11111111111')
+            sleep(randint(5, 8))
+            ck = self.get_ck()
+            return ck
+    def transmission_api(self, account_id, image_id, image_size_info, image_title, image_type, image_url):
+        """POST one image record to the ``saveImageDetail`` endpoint and return its JSON reply.
+        The record is sent as a JSON body with ``self.token`` in the ``authorization``
+        header. A non-200 answer or a request exception counts as a failed attempt; a
+        request exception additionally refreshes the token. After the initial attempt
+        plus three retries have failed an ``Exception`` is raised.
+        """
+        url = 'http://wx.yswg.com.cn:8000/microservice-visual/visual/fileSystem/saveImageDetail?token=dacce869-0471-4ec7-ac50-3b3b1ec22c87'
+        data_json = json.dumps({
+            'accountId': account_id,
+            'imageId': image_id,
+            'imageSizeInfo': image_size_info,
+            'imageTitle': image_title,
+            'imageType': image_type,
+            'imageUrl': image_url,
+        })
+        max_retries = 3
+        for retries in range(max_retries + 1):
+            # Rebuilt on every attempt because the token may have been refreshed.
+            headers = {"authorization": self.token}
+            try:
+                response = requests.post(url, data=data_json, headers=headers)
+                if response.status_code == 200:
+                    return response.json()
+                print(url, '2323')
+                print(f'请求失败，状态码: {response.status_code}，重试 ({retries}/{max_retries})')
+            except requests.exceptions.RequestException as e:
+                print(f'请求异常: {e}，重试 ({retries}/{max_retries})')
+                self.get_microservice_token()
+        raise Exception(f'请求失败，已达到最大重试次数：{max_retries} 次')
+    def get_jpg(self, cookies, image_id):
+        json_data = {
+            'required_cookies': '',
+            'content': [
+                {
+                    'content_id': f'{image_id}',
+                    'content_type': 'photo',
+                    'content_size': 'huge',
+                    'content_format': 'jpg',
+                    'license_name': 'standard',
+                    'show_modal': True,
+                },
+            ],
+        }
+        response = requests.post(
+            'https://www.shutterstock.com/napi/licensees/current/redownload',
+            cookies=cookies,
+            headers=self.headers,
+            json=json_data,
+            timeout=600
+        )
+        image_url = json.loads(response.text)['meta']['licensedContent'][0]['downloadUrl']
+        return image_url
+    def get_png(self, cookie, image_id):
+        json_data = {
+            'required_cookies': '',
+            'content': [
+                {
+                    'content_id': f'{image_id}',
+                    'content_type': 'photo',
+                    'content_size': 'large',
+                    'content_format': 'png',
+                    'include_shadows': True,
+                    'angle': 'G03',
+                    'license_name': 'standard',
+                    'show_modal': True,
+                },
+            ],
+        }
+        response = requests.post('https://www.shutterstock.com/napi/licensees/current/redownload', cookies=cookie,
+                                 headers=self.headers, json=json_data, timeout=600)
+        image_url = json.loads(response.text)['meta']['licensedContent'][0]['downloadUrl']
+        return image_url
+    def get_pic(self, account_id, image_id, item_id, image_title, image_size_info, cookie, wait_time):
+        """Resolve one image's download URL, persist the record and forward it to the API.
+
+        Each attempt obtains a download URL via get_jpg() (falling back to
+        get_png() when the JPG error text contains 'meta'), stores the record
+        with Con.save_stock_detail(), calls Con.update_image_id_to_3(item_id)
+        and posts the record through transmission_api().
+
+        Args:
+            account_id: Stored in the record and sent to the API.
+            image_id: Image id requested from the redownload endpoint.
+            item_id: Id passed to Con.update_image_id_to_3() / Con.update_image_id_to_4().
+            image_title: Stored in the record; its first 38 characters are printed on success.
+            image_size_info: Stored in the record and sent to the API.
+            cookie: Cookies handed to get_jpg() / get_png().
+            wait_time: Seconds to sleep after a successful attempt.
+
+        Returns:
+            True after a successful attempt. False when the error text contains
+            'image_title' (after Con.update_image_id_to_4(item_id)) or when more
+            than max_retries attempts have failed.
+        """
+        retry = 0
+        max_retries = 3
+        while retry <= max_retries:
+            try:
+                start_time = datetime.now().strftime("%m-%d %H:%M:%S")
+                # NOTE(review): the full id list is fetched and turned into a set on
+                # every attempt for a single membership test — consider caching.
+                all_image_id = Con.get_all_image_id()
+                if str(image_id) in set(all_image_id):
+                    print(f'{image_id}已上传过')
+                    # Already-known ids are recorded with state 3; processing still continues below.
+                    state = 3
+                else:
+                    state = 1
+                # Try the JPG download first
+                try:
+                    image_url = self.get_jpg(cookie, image_id)
+                    image_type = 'jpg'
+                except Exception as e:
+                    # The fallback is keyed on the error text containing 'meta'
+                    # (e.g. a missing 'meta' key in the response JSON).
+                    if 'meta' in str(e):
+                        # JPG failed, try PNG
+                        image_url = self.get_png(cookie, image_id)
+                        image_type = 'png'
+                    else:
+                        raise
+                # Build the item record
+                item = {
+                    'account_id': account_id,
+                    'image_id': image_id,
+                    'image_size_info': image_size_info,
+                    'image_title': image_title,
+                    'image_type': image_type,
+                    'image_url': image_url,
+                    'state': state,
+                    'created_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                }
+                # Store the data and call the API
+                Con.save_stock_detail(item)
+                Con.update_image_id_to_3(item_id)
+                self.transmission_api(account_id, image_id, image_size_info, image_title, image_type, image_url)
+                now_time = datetime.now().strftime("%m-%d %H:%M:%S")
+                print(f'pic_name:{image_title[:38]},time:{start_time}——{now_time}爬取成功')
+                time.sleep(wait_time)
+                return True
+            except Exception as e:
+                logging.error(f"发生错误: {e}")
+                # Counted before the branch checks, so `retry` is 1 on the first failure.
+                retry += 1
+                if 'image_title' in str(e):
+                    # The printed message labels this case as expired; no retry is attempted.
+                    Con.update_image_id_to_4(item_id)
+                    print(f'{image_id}过期 修改为4')
+                    return False
+                elif 'meta' in str(e):
+                    # Reached when the PNG fallback also failed with a 'meta' error.
+                    if retry > max_retries:
+                        logging.warning("超过重试次数，跳过该图片")
+                        return False
+                    logging.warning(f"meta 错误，等待两小时刷新页面 第{retry}次重试...")
+                    # Blocks for two hours, then reloads the catalog page before retrying.
+                    time.sleep(7200)
+                    self.page.get('https://www.shutterstock.com/zh/catalog/')
+                    continue  # proceed to the next retry
+                else:
+                    if retry > max_retries:
+                        logging.warning("超过重试次数，跳过该图片")
+                        return False
+                    # Escalating wait: 60-180 s, then 180-240 s, then 1800-1900 s,
+                    # selected by the retry number (all three values are drawn each time).
+                    sleep_time = [random.randint(60, 180), random.randint(180, 240), random.randint(1800, 1900)][
+                        retry - 1]
+                    logging.warning(f"未知错误，等待{sleep_time}s 第{retry}次重试...")
+                    time.sleep(sleep_time)
+                    continue  # proceed to the next retry
+    def run_get_stock_img_id(self, account, cookie):
+        """封装GetStockImgId.run()调用"""
+        try:
+            get_img_id = GetStockImgId()
+            get_img_id.run(account, cookie)
+            return True
+        except Exception as e:
+            logging.error(f"Error occurred in GetStockImgId.run(): {e}")
+            return False
+    def run(self):
+        """Crawl every account slot (item_id 1..32) and download its pending images.
+
+        For each item_id the browser cache is cleared, the account credentials
+        are loaded via Con.get_cookie_account(), the account is logged in, the
+        image-id list is refreshed through run_get_stock_img_id(), and every
+        pending image returned by Con.get_stock_images_id() is processed with
+        get_pic(). The whole process exits via sys.exit(1) when
+        run_get_stock_img_id() fails, and the outer loop stops when get_pic()
+        returns a falsy value.
+        """
+        # Day of month as a zero-padded string, e.g. "01".
+        day = time.strftime("%d")
+        for item_id in range(1, 33):
+            print(f"开始抓取 item_id: {item_id}")
+            self.page.clear_cache()  # Clear browser cache and session info so the next account can log in directly; works around login failures caused by the previous account not having logged out
+            # On the 1st of the month, before the first account, call the bulk state update with state=2.
+            if item_id == 1 and int(day) < 2:
+                Con.update_all_states_to_1(state=2)
+            # Per-account delay (6-10 s) applied by get_pic() after each successful image.
+            wait_time = random.uniform(6, 10)
+            account_list = Con.get_cookie_account(item_id)
+            if account_list:
+                self.account = account_list[0]
+                self.pwd = account_list[1]
+                # NOTE(review): if login() can return a falsy value on failure, it is
+                # passed on unchanged below — confirm whether that should be handled here.
+                cookie = self.login()  # log in and obtain the cookie
+                # cookie = self.get_ck()
+                if not self.run_get_stock_img_id(self.account, cookie):
+                    logging.critical("Stopping the entire program due to critical error.")
+                    sys.exit(1)  # terminate the whole program
+                image_id_id_pairs = Con.get_stock_images_id(self.account)
+                if not image_id_id_pairs:
+                    print(f'{self.account} 已全部爬取完成')
+                    Con.update_all_states_to_1(state=3, item_id=item_id)
+                    continue
+                counts_start = 0
+                counts_last = len(image_id_id_pairs)
+                stop_flag = False  # initialise the flag
+                for count in range(counts_start, counts_last):
+                    # Each entry is a '||-||'-joined string of exactly four fields.
+                    image_id, item_id_str, image_title, image_size_info = image_id_id_pairs[count].split('||-||')
+                    print(f'执行 {self.account}: {image_id}, {item_id_str}, 计数: {count}')
+                    try:
+                        chong_shi = self.get_pic(self.account, image_id, item_id_str, image_title, image_size_info,
+                                                 cookie, wait_time)
+                        # A falsy result stops this account and, via stop_flag, the whole run.
+                        if not chong_shi:
+                            stop_flag = True
+                            break
+                    except Exception as e:
+                        if 'Expected axis has 0 elements, new values have 2 elements' in str(e):
+                            print(f'{self.account} 已全部爬取完成')
+                            time.sleep(10)
+                        else:
+                            # NOTE(review): this break leaves stop_flag False, so the outer
+                            # loop moves on to the next item_id — confirm that is intended.
+                            logging.error(f'发生错误: {e}, 停止循环')
+                            break
+                    # After the last pending image, call the bulk state update with state=3 for this item_id.
+                    if count == counts_last - 1:
+                        print(f'{self.account} 全部爬取完成1122==')
+                        Con.update_all_states_to_1(state=3, item_id=item_id)
+                if stop_flag:
+                    print('超过重试次数，暂停')
+                    logging.warning('超过重试次数，暂停')
+                    break
+# Script entry point: build a GetSS_details instance and start its run() loop
+# only when this file is executed directly (not when imported).
+if __name__ == '__main__':
+    GetSS_details().run()