# Blackboard课程资料下载工具 - 交互式版本

这个notebook提供了一个交互式的方式来控制Blackboard下载过程。您可以逐步执行每个单元格来完成登录和下载操作。

## 使用说明
1. 先运行导入和配置部分
2. 初始化浏览器驱动
3. 根据需要逐步执行登录、验证码处理等操作
4. 可以随时查看和保存调试信息

## 特点
- 支持多种类型资料下载（课件、作业等）
- 自动处理验证码
- 保持会话活跃
- 详细的调试信息
- 可配置的下载选项

In [None]:
# 导入所需的包
import os
import time
import yaml
import requests
import json
import base64
import glob
import urllib.parse
import traceback
from datetime import datetime
import logging
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from urllib3.exceptions import InsecureRequestWarning
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad
import ddddocr

# Silence urllib3's InsecureRequestWarning for the whole process
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Configure root logging: INFO level, "timestamp - level - message" format
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

print("依赖项导入完成")

## 工具函数定义
下面的单元格包含了所有需要用到的工具函数。运行此单元格来定义这些函数。

In [None]:
def clean_debug_files(max_age_seconds=600, patterns=('*.png', '*.html', '*.json', '*.log')):
    """Delete stale debug artefacts from the current working directory.

    Every file in the current working directory whose name matches one of
    ``patterns`` and whose modification time is more than ``max_age_seconds``
    in the past is removed.  The match is purely by glob pattern, so any file
    with a matching extension in that directory is affected, not only files
    written by ``save_debug_info``.

    Args:
        max_age_seconds: Minimum age (in seconds) a file must have to be removed.
        patterns: Iterable of glob patterns evaluated relative to the
            current working directory.

    Returns:
        None.  Progress and per-file errors are printed.
    """
    print("\n清理历史调试文件...")
    cutoff = time.time() - max_age_seconds
    for pattern in patterns:
        for path in glob.glob(pattern):
            try:
                # getmtime is inside the try so that a file vanishing between
                # glob() and stat() only skips that file, not the whole sweep.
                if os.path.getmtime(path) >= cutoff:
                    continue
                os.remove(path)
            except OSError as e:
                print(f"清理文件时出错: {str(e)}")
                continue
            print(f"已删除: {path}")

def save_debug_info(driver, prefix):
    """Dump the browser's current state to timestamped files for debugging.

    Four files named ``{prefix}_{timestamp}_*`` are written to the current
    working directory: the page source (``_page.html``), a screenshot
    (``_screenshot.png``), a summary of every DOM element
    (``_elements.json``) and general session data (``_info.json``).

    Note that ``_info.json`` contains the session cookies returned by
    ``driver.get_cookies()``, so the files should be treated as sensitive.

    Args:
        driver: Selenium WebDriver whose current page is captured.
        prefix: Filename prefix identifying the capture point.

    Returns:
        None.  Any failure is printed and swallowed so that debugging output
        never interrupts the calling workflow.
    """
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    base = f"{prefix}_{timestamp}"
    try:
        # Page source
        with open(f"{base}_page.html", "w", encoding="utf-8") as f:
            f.write(driver.page_source)

        # Screenshot
        driver.save_screenshot(f"{base}_screenshot.png")

        # Per-element summary
        elements_info = []
        for element in driver.find_elements(By.XPATH, "//*"):
            try:
                # Read the text once; each access is a separate WebDriver call.
                text = element.text
                elements_info.append({
                    "tag": element.tag_name,
                    "id": element.get_attribute("id"),
                    "class": element.get_attribute("class"),
                    "text": text[:100] if text else None
                })
            except Exception:
                # Elements can disappear while the DOM is being walked; skip
                # them.  Not a bare ``except`` so Ctrl-C still interrupts.
                continue

        with open(f"{base}_elements.json", "w", encoding="utf-8") as f:
            json.dump(elements_info, f, ensure_ascii=False, indent=2)

        # General session information
        debug_info = {
            "url": driver.current_url,
            "title": driver.title,
            "cookies": driver.get_cookies(),
            "timestamp": timestamp
        }
        with open(f"{base}_info.json", "w", encoding="utf-8") as f:
            json.dump(debug_info, f, ensure_ascii=False, indent=2)

    except Exception as e:
        print(f"保存调试信息失败: {str(e)}")

def setup_driver():
    """Create and configure the Chrome WebDriver used by this notebook.

    The browser downloads into the current working directory, ignores
    certificate errors, runs with several automation-hiding switches and uses
    a fixed desktop user agent.  Page-load and script timeouts are 30 s and
    the implicit wait is 10 s.

    Returns:
        A ready-to-use ``webdriver.Chrome`` instance.

    Raises:
        Whatever ``ChromeDriverManager().install()`` or ``webdriver.Chrome``
        raise when the driver cannot be installed or started.  Failures of
        the later CDP setup are only printed.
    """
    chrome_options = webdriver.ChromeOptions()

    # Basic settings: download into the current working directory
    download_directory = os.getcwd()
    chrome_options.add_experimental_option("prefs", {
        "download.default_directory": download_directory,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    })

    # Hide the automation flags
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    # SSL settings
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument('--ignore-ssl-errors')

    # Connection stability settings
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--disable-web-security')
    chrome_options.add_argument('--allow-running-insecure-content')

    # Window settings
    chrome_options.add_argument('--window-size=1920,1080')
    chrome_options.add_argument('--start-maximized')

    # Miscellaneous settings
    chrome_options.add_argument('--disable-extensions')
    chrome_options.add_argument('--disable-popup-blocking')
    chrome_options.add_argument('--disable-notifications')
    chrome_options.add_argument('--disable-dev-tools')

    # Fixed user agent
    chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')

    # Driver service (chromedriver log goes to chrome.log)
    service = webdriver.ChromeService(
        ChromeDriverManager().install(),
        service_args=['--verbose', '--log-path=chrome.log']
    )

    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Timeouts
    driver.set_page_load_timeout(30)
    driver.set_script_timeout(30)
    driver.implicitly_wait(10)

    stealth_script = """
        Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined
        });
        window.navigator.chrome = {
            runtime: {}
        };
    """

    # Session initialisation
    try:
        driver.execute_cdp_cmd('Network.enable', {})
        # NOTE(review): this header announces Selenium on every request, which
        # works against the automation-hiding options above - confirm intent.
        driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': {'X-Selenium': 'true'}})

        # Register the script for every new document.  Running it through
        # execute_script would only patch the initial blank page and the
        # override would be lost on the first navigation.
        driver.execute_cdp_cmd(
            'Page.addScriptToEvaluateOnNewDocument',
            {'source': stealth_script}
        )
    except Exception as e:
        print(f"初始化会话时出错: {str(e)}")

    return driver

def solve_captcha(driver, max_attempts=3, expected_length=4):
    """Recognise the login captcha with OCR and type it into the form.

    On every attempt after the first the captcha image is clicked to request
    a new one.  A recognition result is accepted only if it has exactly
    ``expected_length`` characters after stripping whitespace.

    Args:
        driver: Selenium WebDriver currently showing the login page.
        max_attempts: Maximum number of recognition attempts.
        expected_length: Number of characters a valid captcha must have.

    Returns:
        True once a plausible captcha has been typed into the input field,
        False if every attempt failed.  The function only fills the field;
        it does not verify that the server accepts the value.
    """
    try:
        # Create the recogniser once instead of once per attempt.
        ocr = ddddocr.DdddOcr(show_ad=False)
    except Exception as e:
        print(f"验证码处理失败: {str(e)}")
        return False

    for attempt in range(max_attempts):
        try:
            captcha_input = driver.find_element(By.ID, "captcha")
            captcha_img = driver.find_element(By.ID, "captchaImg")

            if attempt > 0:
                print(f"刷新验证码并重试... ({attempt + 1}/{max_attempts})")
                try:
                    captcha_img.click()
                except Exception:
                    # Fall back to a JavaScript click if the native click fails.
                    driver.execute_script("arguments[0].click();", captcha_img)
                time.sleep(2)

            print("正在识别验证码...")

            # Wait until the browser reports the image as fully loaded
            WebDriverWait(driver, 5).until(
                lambda d: d.execute_script("return arguments[0].complete;", captcha_img)
            )

            # Take the screenshot in memory: no temporary file to clean up
            # (or to leak when recognition raises).
            captcha_text = ocr.classification(captcha_img.screenshot_as_png)
            captcha_text = captcha_text.strip() if captcha_text else ""

            if len(captcha_text) != expected_length:
                print(f"验证码识别结果无效: {captcha_text}")
                continue

            print(f"验证码识别结果: {captcha_text}")

            # Type the recognised text
            captcha_input.clear()
            captcha_input.send_keys(captcha_text)

            return True

        except Exception as e:
            # Only the last failure is reported; earlier ones are retried.
            if attempt == max_attempts - 1:
                print(f"验证码处理失败: {str(e)}")
            continue

    return False

def check_session_valid(driver):
    """Report whether the browser currently holds a logged-in session.

    If the browser is still on the authentication server's login page the
    session is considered invalid straight away.  Otherwise the Blackboard
    home page is opened (this navigates the browser away from its current
    page) and the function waits up to 5 seconds for the element with id
    ``global-nav-link`` to be present.

    Args:
        driver: Selenium WebDriver to inspect.

    Returns:
        True if the element was found, False if the browser is on the login
        page or any error (including the wait timing out) occurred.
    """
    try:
        on_login_page = "authserver/login" in driver.current_url
        if not on_login_page:
            # Open a page that requires authentication and look for an
            # element that is only expected after a successful login.
            driver.get("https://elearning.shanghaitech.edu.cn:8443/")
            nav_locator = (By.ID, "global-nav-link")
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(nav_locator)
            )
    except Exception as e:
        print(f"检查会话状态时出错: {str(e)}")
        return False

    return not on_login_page

## 初始化浏览器会话
运行下面的单元格来创建一个新的浏览器会话。如果需要重新开始，可以重新运行此单元格。

In [19]:
# Remove debug artefacts left behind by earlier runs
clean_debug_files()



清理历史调试文件...
已删除: login_filled_20250402_034100_screenshot.png
已删除: login_filled_20250402_034341_screenshot.png
已删除: login_filled_20250402_034100_page.html
已删除: login_filled_20250402_034341_page.html
已删除: login_filled_20250402_034100_elements.json
已删除: login_filled_20250402_034100_info.json
已删除: login_filled_20250402_034341_elements.json
已删除: login_filled_20250402_034341_info.json


In [None]:
# Initialise the browser.  setup_driver() raises on failure instead of
# returning a falsy value, so failure is detected with try/except; on failure
# `driver` is set to None so that later cells fail with a clear message.
try:
    driver = setup_driver()
except Exception as e:
    driver = None
    print(f"浏览器初始化失败: {str(e)}")
else:
    print("浏览器初始化成功")

## 加载配置
本节没有单独的单元格：从 config.yaml 读取登录凭据的函数 `load_credentials()` 定义在下一节的单元格中，并在“填写登录表单”一步被调用。

## 用户偏好设置管理
运行此单元格来加载和管理用户偏好设置。

In [None]:
def load_credentials():
    """Read the login credentials from ``config.yaml``.

    The file is looked up in the current working directory and must contain
    the keys ``username`` and ``password``.

    Returns:
        A dict with exactly the keys ``username`` and ``password``, or None
        if the file is missing, cannot be read/parsed, or lacks either key.
        The reason is printed in each failure case.
    """
    required_keys = ('username', 'password')
    try:
        with open('config.yaml', 'r', encoding='utf-8') as config_file:
            config = yaml.safe_load(config_file)

        if not config or any(key not in config for key in required_keys):
            print("配置文件格式错误或缺少必要的登录信息")
            return None

        return {key: config[key] for key in required_keys}
    except FileNotFoundError:
        print("未找到配置文件 config.yaml")
        return None
    except Exception as e:
        print(f"读取配置文件时出错: {str(e)}")
        return None

def save_user_preferences(preferences):
    """Write the preferences to ``user_preferences.yaml``.

    The file is created (or overwritten) in the current working directory
    and non-ASCII characters are written unescaped.

    Args:
        preferences: Object to serialise with ``yaml.dump``.

    Returns:
        True on success, False if writing failed (the error is printed).
    """
    try:
        with open('user_preferences.yaml', 'w', encoding='utf-8') as out:
            yaml.dump(preferences, out, allow_unicode=True)
    except Exception as e:
        print(f"保存用户偏好设置失败: {str(e)}")
        return False
    else:
        return True

def load_user_preferences():
    """Read the preferences stored in ``user_preferences.yaml``.

    Returns:
        The parsed YAML content, or an empty dict if the file does not
        exist, is empty (or otherwise parses to a falsy value), or cannot be
        read.  Only read/parse errors other than a missing file are printed.
    """
    try:
        with open('user_preferences.yaml', 'r', encoding='utf-8') as source:
            loaded = yaml.safe_load(source)
    except FileNotFoundError:
        return {}
    except Exception as e:
        print(f"加载用户偏好设置失败: {str(e)}")
        return {}

    return loaded if loaded else {}

def get_default_preferences():
    """Build the default preference set.

    Returns:
        A newly created dict on every call, so callers may modify the result
        without affecting later calls.
    """
    defaults = dict(
        download_path='downloads',
        auto_download=True,
        file_types=['pdf', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx'],
        keep_folder_structure=True,
        auto_rename_duplicates=True,
        retry_attempts=3,
        download_delay=2,
        session_timeout=1800,
        max_parallel_downloads=3,
        save_debug_info=True,
    )
    return defaults

def merge_preferences(user_prefs):
    """Overlay the user's preferences on top of the defaults.

    Args:
        user_prefs: Values that override the defaults; anything accepted by
            ``dict.update`` works.

    Returns:
        A new dict holding every default, with entries from ``user_prefs``
        taking precedence.
    """
    combined = dict(get_default_preferences())
    combined.update(user_prefs)
    return combined

# Load the stored user preferences, overlay them on the defaults and print
# the effective settings.  `preferences` is read by later cells.
user_preferences = load_user_preferences()
preferences = merge_preferences(user_preferences)
print("用户偏好设置加载完成：")
for key, value in preferences.items():
    print(f"{key}: {value}")

## 访问登录页面
运行此单元格来打开登录页面。

In [None]:
# Open the unified authentication login page and wait for its form.
try:
    print("访问统一身份认证页面...")
    driver.get("https://ids.shanghaitech.edu.cn/authserver/login")
    
    # Wait (up to 20 s) until the username field is present in the DOM
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.ID, "username"))
    )
    print("登录页面加载完成")
    
except Exception as e:
    print(f"访问登录页面失败: {str(e)}")

## 填写登录表单
运行此单元格来填写用户名和密码。

In [None]:
# Load the credentials and type them into the login form.
config = load_credentials()
if config is None:
    # load_credentials() has already printed the reason; stop here instead of
    # failing later with a confusing "'NoneType' is not subscriptable" error.
    print("填写登录表单失败: 未能加载登录凭据")
else:
    try:
        print("填写登录表单...")
        # Wait for both input fields to become clickable
        username_input = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "username"))
        )
        password_input = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "password"))
        )

        # Clear any previous content, then fill in the values
        username_input.clear()
        password_input.clear()
        username_input.send_keys(config['username'])
        password_input.send_keys(config['password'])

        print("登录表单填写完成")
        # Capture the current state for debugging
        save_debug_info(driver, "login_filled")

    except Exception as e:
        print(f"填写登录表单失败: {str(e)}")

## 处理验证码
运行此单元格来处理验证码。如果验证码识别失败，可以重新运行此单元格尝试。

In [None]:
# Solve the captcha if the login page currently shows one.
try:
    # The captcha container carries the CSS class "hide" when no captcha is required
    captcha_div = driver.find_element(By.ID, "captchaDiv")
    if "hide" not in captcha_div.get_attribute("class"):
        print("需要处理验证码")
        if solve_captcha(driver):
            print("验证码处理成功")
        else:
            print("验证码处理失败，请重新运行此单元格")
    else:
        print("无需验证码")
except Exception as e:
    print(f"验证码处理出错: {str(e)}")

## 提交登录表单
运行此单元格来提交登录表单并等待结果。

In [None]:
# Submit the login form and verify that a session was established.
try:
    print("提交登录表单...")
    # Locate the submit button and click it
    submit_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, "login_submit"))
    )
    submit_button.click()
    
    # Give the page a moment to load
    time.sleep(3)
    
    # Check the result; check_session_valid() opens the Blackboard home page
    # unless the browser is still on the login page
    if check_session_valid(driver):
        print("登录成功！")
        current_url = driver.current_url
        print(f"当前页面: {current_url}")
    else:
        print("登录失败")
        save_debug_info(driver, "login_failed")
        
except Exception as e:
    print(f"提交登录表单失败: {str(e)}")
    save_debug_info(driver, "login_error")

## 获取课程列表
登录成功后，运行此单元格获取可用的课程列表。

In [None]:
def _parse_course_text(raw_text):
    """Split a course link's visible text into ``(name, code, info)``.

    ``info`` is whatever stands between the first ``(`` and the last ``)``;
    the text before the first ``(`` is split on spaces and searched for the
    course code.  Any of the three values may be an empty string.
    """
    # Normalise whitespace
    text = raw_text.strip().replace('\n', ' ').replace('&nbsp;', ' ')
    while '  ' in text:
        text = text.replace('  ', ' ')

    # Extract the parenthesised part as course info and cut it off the text
    course_info = ''
    if '(' in text and ')' in text:
        start_idx = text.find('(')
        end_idx = text.rfind(')')
        course_info = text[start_idx + 1:end_idx].strip()
        text = text[:start_idx].strip()

    parts = [p.strip() for p in text.split(' ') if p.strip()]

    course_name = ''
    course_id = ''

    # First choice: a token of at least 3 characters mixing letters and
    # digits is taken as the course code; everything before it is the name.
    for i, part in enumerate(parts):
        if (any(c.isalpha() for c in part) and
                any(c.isdigit() for c in part) and
                len(part) >= 3):
            course_id = part
            course_name = ' '.join(parts[:i]).strip()
            break

    # Second choice: the last token that is not a year/term marker is the
    # code; the name is everything before the code or the first marker.
    if not course_id and parts:
        for part in reversed(parts):
            if '学年' not in part and '学期' not in part:
                course_id = part
                name_parts = []
                for p in parts:
                    if p == course_id or '学年' in p or '学期' in p:
                        break
                    name_parts.append(p)
                course_name = ' '.join(name_parts).strip()
                break

    # Last resort: first token is the name, second (if any) the code.
    # NOTE(review): this also triggers when the code is the very first token,
    # in which case name and code end up swapped - confirm against real data.
    if not course_name and parts:
        course_name = parts[0]
        if len(parts) > 1:
            course_id = parts[1]

    return course_name, course_id, course_info


def get_courses(driver):
    """Collect the courses listed on the page currently shown by ``driver``.

    Every ``li`` below ``.portletList-img.courseListing`` is inspected.  The
    first link inside it provides the course URL and the text that is parsed
    into name, code and info.

    Args:
        driver: Selenium WebDriver showing the course listing.

    Returns:
        A list of dicts with the keys ``name``, ``id`` (course code),
        ``url_id`` (the part between ``id=_`` and the next ``_`` in the link,
        or ``''``), ``info`` and ``url``.  Entries without a link target or
        without a recognisable name are left out.  An empty list is returned
        if the listing cannot be read at all.
    """
    courses = []
    try:
        print("正在获取课程列表...")
        course_elements = driver.find_elements(By.CSS_SELECTOR, ".portletList-img.courseListing li")

        for element in course_elements:
            try:
                link = element.find_element(By.CSS_SELECTOR, "a")

                href = link.get_attribute('href')
                if not href:
                    # Without a target the course cannot be opened later, and
                    # the substring test below would fail on None.
                    continue

                course_url_id = ''
                if 'id=_' in href:
                    course_url_id = href.split('id=_')[1].split('_')[0]

                course_name, course_id, course_info = _parse_course_text(link.text)

                # Only keep entries for which a name could be determined
                if course_name:
                    courses.append({
                        'name': course_name,
                        'id': course_id,
                        'url_id': course_url_id,
                        'info': course_info,
                        'url': href
                    })

            except Exception as e:
                print(f"处理单个课程时出错: {str(e)}")
                continue

        print(f"共找到 {len(courses)} 门课程")
        return courses

    except Exception as e:
        print(f"获取课程列表时出错: {str(e)}")
        return []

# Fetch the course list from the page currently shown in the browser
courses = get_courses(driver)
# Print the parsed details of every course, numbered from 1
if courses:
    print("\n课程列表:")
    for i, course in enumerate(courses, 1):
        print(f"{i}. 课程名称: {course['name']}")
        print(f"   课程代码: {course['id']}")
        print(f"   课程信息: {course['info']}")

## 选择课程
运行此单元格来选择要下载的课程。您需要输入课程的序号。

In [None]:
def select_course(courses):
    """Ask the user which course to download.

    The prompt is repeated until a number between 1 and ``len(courses)`` is
    entered.

    Args:
        courses: List of course dicts, each with at least a ``name`` key.

    Returns:
        The chosen course dict, or None if ``courses`` is empty.
    """
    if not courses:
        print("没有可选的课程")
        return None

    while True:
        answer = input("请输入要下载的课程序号（1-{}）: ".format(len(courses)))
        try:
            number = int(answer)
        except ValueError:
            print("请输入数字")
            continue

        if number < 1 or number > len(courses):
            print("请输入有效的课程序号")
            continue

        picked = courses[number - 1]
        print(f"已选择课程: {picked['name']}")
        return picked

# Ask which course to work with; None when no courses are available
selected_course = select_course(courses)

## 获取课程资料列表
运行此单元格来获取选定课程中的资料列表。

In [None]:
def get_course_materials(driver, url):
    """List the downloadable items on a course's content page.

    Opens ``url``, clicks the parent of the span titled ``内容`` and scans
    every ``li`` inside ``#content_listContainer``.  An item is included only
    if the ``onclick`` attribute of its ``h3 a`` link contains
    ``content_id``; the content and course ids are cut out of that attribute
    to build the item's URL.

    Args:
        driver: Selenium WebDriver with a logged-in session.
        url: Address of the course page.

    Returns:
        A list of dicts with the keys ``name``, ``url`` and ``type`` (always
        ``"file"``).  On failure debug information is saved under the prefix
        ``get_materials_error`` and an empty list is returned.
    """
    try:
        print("正在获取课程资料...")
        driver.get(url)

        wait = WebDriverWait(driver, 10)

        # Open the content section via the link wrapping the titled span
        content_span = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "a span[title='内容']"))
        )
        content_span.find_element(By.XPATH, "..").click()

        # Wait for the item list to show up
        wait.until(
            EC.presence_of_element_located((By.ID, "content_listContainer"))
        )

        found = []
        for entry in driver.find_elements(By.CSS_SELECTOR, "#content_listContainer li"):
            try:
                anchor = entry.find_element(By.CSS_SELECTOR, "h3 a")
                title = anchor.find_element(By.CSS_SELECTOR, "span").text
                href = anchor.get_attribute("onclick")

                # Entries without a content id in their onclick handler are skipped
                if not href or "content_id" not in href:
                    continue

                content_id = href.split("content_id=")[1].split("&")[0]
                course_id = href.split("course_id=")[1].split("'")[0]
                download_url = f"https://elearning.shanghaitech.edu.cn:8443/webapps/blackboard/execute/content/file?cmd=view&content_id={content_id}&course_id={course_id}"

                found.append({
                    "name": title,
                    "url": download_url,
                    "type": "file"
                })
                print(f"找到文件: {title}")

            except Exception as e:
                print(f"处理单个资料项时出错: {str(e)}")
                continue

        print(f"共找到 {len(found)} 个资料项")
        for number, material in enumerate(found, 1):
            print(f"{number}. {material['name']}")

        return found
    except Exception as e:
        print(f"获取课程资料失败: {str(e)}")
        save_debug_info(driver, "get_materials_error")
        return []

# Fetch the material list of the selected course.  `materials` is always
# defined afterwards so that the download cell can test it safely.
if selected_course:
    materials = get_course_materials(driver, selected_course['url'])
else:
    materials = []
    print("未选择课程，无法获取资料列表")

## 下载选定的资料
运行此单元格来下载选定的资料。您可以选择下载单个文件或所有文件。

In [None]:
def _filename_from_content_disposition(header_value):
    """Return the filename announced by a Content-Disposition header, or ''."""
    if not header_value:
        return ''

    # Extended form first: filename*=<charset>'<language>'<percent-encoded name>
    marker = "filename*="
    pos = header_value.find(marker)
    if pos != -1:
        value = header_value[pos + len(marker):].split(';')[0].strip().strip('"')
        charset, _, rest = value.partition("'")
        _, _, encoded = rest.partition("'")
        if encoded:
            try:
                return urllib.parse.unquote(encoded, encoding=charset or 'utf-8')
            except LookupError:
                # Unknown charset label: fall back to the default decoding
                return urllib.parse.unquote(encoded)

    # Plain form: filename="name" or filename=name
    marker = "filename="
    pos = header_value.find(marker)
    if pos != -1:
        value = header_value[pos + len(marker):].strip()
        if value.startswith('"'):
            closing = value.find('"', 1)
            value = value[1:closing] if closing != -1 else value[1:]
        else:
            value = value.split(';')[0].strip()
        return urllib.parse.unquote(value)

    return ''


def _unique_save_path(save_dir, filename):
    """Return a path in ``save_dir`` that does not exist yet (adds _1, _2, ...)."""
    base_name, ext = os.path.splitext(filename)
    candidate = os.path.join(save_dir, filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(save_dir, f"{base_name}_{counter}{ext}")
        counter += 1
    return candidate


def download_file(driver, url, save_dir):
    """Download ``url`` into ``save_dir`` using the browser's session cookies.

    A ``cmd=view`` URL is rewritten to ``cmd=download`` first.  The file is
    fetched with ``requests`` (not through the browser) and streamed to disk
    while the progress is printed.  The file is named after the
    Content-Disposition header when the server sends one, otherwise after the
    last URL segment (or ``download.pdf`` if that segment carries a query
    string).  Existing files are never overwritten; a numeric suffix is
    appended instead.

    Args:
        driver: Selenium WebDriver providing cookies, user agent and referer.
        url: Address of the file.
        save_dir: Target directory; created if it does not exist.

    Returns:
        True if the file was written completely, False on any error (the
        error and the URL are printed and a partial file is removed).
    """
    save_path = None
    try:
        if 'cmd=view' in url:
            # Ask the server for the file itself instead of the viewer page
            url = url.replace('cmd=view', 'cmd=download')
            url = url.replace('&launch_in_new=true', '')

        os.makedirs(save_dir, exist_ok=True)

        # Fallback name derived from the URL
        filename = url.split('/')[-1]
        if '?' in filename:
            filename = 'download.pdf'
        filename = sanitize_filename(urllib.parse.unquote(filename))

        cookies = {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}
        # Accept-Encoding is deliberately left to requests so that only
        # encodings it can actually decode are advertised.
        headers = {
            'User-Agent': driver.execute_script('return navigator.userAgent;'),
            'Accept': 'application/pdf,*/*',
            'Connection': 'keep-alive',
            'Referer': driver.current_url
        }

        # NOTE: verify=False disables TLS certificate verification, as in the
        # rest of this notebook.
        with requests.get(url,
                          cookies=cookies,
                          headers=headers,
                          verify=False,
                          stream=True,
                          allow_redirects=True,
                          timeout=(10, 60)) as response:
            response.raise_for_status()

            # Prefer the server-provided name; it must be known BEFORE the
            # target path is chosen, otherwise the file lands under the
            # fallback name.
            header_name = _filename_from_content_disposition(
                response.headers.get('Content-Disposition', '')
            )
            if header_name:
                filename = sanitize_filename(header_name)

            save_path = _unique_save_path(save_dir, filename)
            print(f"\n开始下载: {filename}")

            total_size = int(response.headers.get('content-length', 0))
            block_size = 8192
            downloaded = 0

            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=block_size):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if total_size > 0:
                            percent = (downloaded / total_size) * 100
                            print(f"\r下载进度: {percent:.1f}%", end='')

        print(f"\n下载完成: {os.path.basename(save_path)}")
        return True

    except Exception as e:
        print(f"\n下载文件时出错: {str(e)}")
        print(f"尝试的URL: {url}")
        # save_path never existed before this call, so removing it can only
        # delete the partial download.
        if save_path and os.path.exists(save_path):
            try:
                os.remove(save_path)
            except OSError:
                pass
        return False

def sanitize_filename(filename):
    """Turn an arbitrary string into a filename that is safe to create.

    The characters ``< > : " / \\ | ? *`` (not allowed on Windows) and all
    ASCII control characters (code points 0-31, e.g. NUL or newline) are
    replaced by ``_``.  Leading and trailing dots and spaces are removed
    afterwards.

    Args:
        filename: Proposed filename.

    Returns:
        The cleaned name, or ``"unnamed_file"`` if nothing is left.
    """
    invalid_chars = '<>:"/\\|?*' + ''.join(chr(code) for code in range(32))
    replacements = {ord(char): '_' for char in invalid_chars}

    cleaned = filename.translate(replacements).strip('. ')
    return cleaned if cleaned else "unnamed_file"

In [None]:

# Ad-hoc download of the first "cmd=view" link on the current page.
content_link = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "a[href*='cmd=view']"))
)
url = content_link.get_attribute("href")
# Follow the link in the browser so that current_url is the final address
driver.get(url)
# Make sure the target directory exists before writing into it
os.makedirs(preferences['download_path'], exist_ok=True)
download_file(driver, driver.current_url, preferences['download_path'])


In [None]:
def download_material(driver, material, save_dir="downloads"):
    """Download one material item.

    Opens the material's page, follows the first clickable ``cmd=view`` link
    in the browser and hands the resulting address to ``download_file``.

    Args:
        driver: Selenium WebDriver with a logged-in session.
        material: Dict with at least the keys ``name`` and ``url``.
        save_dir: Target directory; created if it does not exist.

    Returns:
        True only if ``download_file`` reported success, False otherwise.
    """
    try:
        os.makedirs(save_dir, exist_ok=True)

        print(f"正在下载: {material['name']}")
        driver.get(material['url'])
        # Wait for the download link to be present and clickable
        content_link = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "a[href*='cmd=view']"))
        )
        url = content_link.get_attribute("href")
        driver.get(url)

        # download_file reports failure through its return value rather than
        # an exception, so the result must be checked explicitly.
        if not download_file(driver, driver.current_url, save_dir):
            print(f"下载失败: {material['name']}")
            return False

        # Short pause before the next action
        time.sleep(3)

        print(f"下载完成: {material['name']}")
        return True
    except Exception as e:
        print(f"下载失败: {str(e)}")
        return False

def download_all_materials(driver, materials, save_dir="downloads"):
    """Download every item in ``materials`` one after another.

    Args:
        driver: Selenium WebDriver with a logged-in session.
        materials: Iterable of material dicts as accepted by
            ``download_material``.
        save_dir: Target directory passed on to ``download_material``.

    Returns:
        None.  A summary with the number of successful and failed downloads
        is printed at the end.
    """
    if not materials:
        print("没有可下载的资料")
        return

    # Items are processed strictly in order; each outcome is recorded
    outcomes = [bool(download_material(driver, item, save_dir)) for item in materials]
    success = outcomes.count(True)
    failed = outcomes.count(False)

    print(f"\n下载完成：成功 {success} 个，失败 {failed} 个")

# Ask whether to download everything or a single item
if materials:
    choice = input("请选择下载方式：\n1. 下载所有文件\n2. 下载单个文件\n请输入选择（1或2）: ")
    
    if choice == "1":
        download_all_materials(driver, materials)
    elif choice == "2":
        # Keep asking until a valid 1-based item number is entered
        while True:
            try:
                idx = int(input(f"请输入要下载的文件序号（1-{len(materials)}）: ")) - 1
                if 0 <= idx < len(materials):
                    download_material(driver, materials[idx])
                    break
                else:
                    print("请输入有效的序号")
            except ValueError:
                print("请输入数字")
    else:
        print("无效的选择")

## 会话维护
运行此单元格会执行一次会话检查（登录失效时会尝试重新登录）；如需持续保持会话活跃，请定期重新运行此单元格。

In [None]:
def maintain_session(driver, interval=300):
    """Run a single keep-alive check on the browser session.

    If the browser sits on the login page a re-login is attempted with the
    stored credentials.  Otherwise the Blackboard home page is visited, the
    session is validated and the browser is sent back to the page it was on.

    The function performs exactly one check per call; ``interval`` is
    accepted but not used by the body.

    Args:
        driver: Selenium WebDriver to keep alive.
        interval: Intended number of seconds between checks (unused).

    Returns:
        True if the session is valid or the re-login succeeded, False
        otherwise (including when any error occurred).
    """
    home_url = "https://elearning.shanghaitech.edu.cn:8443/"
    try:
        current_url = driver.current_url

        if "authserver/login" in current_url:
            print("检测到登录已失效，尝试重新登录...")
            credentials = load_credentials()
            # NOTE(review): login_with_retry is not defined in the visible
            # part of this file - confirm that it exists elsewhere.
            if not credentials or not login_with_retry(driver, credentials['username'], credentials['password']):
                print("重新登录失败")
                return False
            print("重新登录成功")
            return True

        # Touch the home page to keep the session alive
        try:
            driver.get(home_url)
            time.sleep(2)

            if check_session_valid(driver):
                print("会话状态正常")
                # Go back to where the browser was before the check
                if current_url != home_url:
                    driver.get(current_url)
                return True
        except Exception as e:
            print(f"保持会话时出错: {str(e)}")

        return False

    except Exception as e:
        print(f"维护会话时出错: {str(e)}")
        return False

# Run one session check (maintain_session performs a single pass per call)
print("开始会话维护...")
maintain_session(driver)

## 文件下载增强功能
添加对不同类型文件的处理支持和下载进度显示。

In [None]:
def handle_assignment(driver, link, save_dir):
    """Save an assignment's details and download its attachments.

    Opens ``link``, reads title, content, instructions and (if present) the
    due date, writes them to ``<title>.json`` in ``save_dir`` and downloads
    every link found under ``.attachments``.

    Args:
        driver: Selenium WebDriver with a logged-in session.
        link: Address of the assignment page.
        save_dir: Target directory; created if it does not exist.

    Returns:
        None.

    Raises:
        Exception: Any error while loading or saving the assignment is
            printed and re-raised.  Errors on individual attachments are
            printed and skipped.
    """
    try:
        # Open the assignment page
        driver.get(link)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "assignment"))
        )

        # Collect the assignment details
        title = driver.find_element(By.CLASS_NAME, "assignmentTitle").text
        content = driver.find_element(By.CLASS_NAME, "assignmentContent").text
        instructions = driver.find_element(By.CLASS_NAME, "instructions").text

        # Look the optional due date up once instead of twice
        due_elements = driver.find_elements(By.CLASS_NAME, "dueDate")
        due_date = due_elements[0].text if due_elements else "无截止日期"

        assignment_info = {
            "title": title,
            "content": content,
            "instructions": instructions,
            "due_date": due_date
        }

        # Store the details as JSON
        os.makedirs(save_dir, exist_ok=True)
        filename = f"{sanitize_filename(title)}.json"
        with open(os.path.join(save_dir, filename), 'w', encoding='utf-8') as f:
            json.dump(assignment_info, f, ensure_ascii=False, indent=2)

        print(f"保存作业信息: {filename}")

        # Download the attachments
        attachments = driver.find_elements(By.CSS_SELECTOR, ".attachments a")
        for attachment in attachments:
            try:
                attachment_url = attachment.get_attribute("href")
                if attachment_url:
                    download_file(driver, attachment_url, save_dir)
            except Exception as e:
                # One broken attachment must not stop the others
                print(f"处理附件时出错: {str(e)}")
                continue

    except Exception as e:
        print(f"处理作业资料时出错: {str(e)}")
        raise

def handle_content(driver, link, save_dir):
    """Save a content page's text and download the files it links to.

    Opens ``link``, writes the text of ``#content_listContainer`` to
    ``<page title>.txt`` in ``save_dir`` and downloads every link whose
    address contains ``/bbcswebdav/``.

    Args:
        driver: Selenium WebDriver with a logged-in session.
        link: Address of the content page.
        save_dir: Target directory; created if it does not exist.

    Returns:
        None.

    Raises:
        Exception: Any error while loading or saving the page is printed and
            re-raised.  Errors on individual attachments are printed and
            skipped.
    """
    try:
        # Open the content page
        driver.get(link)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "content_listContainer"))
        )

        # Read the page text and title
        content = driver.find_element(By.ID, "content_listContainer").text
        title = driver.find_element(By.CLASS_NAME, "pageTitle").text

        # Store the text
        os.makedirs(save_dir, exist_ok=True)
        filename = f"{sanitize_filename(title)}.txt"
        with open(os.path.join(save_dir, filename), 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"保存页面内容: {filename}")

        # Download the files linked from the page
        attachments = driver.find_elements(By.CSS_SELECTOR, "a[href*='/bbcswebdav/']")
        for attachment in attachments:
            try:
                attachment_url = attachment.get_attribute("href")
                if attachment_url:
                    download_file(driver, attachment_url, save_dir)
            except Exception as e:
                # One broken attachment must not stop the others
                print(f"处理附件时出错: {str(e)}")
                continue

    except Exception as e:
        print(f"处理课程内容时出错: {str(e)}")
        raise

