In [47]:
import logging
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains

In [48]:
# Configure logging: INFO level, timestamped "time - level - message" lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Global variables and configuration.
# Site root used to build the request URLs in get_action / check_limit / submit_process.
base_url = "http://guizhou.zxjxjy.com"

# Header templates, one per endpoint. They are never sent as-is by watch_video:
# update_headers() copies a template and replaces 'Cookie' with the live driver
# cookies and rewrites the coursewareid/courseid in 'Referer'.
# NOTE(review): the 'Cookie' values below look like a captured browser session
# (including an 'edu-s' token) — consider removing them from the notebook before sharing.
# NOTE(review): 'Content-Length' is a fixed value here; presumably the HTTP client
# recomputes it from the actual body — confirm.

# Template for the form-encoded POST issued by get_action().
get_headers = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Length': '107',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'Hm_lvt_b21659b538990a950b60a32e93668ae4=1721696256; HMACCOUNT=B11A600C74D21728; edu-s=09c8957c97d0486f9a1c00019fc12c8d; Hm_lpvt_b21659b538990a950b60a32e93668ae4=1721697830',
    'Host': 'guizhou.zxjxjy.com',
    'Origin': 'http://guizhou.zxjxjy.com',
    'Referer': 'http://guizhou.zxjxjy.com/p/classroom/simple?coursewareid=fbb5c5dd51284388abf1ee0f46e91387&courseid=c12ec941a16f47a2aa7d8b869f684cea',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest'
}

# Template for the GET issued by check_limit().
check_headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Cookie': 'Hm_lvt_b21659b538990a950b60a32e93668ae4=1721696256; HMACCOUNT=B11A600C74D21728; edu-s=09c8957c97d0486f9a1c00019fc12c8d; Hm_lpvt_b21659b538990a950b60a32e93668ae4=1721697830',
    'Host': 'guizhou.zxjxjy.com',
    'Referer': 'http://guizhou.zxjxjy.com/p/classroom/simple?coursewareid=fbb5c5dd51284388abf1ee0f46e91387&courseid=c12ec941a16f47a2aa7d8b869f684cea',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}

# Template for the JSON POST issued by submit_process().
submit_headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Length': '126',
    'Content-Type': 'application/json',
    'Cookie': 'Hm_lvt_b21659b538990a950b60a32e93668ae4=1721696256; HMACCOUNT=B11A600C74D21728; edu-s=09c8957c97d0486f9a1c00019fc12c8d; Hm_lpvt_b21659b538990a950b60a32e93668ae4=1721697830',
    'Host': 'guizhou.zxjxjy.com',
    'Origin': 'http://guizhou.zxjxjy.com',
    'Referer': 'http://guizhou.zxjxjy.com/p/classroom/simple?coursewareid=fbb5c5dd51284388abf1ee0f46e91387&courseid=c12ec941a16f47a2aa7d8b869f684cea',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}

In [49]:
def get_all_cookies(driver):
    """Return the driver's current cookies as a ``{name: value}`` mapping.

    Args:
        driver: Object whose ``get_cookies()`` returns an iterable of dicts
            carrying at least ``'name'`` and ``'value'`` keys.

    Returns:
        dict: Cookie names mapped to their values. If a name occurs more than
        once, the last occurrence wins.
    """
    cookie_map = {}
    for entry in driver.get_cookies():
        cookie_map[entry['name']] = entry['value']
    return cookie_map

def update_headers(original_headers, new_cookie=None, coursewareid=None, courseid=None):
    """Return a copy of ``original_headers`` with the Cookie and/or Referer refreshed.

    The input mapping is never modified.

    Args:
        original_headers: Header template to copy.
        new_cookie: Replacement for the ``Cookie`` header. A dict is serialised
            as ``"k1=v1; k2=v2"``; a string is used verbatim. Falsy values
            (``None``, ``{}``, ``""``) leave the existing header untouched.
        coursewareid: If truthy, value to set for the ``coursewareid`` query
            parameter of the ``Referer`` header.
        courseid: If truthy, value to set for the ``courseid`` query parameter
            of the ``Referer`` header.

    Returns:
        dict: The updated copy of the headers.

    Raises:
        ValueError: If ``new_cookie`` is truthy but neither a dict nor a str.
        KeyError: If an id is supplied but the headers contain no ``Referer``.
    """
    updated_headers = original_headers.copy()

    if new_cookie:
        if isinstance(new_cookie, dict):
            updated_headers['Cookie'] = '; '.join(f"{k}={v}" for k, v in new_cookie.items())
        elif isinstance(new_cookie, str):
            updated_headers['Cookie'] = new_cookie
        else:
            raise ValueError("new_cookie must be either a dictionary or a string")

    if coursewareid or courseid:
        referer = updated_headers['Referer']
        # partition() tolerates a Referer with no '?' (empty query) or with
        # additional '?' characters inside the query string.
        referer_base, _, query = referer.partition('?')

        # Split each pair on the FIRST '=' only so values that themselves
        # contain '=' (e.g. base64 padding) survive; a bare flag without '='
        # is stored as None so it can be written back unchanged.
        params = {}
        for pair in query.split('&'):
            if not pair:
                continue
            key, sep, value = pair.partition('=')
            params[key] = value if sep else None

        if coursewareid:
            params['coursewareid'] = coursewareid
        if courseid:
            params['courseid'] = courseid

        new_query = '&'.join(k if v is None else f"{k}={v}" for k, v in params.items())
        updated_headers['Referer'] = f"{referer_base}?{new_query}"

    return updated_headers

def get_action(courseid, coursewareid, lesson_location, headers, timeout=10):
    """POST the current playback position to the ``/p/action/get`` endpoint.

    Despite the endpoint's name, this is a form-encoded POST. The response is
    only logged; request failures are logged and swallowed so the caller's
    loop keeps running.

    Args:
        courseid: Course identifier sent as ``courseid``.
        coursewareid: Courseware identifier sent as ``coursewareid``.
        lesson_location: Playback position sent as ``lessonlocation``.
        headers: HTTP headers for the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        None
    """
    url = f"{base_url}/p/action/get"
    payload = {
        "courseid": courseid,
        "coursewareid": coursewareid,
        "lessonlocation": lesson_location,
    }
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
        logger.info(f"GET Action Response: {response.status_code} - {response.text}")
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here as well.
        logger.error(f"GET Action request failed: {e}")

def check_limit(courseid, coursewareid, headers, timeout=10):
    """GET the ``/play/checklimit`` endpoint for a course/courseware pair.

    The response is only logged; request failures are logged and swallowed.

    Args:
        courseid: Course identifier placed in the ``courseid`` query parameter.
        coursewareid: Courseware identifier placed in the ``cwid`` query parameter.
        headers: HTTP headers for the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        None
    """
    url = f"{base_url}/play/checklimit?courseid={courseid}&cwid={coursewareid}"
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        logger.info(f"Check Limit Response: {response.status_code} - {response.text}")
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here as well.
        logger.error(f"Check Limit request failed: {e}")

def submit_process(courseid, coursewareid, lesson_location, headers, timeout=10):
    """POST the playback progress as JSON to ``/p/play/submitProcess``.

    The response is only logged; request failures are logged and swallowed.

    Args:
        courseid: Course identifier sent as ``courseId``.
        coursewareid: Courseware identifier sent as ``coursewareId``.
        lesson_location: Playback position; rounded to 6 decimal places and
            sent as ``lessonLocation``.
        headers: HTTP headers for the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        None
    """
    url = f"{base_url}/p/play/submitProcess"
    payload = {
        "courseId": courseid,
        "coursewareId": coursewareid,
        "lessonLocation": round(lesson_location, 6),
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        logger.info(f"Submit Process Response: {response.status_code} - {response.text}")
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here as well.
        logger.error(f"Submit Process request failed: {e}")


def parse_duration(duration_str):
    """Parse a colon-separated duration string into a number of seconds.

    Accepts ``"H:M:S"``, and additionally ``"M:S"`` and ``"S"`` (missing
    leading fields are treated as zero).

    Args:
        duration_str: Duration text such as ``"00:57:53"`` or ``"57:53"``.

    Returns:
        int: Total seconds.

    Raises:
        ValueError: If a field is not an integer or there are more than
            three fields.
    """
    fields = [int(field) for field in duration_str.split(':')]
    if len(fields) > 3:
        raise ValueError(f"Unsupported duration format: {duration_str!r}")

    # Fold left-to-right in base 60: ((h * 60) + m) * 60 + s.
    total_seconds = 0
    for field in fields:
        total_seconds = total_seconds * 60 + field
    return total_seconds

def parse_progress(progress_str):
    """Convert a percentage string such as ``'95%'`` into a fraction (``0.95``).

    Leading and trailing ``%`` characters are removed before conversion.
    """
    percent_text = progress_str.strip('%')
    percent_value = float(percent_text)
    return percent_value / 100

def _href_query_value(href, key):
    """Return the text following ``key=`` in ``href`` up to the next ``&``, or None if absent."""
    marker = f"{key}="
    if marker not in href:
        return None
    return href.split(marker, 1)[1].split('&', 1)[0]


def extract_course_info(html_content):
    """Extract per-courseware info (ids, duration, name, progress) from page HTML.

    Every ``<div class="xszt_ztl">`` yields one dict. Keys are only present
    when the corresponding element is found inside the div:

    * ``coursewareid`` / ``courseid`` / ``target`` – from the study link
      (``target`` is the link text); an id is ``None`` if the link's href
      lacks that parameter.
    * ``duration`` – seconds, parsed from the ``span.color_d`` text.
    * ``name`` – the ``title`` attribute of ``span.xszt_name``.
    * ``progress`` – fraction parsed from the ``em`` whose class starts with
      ``jdt_bar_``.

    Args:
        html_content: HTML source of the course list page.

    Returns:
        list[dict]: One (possibly partial) dict per matching div, in
        document order.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    divs = soup.find_all('div', class_='xszt_ztl')

    course_info_list = []

    for div in divs:
        course_info = {}

        # NOTE(review): multi-word class strings presumably match only when the
        # class attribute is exactly that string (same order) — confirm against
        # the BeautifulSoup docs if the site's markup changes.
        a_tag = div.find('a', class_=['xszt_time jx_xuex', 'xszt_time ks_xuex'])
        if a_tag:
            href = a_tag.get('href', '')
            # Both ids are cut at the next '&' so the result does not depend on
            # parameter order or on extra trailing parameters.
            course_info['coursewareid'] = _href_query_value(href, 'coursewareid')
            course_info['courseid'] = _href_query_value(href, 'courseid')
            course_info['target'] = a_tag.text.strip()

        duration_span = div.find('span', class_='color_d')
        if duration_span:
            course_info['duration'] = parse_duration(duration_span.text.strip())

        name_span = div.find('span', class_='xszt_name')
        if name_span:
            course_info['name'] = name_span.get('title', '')

        progress_em = div.find('em', class_=lambda x: x and x.startswith('jdt_bar_'))
        if progress_em:
            course_info['progress'] = parse_progress(progress_em.text.strip())

        course_info_list.append(course_info)

    return course_info_list

def determine_start_time(duration, progress):
    """根据进度决定观看起始时间"""
    watched_time = duration * progress
    start_time = max(0, watched_time - 120)  # 从进度前2分钟或0开始
    return round(start_time, 6)



def play_video_in_new_tab(driver, url, max_retries=3):
    """Open ``url`` in a new tab, click the player's start element, then close the tab.

    Each attempt opens a fresh tab, loads ``url``, waits for
    ``#videoContainer > xg-start`` and dispatches synthetic click events at the
    centre of that element. The ``finally`` clause runs for every attempt —
    including the successful one that ``break``s — so the new tab is always
    closed and focus returns to the original window before this function
    returns.

    Args:
        driver: Selenium WebDriver controlling the browser.
        url: Page to open in the new tab.
        max_retries: Maximum number of attempts.

    Returns:
        None

    Raises:
        Exception: Whatever opening the tab / loading the URL raised, re-raised
            only on the last attempt. Failures while locating or clicking the
            element are caught by the inner handler and printed; if every
            attempt fails that way the function returns without raising.
    """
    original_window = driver.current_window_handle
    print(f"原始窗口句柄: {original_window}")

    for attempt in range(max_retries):
        try:
            print(f"尝试 {attempt + 1}")
            
            # Open a new tab using Selenium's native API; focus moves to it.
            driver.switch_to.new_window('tab')
            new_window = driver.current_window_handle
            print(f"新窗口句柄: {new_window}")

            # Load the target URL in the new tab.
            driver.get(url)
            print(f"已导航到目标URL: {driver.current_url}")
            
            # Fixed pause to let the page load.
            time.sleep(5)
            try:
                # Wait (up to 10 s, explicit wait) for the start element to be present.
                element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, '#videoContainer > xg-start'))
                )

                # Read the element's position and size.
                location = element.location
                size = element.size

                # Compute the element's centre point.
                x = location['x'] + size['width'] / 2
                y = location['y'] + size['height'] / 2

                # Simulate the click in JavaScript: the event is dispatched on
                # whatever element document.elementFromPoint returns for (x, y).
                # NOTE(review): elementFromPoint presumably expects viewport
                # coordinates — confirm this still hits the player if the page scrolls.
                click_script = f"""
                    var evt = new MouseEvent('click', {{
                        bubbles: true,
                        cancelable: true,
                        view: window,
                        clientX: {x},
                        clientY: {y}
                    }});
                    document.elementFromPoint({x}, {y}).dispatchEvent(evt);
                """
                # One click, a 3 s pause, then two more clicks back to back.
                driver.execute_script(click_script)
                print(f"Successfully clicked element at position ({x}, {y})")
                time.sleep(3)
                driver.execute_script(click_script)
                driver.execute_script(click_script)
                print(f"Successfully clicked element at position ({x}, {y})")
                time.sleep(3)
                break
            except Exception as e:
                # Locate/click failure: only reported; the loop moves on to the
                # next attempt without the 2 s pause used by the outer handler.
                print(f"Attempt {attempt + 1} failed. Error: {str(e)}")
        except Exception as e:
            print(f"尝试 {attempt+1} 失败: {str(e)}")
            if attempt == max_retries - 1:
                print("达到最大重试次数，操作失败")
                raise
            time.sleep(2)  # Brief pause before retrying.
        finally:
            # Close the new tab (if focus is on one) and return to the original window.
            if driver.current_window_handle != original_window:
                driver.close()
            driver.switch_to.window(original_window)
            print("已返回原始页面")
def watch_video(video, driver):
    """Simulate watching one video by periodically reporting playback progress.

    Opens the classroom page once via ``play_video_in_new_tab``, then loops in
    real time until the reported position reaches the video's duration:

    * every 15 s: ``get_action`` with the position advanced by 15 s;
    * every 15 s: ``check_limit`` twice, 0.5 s apart;
    * every 30 s: ``submit_process`` with the position advanced by 30 s.

    Positions are capped at ``duration``, so the last ``submit_process`` call
    always reports exactly the full length.

    Args:
        video: Dict with ``courseid``, ``coursewareid``, ``duration`` (seconds)
            and ``progress`` (fraction), as produced by ``extract_course_info``.
        driver: Selenium WebDriver; used to open the page and to read the
            current cookies for each batch of requests.

    Returns:
        None

    Raises:
        KeyError: If ``video`` lacks one of the required keys.
    """
    get_interval = 15      # seconds between get_action calls (and position step)
    check_interval = 15    # seconds between check_limit bursts
    submit_interval = 30   # seconds between submit_process calls (and position step)

    courseid = video["courseid"]
    coursewareid = video["coursewareid"]
    duration = video["duration"]
    progress = video["progress"]

    def build_headers(template, cookies):
        """Fill a header template with the live cookies and this video's ids."""
        return update_headers(template, new_cookie=cookies, coursewareid=coursewareid, courseid=courseid)

    # Open the course page and start playback.
    course_url = f"{base_url}/p/classroom/simple?coursewareid={coursewareid}&courseid={courseid}"
    play_video_in_new_tab(driver, course_url)
    start_time = determine_start_time(duration, progress)
    logger.info(f"Starting video: courseid={courseid}, coursewareid={coursewareid}, progress={progress:.2%}, start_time={start_time:.2f}")

    get_lesson_location = int(start_time)
    submit_lesson_location = start_time
    # Monotonic clock: interval measurement must not be affected by
    # wall-clock adjustments.
    last_get_time = last_check_time = last_submit_time = time.monotonic()

    while submit_lesson_location < duration:
        current_time = time.monotonic()
        get_due = current_time - last_get_time >= get_interval
        check_due = current_time - last_check_time >= check_interval
        submit_due = current_time - last_submit_time >= submit_interval

        if get_due or check_due or submit_due:
            # Only query the browser when a request is actually about to be sent.
            cookies = get_all_cookies(driver)

            if get_due:
                get_lesson_location = min(get_lesson_location + get_interval, duration)
                get_action(courseid, coursewareid, get_lesson_location, build_headers(get_headers, cookies))
                last_get_time = current_time

            if check_due:
                check_limit(courseid, coursewareid, build_headers(check_headers, cookies))
                time.sleep(0.5)
                check_limit(courseid, coursewareid, build_headers(check_headers, cookies))
                last_check_time = current_time

            if submit_due:
                # min() caps the position, so the final submit is exactly `duration`
                # and the loop condition then ends the loop.
                submit_lesson_location = min(submit_lesson_location + submit_interval, duration)
                submit_process(courseid, coursewareid, submit_lesson_location, build_headers(submit_headers, cookies))
                last_submit_time = current_time

        time.sleep(1)  # Avoid spinning the loop too often.

    logger.info(f"Video completed: courseid={courseid}, coursewareid={coursewareid}, final_time={duration:.2f}")


In [50]:
# Create the Chrome WebDriver with a "debuggerAddress" option pointing at
# localhost:9998 — presumably this attaches to an already-running Chrome started
# with remote debugging on that port (so the logged-in session is reused); confirm.
options = webdriver.ChromeOptions()
options.add_experimental_option("debuggerAddress", "localhost:9998")
driver = webdriver.Chrome(options=options)
# Implicit wait of 60 for element lookups.
# NOTE(review): play_video_in_new_tab also uses an explicit WebDriverWait(driver, 10);
# Selenium's documentation advises against mixing implicit and explicit waits —
# confirm the effective timeout is what is intended.
driver.implicitly_wait(60)



In [51]:
# Parse the course list from the page currently loaded in the driver's active window.
courses = extract_course_info(driver.page_source)
# Bare expression so the notebook displays the parsed list as the cell output.
courses

[{'duration': 3473, 'name': '生态学的十大范式（一）', 'progress': 1.0},
 {'duration': 2303, 'name': '生态学的十大范式（二）', 'progress': 1.0},
 {'duration': 1009, 'name': '生态学的十大范式（三）', 'progress': 1.0},
 {'duration': 2281, 'name': '理论生态学（一）', 'progress': 1.0},
 {'duration': 2308, 'name': '理论生态学（二）', 'progress': 1.0},
 {'duration': 2258, 'name': '理论生态学（三）', 'progress': 1.0},
 {'coursewareid': '21c7bb98f30b4c0ebabdbf4c95fb3a1d',
  'courseid': '7d2a902ceb4d4a50b614af4bc920c1fe',
  'target': '继续学习',
  'duration': 2363,
  'name': '全球生态学（一）',
  'progress': 0.95},
 {'coursewareid': '4c04c68e05e4441fbe83519047fe3a85',
  'courseid': '7d2a902ceb4d4a50b614af4bc920c1fe',
  'target': '开始学习',
  'duration': 2360,
  'name': '全球生态学（二）',
  'progress': 0.0},
 {'coursewareid': 'bfb60a39241f44b1b143177adcbe1faa',
  'courseid': '7d2a902ceb4d4a50b614af4bc920c1fe',
  'target': '开始学习',
  'duration': 2875,
  'name': '全球生态学（三）',
  'progress': 0.0},
 {'coursewareid': 'de43066189524f41a647a72de3d1cb90',
  'courseid': 'b7405ed8d2f14b2

In [52]:
# Watch every unfinished course in order. Each course is guarded separately so
# that a failure in one (e.g. an entry without a study link, or a browser
# hiccup) is logged with its traceback and does not abort the remaining ones.
for course in courses:
    course_name = course.get('name', '<unnamed>')
    if course.get('progress', 0) >= 1:  # Only watch videos that are not finished yet.
        logger.info(f"Skipping completed video: {course_name}")
        continue
    try:
        watch_video(course, driver)
    except Exception:
        logger.exception(f"An error occurred while processing: {course_name}")

# Reached only when the loop ran to the end (not after an interrupt).
print("All courses have been processed.")


2024-07-24 17:27:19,120 - INFO - Skipping completed video: 生态学的十大范式（一）
2024-07-24 17:27:19,121 - INFO - Skipping completed video: 生态学的十大范式（二）
2024-07-24 17:27:19,121 - INFO - Skipping completed video: 生态学的十大范式（三）
2024-07-24 17:27:19,121 - INFO - Skipping completed video: 理论生态学（一）
2024-07-24 17:27:19,122 - INFO - Skipping completed video: 理论生态学（二）
2024-07-24 17:27:19,123 - INFO - Skipping completed video: 理论生态学（三）


原始窗口句柄: 5F8685338CD0684BBE810C91FED5ED31
尝试 1
新窗口句柄: 038DCCD381BEAC483A3E9571976C943F
已导航到目标URL: http://guizhou.zxjxjy.com/p/classroom/simple?coursewareid=21c7bb98f30b4c0ebabdbf4c95fb3a1d&courseid=7d2a902ceb4d4a50b614af4bc920c1fe
Successfully clicked element at position (403.0, 497.0)
Successfully clicked element at position (403.0, 497.0)


2024-07-24 17:27:30,741 - INFO - Starting video: courseid=7d2a902ceb4d4a50b614af4bc920c1fe, coursewareid=21c7bb98f30b4c0ebabdbf4c95fb3a1d, progress=95.00%, start_time=2124.85


已返回原始页面


2024-07-24 17:27:46,095 - INFO - GET Action Response: 200 - {
	"action":"0",
	"success":true,
	"verificationDuration":0
}
2024-07-24 17:27:46,177 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:27:46,762 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:28:01,011 - INFO - Submit Process Response: 200 - {
	"code":0,
	"data":"ok"
}
2024-07-24 17:28:02,240 - INFO - GET Action Response: 200 - {
	"action":"0",
	"success":true,
	"verificationDuration":0
}
2024-07-24 17:28:02,320 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:28:02,908 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:28:17,241 - INFO - GET Action Response: 200 - {
	"action":"0",
	"success":true,
	"verificationDuration":0
}
2024-07-24 17:28:17,310 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:28:17,903 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:28:31,150 - 

原始窗口句柄: 5F8685338CD0684BBE810C91FED5ED31
尝试 1
新窗口句柄: D3CC76AD347345B0E94A129EA2603CE0
已导航到目标URL: http://guizhou.zxjxjy.com/p/classroom/simple?coursewareid=4c04c68e05e4441fbe83519047fe3a85&courseid=7d2a902ceb4d4a50b614af4bc920c1fe
Successfully clicked element at position (403.0, 497.0)
Successfully clicked element at position (403.0, 497.0)


2024-07-24 17:31:46,029 - INFO - Starting video: courseid=7d2a902ceb4d4a50b614af4bc920c1fe, coursewareid=4c04c68e05e4441fbe83519047fe3a85, progress=0.00%, start_time=0.00


已返回原始页面


2024-07-24 17:32:01,484 - INFO - GET Action Response: 200 - {
	"action":"0",
	"success":true,
	"verificationDuration":0
}
2024-07-24 17:32:01,569 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:32:02,144 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:32:16,478 - INFO - GET Action Response: 200 - {
	"action":"0",
	"success":true,
	"verificationDuration":0
}
2024-07-24 17:32:16,553 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:32:17,129 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:32:17,322 - INFO - Submit Process Response: 200 - {
	"code":0,
	"data":"ok"
}
2024-07-24 17:32:31,700 - INFO - GET Action Response: 200 - {
	"action":"0",
	"success":true,
	"verificationDuration":0
}
2024-07-24 17:32:31,771 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:32:32,381 - INFO - Check Limit Response: 200 - {
	"code":0,
	"data":0
}
2024-07-24 17:32:46,757 - 