In [19]:
import time
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import logging
from bs4 import BeautifulSoup

In [12]:
# Logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Global settings
base_url = "http://guizhou.zxjxjy.com"

# Values shared by all three header templates; defined once so they cannot drift apart.
_HOST = 'guizhou.zxjxjy.com'
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
_REFERER = 'http://guizhou.zxjxjy.com/p/classroom/simple?coursewareid=fbb5c5dd51284388abf1ee0f46e91387&courseid=c12ec941a16f47a2aa7d8b869f684cea'
# NOTE(review): this is a captured browser session cookie committed to source.
# update_headers() replaces it whenever it is given a non-empty new_cookie; consider
# loading the fallback from the environment instead of keeping it in the notebook.
_COOKIE = 'Hm_lvt_b21659b538990a950b60a32e93668ae4=1721696256; HMACCOUNT=B11A600C74D21728; edu-s=09c8957c97d0486f9a1c00019fc12c8d; Hm_lpvt_b21659b538990a950b60a32e93668ae4=1721697830'

# Header templates, one per endpoint.
# Content-Length is intentionally not listed: requests computes it from the body it
# actually sends and overrides any custom value, so a hard-coded number is only misleading.

# Template for the form-encoded POST sent by get_action().
get_headers = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': _COOKIE,
    'Host': _HOST,
    'Origin': base_url,
    'Referer': _REFERER,
    'User-Agent': _USER_AGENT,
    'X-Requested-With': 'XMLHttpRequest'
}

# Template for the GET sent by check_limit().
check_headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Cookie': _COOKIE,
    'Host': _HOST,
    'Referer': _REFERER,
    'User-Agent': _USER_AGENT
}

# Template for the JSON POST sent by submit_process().
submit_headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json',
    'Cookie': _COOKIE,
    'Host': _HOST,
    'Origin': base_url,
    'Referer': _REFERER,
    'User-Agent': _USER_AGENT
}

def get_all_cookies(driver):
    """Return the cookies reported by ``driver.get_cookies()`` as a ``{name: value}`` dict.

    When two cookies share a name, the one listed later wins.
    """
    cookie_map = {}
    for entry in driver.get_cookies():
        cookie_map[entry['name']] = entry['value']
    return cookie_map

def update_headers(original_headers, new_cookie=None, coursewareid=None, courseid=None):
    """Return a copy of ``original_headers`` with the cookie and/or Referer ids replaced.

    The input mapping is never modified.

    Args:
        original_headers: Header mapping used as the template.
        new_cookie: Either a ``{name: value}`` dict (joined as ``name=value; ...``)
            or a ready-made cookie string. Falsy values leave ``Cookie`` untouched.
        coursewareid: If truthy, becomes the ``coursewareid`` query parameter of
            the ``Referer`` header.
        courseid: If truthy, becomes the ``courseid`` query parameter of the
            ``Referer`` header.

    Returns:
        The updated copy of the headers.

    Raises:
        ValueError: If ``new_cookie`` is truthy but neither a dict nor a str.
    """
    # Local import keeps this function self-contained within the notebook.
    from urllib.parse import parse_qsl, urlencode, urlsplit

    updated_headers = original_headers.copy()

    if new_cookie:
        if isinstance(new_cookie, dict):
            updated_headers['Cookie'] = '; '.join(f"{k}={v}" for k, v in new_cookie.items())
        elif isinstance(new_cookie, str):
            updated_headers['Cookie'] = new_cookie
        else:
            raise ValueError("new_cookie must be either a dictionary or a string")

    # Without a Referer there is nothing to rewrite, so the ids are simply ignored.
    referer = updated_headers.get('Referer')
    if (coursewareid or courseid) and referer:
        # urlsplit/parse_qsl cope with a missing query string, '=' inside values
        # and a trailing fragment, all of which broke naive str.split() parsing.
        parts = urlsplit(referer)
        query = dict(parse_qsl(parts.query, keep_blank_values=True))

        if coursewareid:
            query['coursewareid'] = coursewareid
        if courseid:
            query['courseid'] = courseid

        updated_headers['Referer'] = parts._replace(query=urlencode(query)).geturl()

    return updated_headers

def get_action(courseid, coursewareid, lesson_location, headers, timeout=10):
    """Report the playback position to the ``/p/action/get`` endpoint.

    Despite the endpoint's name, the request is an HTTP POST with a form-encoded body.
    The outcome is only logged; request failures are caught and logged, not raised.

    Args:
        courseid: Course identifier sent as ``courseid``.
        coursewareid: Courseware identifier sent as ``coursewareid``.
        lesson_location: Playback position sent as ``lessonlocation``.
        headers: HTTP headers for the request.
        timeout: Seconds to wait for the server before giving up. Without a timeout
            a stalled connection would block the caller indefinitely.
    """
    url = f"{base_url}/p/action/get"
    payload = {
        "courseid": courseid,
        "coursewareid": coursewareid,
        "lessonlocation": lesson_location,
    }
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
        logger.info(f"GET Action Response: {response.status_code} - {response.text}")
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they end up here as well.
        logger.error(f"GET Action request failed: {e}")

def check_limit(courseid, coursewareid, headers, timeout=10):
    """Send a GET to ``/play/checklimit`` for the given course and courseware.

    The outcome is only logged; request failures are caught and logged, not raised.

    Args:
        courseid: Course identifier sent as the ``courseid`` query parameter.
        coursewareid: Courseware identifier sent as the ``cwid`` query parameter.
        headers: HTTP headers for the request.
        timeout: Seconds to wait for the server before giving up. Without a timeout
            a stalled connection would block the caller indefinitely.
    """
    url = f"{base_url}/play/checklimit"
    # Let requests build the query string so the values are URL-escaped properly.
    query = {"courseid": courseid, "cwid": coursewareid}
    try:
        response = requests.get(url, headers=headers, params=query, timeout=timeout)
        response.raise_for_status()
        logger.info(f"Check Limit Response: {response.status_code} - {response.text}")
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they end up here as well.
        logger.error(f"Check Limit request failed: {e}")

def submit_process(courseid, coursewareid, lesson_location, headers, timeout=10):
    """Submit the playback progress as JSON to ``/p/play/submitProcess``.

    The outcome is only logged; request failures are caught and logged, not raised.

    Args:
        courseid: Course identifier sent as ``courseId``.
        coursewareid: Courseware identifier sent as ``coursewareId``.
        lesson_location: Playback position; rounded to 6 decimal places and sent
            as ``lessonLocation``.
        headers: HTTP headers for the request.
        timeout: Seconds to wait for the server before giving up. Without a timeout
            a stalled connection would block the caller indefinitely.
    """
    url = f"{base_url}/p/play/submitProcess"
    payload = {
        "courseId": courseid,
        "coursewareId": coursewareid,
        "lessonLocation": round(lesson_location, 6),
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        logger.info(f"Submit Process Response: {response.status_code} - {response.text}")
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they end up here as well.
        logger.error(f"Submit Process request failed: {e}")
def extract_course_info(html_content):
    """Parse a course-list page and return one info dict per ``div.xszt_ztl`` element.

    Each dict only contains the fields whose source element was found in that div:

    * ``coursewareid`` / ``courseid`` -- read from the link's ``href``
      (``None`` when the parameter does not appear in the href),
    * ``target`` -- the stripped link text,
    * ``duration`` -- stripped text of ``span.color_d`` (kept as a string),
    * ``name`` -- ``title`` attribute of ``span.xszt_name``,
    * ``progress`` -- stripped text of the ``em`` with a class starting with ``jdt_bar_``.

    Args:
        html_content: HTML markup of the page.

    Returns:
        A list of dicts, in document order.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    course_info_list = []

    for div in soup.find_all('div', class_='xszt_ztl'):
        course_info = {}

        # The link to the player carries both ids as query parameters.
        a_tag = div.find('a', class_=['xszt_time jx_xuex', 'xszt_time ks_xuex'])
        if a_tag:
            href = a_tag.get('href', '')
            for key in ('coursewareid', 'courseid'):
                marker = f"{key}="
                # Cut every value at the next '&' so a parameter that is not the
                # last one in the href does not swallow the parameters after it.
                course_info[key] = href.split(marker)[1].split('&')[0] if marker in href else None
            course_info['target'] = a_tag.text.strip()

        # Video duration, exactly as displayed on the page.
        duration_span = div.find('span', class_='color_d')
        if duration_span:
            course_info['duration'] = duration_span.text.strip()

        # Course name comes from the title attribute, not the element text.
        name_span = div.find('span', class_='xszt_name')
        if name_span:
            course_info['name'] = name_span.get('title', '')

        # Progress text shown inside the progress bar element.
        progress_em = div.find('em', class_=lambda x: x and x.startswith('jdt_bar_'))
        if progress_em:
            course_info['progress'] = progress_em.text.strip()

        course_info_list.append(course_info)

    return course_info_list
def watch_video(start_time, video, driver, get_interval=15, submit_interval=30):
    """Report playback of one video until the submitted position reaches its duration.

    Every ``get_interval`` seconds the tracked position advances by that amount
    and is sent with get_action(), followed by two check_limit() calls half a
    second apart. Every ``submit_interval`` seconds the submitted position
    advances by that amount and is sent with submit_process(). Between deadlines
    the function sleeps instead of spinning, and browser cookies are only read
    when a request is actually about to be sent.

    Args:
        start_time: Initial playback position for both counters.
        video: Mapping with ``courseid``, ``coursewareid`` and ``duration``.
            ``duration`` is compared against the numeric position, so it must
            be a number.
        driver: Selenium driver whose current cookies are attached to each request.
        get_interval: Seconds between get_action/check_limit rounds.
        submit_interval: Seconds between submit_process calls.

    Raises:
        ValueError: If either interval is not positive (the loop could not advance).
    """
    if get_interval <= 0 or submit_interval <= 0:
        raise ValueError("get_interval and submit_interval must be positive")

    courseid = video["courseid"]
    coursewareid = video["coursewareid"]
    duration = video["duration"]

    get_lesson_location = start_time
    submit_lesson_location = start_time
    # A monotonic clock keeps the cadence stable even if the wall clock is adjusted.
    last_get_time = last_submit_time = time.monotonic()

    while submit_lesson_location < duration:
        # Sleep until the earliest pending deadline instead of busy-waiting.
        next_due = min(last_get_time + get_interval, last_submit_time + submit_interval)
        delay = next_due - time.monotonic()
        if delay > 0:
            time.sleep(delay)

        current_time = time.monotonic()
        get_due = current_time - last_get_time >= get_interval
        submit_due = current_time - last_submit_time >= submit_interval
        if not (get_due or submit_due):
            # Woke marginally early; go round again and re-evaluate the deadlines.
            continue

        # Read the cookies once per round, only when a request will be sent.
        header_args = dict(
            new_cookie=get_all_cookies(driver),
            coursewareid=coursewareid,
            courseid=courseid,
        )

        if get_due:
            get_lesson_location += get_interval
            get_action(courseid, coursewareid, get_lesson_location,
                       update_headers(get_headers, **header_args))

            # Each get_action is followed by two check_limit calls.
            check_limit(courseid, coursewareid, update_headers(check_headers, **header_args))
            time.sleep(0.5)  # short pause between the two requests
            check_limit(courseid, coursewareid, update_headers(check_headers, **header_args))
            last_get_time = current_time

        if submit_due:
            submit_lesson_location += submit_interval
            submit_process(courseid, coursewareid, submit_lesson_location,
                           update_headers(submit_headers, **header_args))
            last_submit_time = current_time

    logger.info(f"Video completed: courseid={courseid}, coursewareid={coursewareid}")


In [13]:
# Attach to an already-running Chrome instance through its remote-debugging address
# rather than launching a fresh browser.
options = Options()
options.add_experimental_option("debuggerAddress", "localhost:9998")
driver = webdriver.Chrome(options=options)

# Implicit wait of 60 seconds for element lookups made through this driver.
driver.implicitly_wait(60)

In [None]:
# Videos to process, in order. ``duration`` is the numeric length that
# watch_video() compares the submitted position against.
videos = [
    {"courseid": "1b5b19c3868b48d5943cd1602d593234", "coursewareid": "841babaa3e064c24860030a7d4cfa814", "duration": 2400},
    {"courseid": "1b5b19c3868b48d5943cd1602d593234", "coursewareid": "3ec85ec1f7e34d0db77434e1e7ea76b3", "duration": 2400},
    {"courseid": "1b5b19c3868b48d5943cd1602d593234", "coursewareid": "4a23cbeb499849f8acfa9a113ef7c8dd", "duration": 2400},
    # Add more videos here
]

for video in videos:
    try:
        watch_video(0, video, driver)
    except Exception:
        # Keep going with the next video, but record the full traceback and which
        # video failed so the failure can actually be diagnosed afterwards.
        logger.exception(
            "Error watching video (courseid=%s, coursewareid=%s)",
            video.get("courseid"),
            video.get("coursewareid"),
        )

