# KeanWISE Watchdog - Crawler Demo
This is an **interactive demo** of the **web crawler** part of the project.  
The following code covers three main tasks: `logging in to KeanWISE`, `requesting the HTTP interface`, and `securely logging off the system`.

## KeanWISE Account
Please enter a valid KeanWISE account in the cell below to run the demo.

In [22]:
# KeanWISE credentials and query targets for the demo.
# Fill these in by hand, or leave them as they are and provide the values
# through demo_config.json, which the next cell loads when the file exists.
UserName = ''
Password = ''
student_id = None   # Optional for querying section details
# Section to look up; used as the default section of get_section_details
section_id = ''

In [23]:
import os
import json

# If a config file exists, load the demo settings from the JSON file.
# Only the keys that are present in the file are applied; any key that is
# missing keeps whatever value the name already has. This lets a partial
# config (for example one without the optional student_id) load instead of
# aborting with a KeyError.
file_path = 'demo_config.json'
if os.path.exists(file_path):
    # Explicit encoding: without it open() falls back to the platform default.
    with open(file_path, 'r', encoding='utf-8') as config_file:
        config_dict = json.load(config_file)
    # The file is closed at this point; only dict lookups remain.
    if 'UserName' in config_dict:
        UserName = config_dict['UserName']
    if 'Password' in config_dict:
        Password = config_dict['Password']
    if 'student_id' in config_dict:
        student_id = config_dict['student_id']
    if 'section_id' in config_dict:
        section_id = config_dict['section_id']

## Prepare Environment
Import the required libraries and set the default values.

In [25]:
# Import libraries
import requests
import warnings
from bs4 import BeautifulSoup

In [26]:
# Optionally silence urllib3's SSL warnings (the call below is left commented out)
# Only relevant because the requests are sent with verify=False
# TODO: KeanWISE causes an SSL error when accessed through the requests library
import urllib3
# urllib3.disable_warnings()

In [27]:
# Endpoint URLs; each one is built on top of a previously defined prefix
host = 'selfservice.kean.edu'
origin = f'https://{host}'
student_url = f'{origin}/Student'
login_url = f'{student_url}/Account/Login'
logoff_url = f'{student_url}/Account/LogOff'
planning_url = f'{student_url}/Planning'
degree_plans_url = f'{planning_url}/DegreePlans'
# student_url already ends in /Student, so this path contains /Student twice
section_details_url = f'{student_url}/Student/Courses/SectionDetails'

# Browser-like headers used as the base of every request
# (long values are split across adjacent literals; the resulting strings are unchanged)
basic_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.63'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
}

## Logging in to KeanWISE
Perform the login action to obtain a logged-in session.

In [28]:
def check_status_code(code:int|requests.Response, expectation:int|list[int]|tuple[int, ...]|set[int]|frozenset[int]=200, msg:str='Status code checking failed!', raise_exception:bool=True) -> bool:
    """Check an HTTP status code against the expected value(s).

    Args:
        code: A status code, or a ``requests.Response`` whose ``status_code``
            is checked.
        expectation: A single accepted code, or a list/tuple/set/frozenset of
            accepted codes.
        msg: Prefix of the failure message; the actual status code is appended.
        raise_exception: If true, a mismatch raises; otherwise it only warns.

    Returns:
        True when the code is accepted. False when it is not accepted and
        ``raise_exception`` is false.

    Raises:
        Exception: When the code is not accepted and ``raise_exception`` is true.
    """
    # Get status code
    if isinstance(code, requests.Response):
        code = code.status_code
    # Any common collection of codes is accepted, not only a list; previously a
    # tuple or set fell into the scalar comparison and could never match.
    if isinstance(expectation, (list, tuple, set, frozenset)):
        accepted = code in expectation
    else:
        accepted = code == expectation
    if accepted:
        return True
    # Single failure path shared by both expectation forms
    msg = '{} Status code: {}'.format(msg, code)
    if raise_exception:
        raise Exception(msg)
    warnings.warn(msg)
    return False

# Pull the request verification token(s) out of a page's HTML
# The token is what lets later requests pass the CSRF verification
def get_request_verification_token(content:str|requests.Response, index:int=None) -> str|list[str]:
    """Return the ``__RequestVerificationToken`` value(s) found in a page.

    Args:
        content: HTML source, or a ``requests.Response`` whose ``content`` is parsed.
        index: Position of the token to return. When omitted, the only token
            is returned directly if exactly one exists, otherwise the full list.

    Returns:
        A token string, a list of token strings, or None (after a warning)
        when the page contains no token input.
    """
    # A Response is reduced to its body before parsing
    html = content.content if isinstance(content, requests.Response) else content
    document = BeautifulSoup(html, 'lxml')
    # Only <input> tags carrying both the token name and a value attribute qualify
    inputs = document.find_all(
        'input',
        attrs={'name': '__RequestVerificationToken', 'value': True},
        recursive=True,
    )
    if not inputs:
        warnings.warn('Found no request verification tokens')
        return None
    values = [tag['value'] for tag in inputs]
    # An explicit index always selects a single token
    if index is not None:
        return values[index]
    # Without an index, unwrap a lone token and return several as a list
    if len(values) == 1:
        return values[0]
    return values

# Perform the logging in action
# The foundation of all other functions
def get_logged_in_session(UserName:str=UserName, Password:str=Password, login_url:str=login_url, headers:dict[str, str]=basic_headers, returnUrl:str='', performSamlLogin:str='') -> tuple[requests.Session, requests.Response]:
    """Log in to KeanWISE and return the authenticated session.

    The defaults of ``UserName``, ``Password``, ``login_url`` and ``headers``
    are the module-level values at the moment this function is defined;
    re-run the definition (or pass arguments) after changing them.

    Args:
        UserName: Account name sent in the login form.
        Password: Account password sent in the login form.
        login_url: URL used both to fetch the login page and to post the form.
        headers: Headers sent with both requests.
        returnUrl: Value of the ``returnUrl`` form field.
        performSamlLogin: Value of the ``performSamlLogin`` form field.

    Returns:
        A ``(session, response)`` tuple: the session used for the login and
        the response of the login POST.

    Raises:
        Exception: If either request does not return status 200, or if the
            final URL is still ``login_url`` (treated as a failed login).
    """
    # Create a new requests session
    # KeanWISE uses cookies to verify user authorization
    req_session = requests.Session()
    # SECURITY NOTE: certificate verification is switched off for every request
    # made through this session, including the one that carries the password.
    req_session.verify = False
    try:
        # Request the login page to get Request Verification Token
        res = req_session.get(login_url, headers=headers)
        check_status_code(res, 200, msg='Request login page failed!')
        # Login request:
        #   type: POST
        #   fields: {__RequestVerificationToken, returnUrl, performSamlLogin, UserName, Password}
        data = {
            # index=-1: use the last token found on the login page
            '__RequestVerificationToken': get_request_verification_token(res, index=-1),
            'returnUrl': returnUrl,
            'performSamlLogin': performSamlLogin,
            'UserName': UserName,
            # NOTE(review): key is spelled 'PassWord' although the field list
            # above says Password; left unchanged - confirm against the login form.
            'PassWord': Password,
        }
        # Request the login interface
        res = req_session.post(login_url, data=data, headers=headers)
        check_status_code(res, 200, msg='Request login interface failed!')
        if res.url == login_url: raise Exception('Login Failed! Wrong user name or password.')
    except BaseException:
        # Do not leak the session when any login step fails; the error itself
        # is re-raised unchanged.
        req_session.close()
        raise
    # Return the logged in session and page
    return (req_session, res)

In [29]:
# Log in with the default credentials. Both results are kept: the session for
# further requests, and the response, which later calls receive as last_response.
req_session, res = get_logged_in_session()



## Get the section information
Invoke the KeanWISE web API to get the course information.

In [30]:
# Helper method: Generate the headers for different occasions
def generate_headers(response:requests.Response|None=None, Host:bool|str=True, Origin:bool|str=True, Referer:bool|str=True, XHR:bool=False, token:bool|str=False) -> dict[str, str]:
    """Build request headers on top of a copy of ``basic_headers``.

    For ``Host``, ``Origin``, ``Referer`` and ``token``: ``False`` omits the
    header, a string is used verbatim, and ``True`` derives the value.

    Args:
        response: Previous response; source of the derived Referer
            (``response.url``) and of the derived verification token.
        Host: ``True`` uses the module-level ``host``.
        Origin: ``True`` uses the module-level ``origin``.
        Referer: ``True`` uses ``response.url``; warns if no response is given.
        XHR: If true, adds ``X-Requested-With: XMLHttpRequest``.
        token: ``True`` extracts the first verification token from
            ``response``; warns if no response is given.

    Returns:
        A new headers dict; ``basic_headers`` itself is not modified.
    """
    # Use the basic headers as the base
    headers = basic_headers.copy()
    # Add elements to headers
    if Host is not False:
        headers['Host'] = Host if isinstance(Host, str) else host
    if Origin is not False:
        headers['Origin'] = Origin if isinstance(Origin, str) else origin
    if XHR:
        headers['X-Requested-With'] = 'XMLHttpRequest'
    if Referer is not False:
        if isinstance(Referer, str):
            headers['Referer'] = Referer
        elif Referer and response is not None:
            headers['Referer'] = response.url
        else:
            warnings.warn('Unable to generate referer')
    if token is not False:
        if isinstance(token, str):
            headers['__RequestVerificationToken'] = token
        elif token and response is not None:
            extracted = get_request_verification_token(response, 0)
            # The extractor already warns when the page has no token; leave the
            # header out rather than storing None in a str-valued dict.
            if extracted is not None:
                headers['__RequestVerificationToken'] = extracted
        else:
            warnings.warn('Unable to generate __RequestVerificationToken')
    # Return headers
    return headers

In [31]:
# Request the KeanWISE section details API to query the course details
def get_section_details(session:requests.Session, last_response:requests.Response, section_id:str|int=section_id, student_id:str|int|None=student_id, url:str=section_details_url, return_json:bool=True) -> dict|requests.Response:
    """Query the section details API for one section.

    The defaults of ``section_id``, ``student_id`` and ``url`` are the
    module-level values at the moment this function is defined.

    Args:
        session: Session used to send the request.
        last_response: Previous response; passed to ``generate_headers`` to
            derive the Referer and the verification token.
        section_id: Section to query; an int is converted to a string.
        student_id: Student the query is made for; an int is converted to a
            string, any other value (including None) is sent as is.
        url: Endpoint of the section details API.
        return_json: If true, return the decoded JSON body instead of the response.

    Returns:
        The decoded JSON body, or the raw response when ``return_json`` is false.

    Raises:
        Exception: If the response status is not 200.
    """
    # Generate request headers
    headers = generate_headers(last_response, Host=True, Origin=True, Referer=True, XHR=True, token=True)
    # Construct request json dict (integer ids are sent as strings)
    json_data = {
        'sectionId': str(section_id) if isinstance(section_id, int) else section_id,
        'studentId': str(student_id) if isinstance(student_id, int) else student_id,
    }
    # Send query request
    #   type: POST (XHR)
    #   fields: {sectionId, studentId} (as json)
    res = session.post(url, headers=headers, json=json_data)
    # Check response
    check_status_code(res, 200, msg='Query section details ({}) failed!'.format(section_id))
    # Return section details as json or query response
    return res.json() if return_json else res

In [32]:
# Query the default section using the logged-in session and the last response.
# The bare name on the final line makes the notebook display the result.
section_details = get_section_details(req_session, res)
section_details



{'RequisiteItems': [{'DisplayText': 'Prerequisite(s): CPS 2232',
   'DisplayTextExtension': '- Must be completed prior to taking this course.',
   'RequisiteId': '12920',
   'IsRequired': True,
   'ReferencesInvalidCourseOrSection': False}],
 'TimeLocationItems': [{'Time': 'M, W 8:30 AM - 9:45 AM',
   'Location': ' WENZHOU-KEAN UNIVERSITY, WKU Gehekai Hall B209 (Lecture)',
   'Dates': '8/29/2022 - 12/16/2022'}],
 'InstructorItems': [{'Name': 'Dib, O',
   'EmailAddresses': ['odib@kean.edu'],
   'PhoneNumbers': []}],
 'InstructorOfficeHours': [],
 'BooksCostRequired': None,
 'BooksCostOptional': None,
 'BookList': None,
 'ShowBookList': True,
 'BooksTotal': '',
 'TermDisplay': 'Fall 2022 Wenzhou',
 'DatesDisplay': '8/29/2022 - 12/16/2022',
 'PassNoPassIsRestricted': False,
 'AuditIsRestricted': True,
 'GradingOptionsMatch': True,
 'TransferStatusDescription': '',
 'TopicCodeDescription': None,
 'Charges': [],
 'HasRuleBasedCharges': False,
 'ShowInstantEnrollmentBookstoreLink': False,
 '

In [33]:
# Print a short summary of the section: name, capacity and available seats
# The displayed name joins the course name and the section number with '*'
section_label = f"{section_details['CourseName']}*{section_details['Number']}"
print(f'Section name: {section_label}')
print(f"Section capacity: {section_details['Capacity']}")
print(f"Available seats: {section_details['Available']}")

Section name: CPS*3440*W02
Section capacity: 24
Available seats: 0


## Log off KeanWISE
Perform the log off action to release the connection.

In [34]:
# Control the session to log off
# Prevent the possible multi-logging problem
def session_log_off(session:requests.Session, last_response:requests.Response, logoff_url:str=logoff_url) -> requests.Response:
    """Log the session off and verify that the logoff took effect.

    Args:
        session: Logged-in session to log off.
        last_response: Previous response; its URL becomes the Referer header.
        logoff_url: URL of the logoff interface (default captured when this
            function is defined).

    Returns:
        The response of the logoff request.

    Raises:
        Exception: If the response status is not 200, or if the final URL is
            not the module-level ``login_url``.
    """
    # Generate request headers (no Origin header for this GET request)
    headers = generate_headers(last_response, Host=True, Origin=False, Referer=True)
    # Send logoff request
    #   type: GET
    res = session.get(logoff_url, headers=headers)
    # Check response
    check_status_code(res, 200, msg='Request logoff interface failed!')
    # Check redirection: the request must end up on the login page
    if res.url != login_url: raise Exception('Logoff failed! Please check the status.')
    # Return the logged off session and page
    return res

In [35]:
# Log off; the previous response is passed along so its URL can serve as the Referer
res = session_log_off(req_session, res)

