# **Canvas API Exploration Notebook**

### **Step 0. Run ***pip install -r requirements.txt*** in your Terminal** 
#### (you may need to include either a "python" or "python -m" prefix)

### **Step 1. Import Required Libraries**

In [6]:
# canvasapi used as API Wrapper
# Pandas used as Data Exploration and Manipulation Tool

from canvasapi import Canvas
import pandas as pd

### **Step 2. Initialize Constants and Master Canvas Object**

In [7]:
# Canvas API URL
API_URL = "https://uvu.instructure.com"

# Canvas API key (follow instructions on README file)
API_KEY = open("API_KEY.txt", "r").read().strip()

# Initialize a new Canvas object
canvas = Canvas(API_URL, API_KEY)

# Canvas User ID
USER_ID = canvas.get_user('self').id

### **Step 3. Initialize Lists**

In [8]:
# These lists will act as temporary data stores and be the blueprints for the tables to be used in the Transactional Database

courses = []
course_objects = []
course_assignments = []
course_assignment_objects = []
course_assignment_submissions = []

### **Step 4. Load Data from All Courses into DataFrame Object**

In [9]:
# iterate through all available courses, append the raw strings to a list of dictionaries, append the Course objects to a separate list
for course in canvas.get_courses():
    courses.append({'course_id':course.id, 'course_name':course.name})
    course_objects.append(course)

# output list of dictionaries as Pandas DataFrame
courses = pd.DataFrame(courses)

# view results
courses

Unnamed: 0,course_id,course_name
0,521623,ART-1010-X02 | Summer 2021
1,533121,BIOL-1010-X01-X07-X08-Fall 2021-XLIST | Egan
2,516477,BIOL-1610-X05 | Spring 2021
3,512661,BIOL-1615-209 | Spring 2021
4,523168,CHEM-1010-001 | Summer 2021 B2
5,492699,COMM-1020-J40 | Spring 2020 HSS
6,516748,COMM-2110-X08 | Spring 2021
7,553394,CS-305G-601 | 2023 Spring - Full Term
8,555857,CS-496R-001 | 2023 Spring - Full Term
9,503456,CS-1400-X03 | Fall 2020


### **Step 5. Load All Assignments from All Courses into DataFrame Object**

In [10]:
# OUTER FOR LOOP: iterate through all available courses
# INNER FOR LOOP: for each course, iterate over all available assignments, append full Assignment Objects to list, append the raw strings to a list of dictionaries
for course in course_objects:
    for assignment in course.get_assignments():
        course_assignment_objects.append(assignment)
        course_assignments.append({'course_id':course.id, 'assignment_id':assignment.id, 'assignment_name':assignment.name, 'description':assignment.description, 'submitted':assignment.has_submitted_submissions, 'points_possible':assignment.points_possible, 'submission_types':assignment.submission_types})
        
# output list of dictionaries as Pandas DataFrame
course_assignments = pd.DataFrame(course_assignments)

# view results
course_assignments

Unnamed: 0,course_id,assignment_id,assignment_name,description,submitted,points_possible,submission_types
0,521623,5499199,1.10 Assignment,"<link rel=""stylesheet"" href=""https://instructu...",True,20.0,[online_upload]
1,521623,5499195,1.3 Discussion Assignment,"<link rel=""stylesheet"" href=""https://instructu...",True,10.0,[discussion_topic]
2,521623,5499196,2.9 Discussion Assignment,"<link rel=""stylesheet"" href=""https://instructu...",True,20.0,[discussion_topic]
3,521623,5499202,3.8 Assignment: Feldman Method Critique,"<link rel=""stylesheet"" href=""https://instructu...",True,35.0,[online_upload]
4,521623,5499201,3.8 Assignment: Extra Credit Peer Review,"<link rel=""stylesheet"" href=""https://instructu...",True,0.0,[online_upload]
...,...,...,...,...,...,...,...
1786,519681,5322841,Exam 1 - proctorio (Remotely Proctored),"<link rel=""stylesheet"" href=""https://instructu...",True,0.0,[online_quiz]
1787,519681,5322843,Exam 2 - proctorio (Remotely Proctored),,True,0.0,[online_quiz]
1788,519681,5322845,Exam 3 - proctorio (Remotely Proctored),"<link rel=""stylesheet"" href=""https://instructu...",True,0.0,[online_quiz]
1789,519681,5322844,Exam 4 - proctorio (Remotely Proctored),,True,0.0,[online_quiz]


### **Step 6. Load All Assignment Submissions from All Assignments in All Courses into List**
#### **Current Bottleneck**

In [11]:
# for each Assignment Object in the "course_assignment_objects" list, iterate over each available submission made by the user (you)

for assignment in course_assignment_objects:
    course_assignment_submissions.append(assignment.get_submission('self'))

### **Step 7. Load All Submission Data into DataFrame Object**

In [12]:
submission_info = []
for submission in course_assignment_submissions:
    # only include assignments that have been completed
    try:
        if submission.attempt != None:
            submission_info.append({'assignment_id':submission.assignment_id, 'attachments':submission.attachments, 'attempt':submission.attempt, 'body':submission.body, 'due_date':submission.cached_due_date, 'grade':submission.entered_grade, 'score':submission.entered_score, 'extra_attempts':submission.extra_attempts, 'submission_id':submission.id, 'late':submission.late, 'submission_type':submission.submission_type, 'submitted_at':submission.submitted_at})
    except:
        pass
# output list of dictionaries as Pandas DataFrame
submissions = pd.DataFrame(submission_info)

# reorder columns in DataFrame
submissions = submissions[['assignment_id', 'submission_id', 'submission_type', 'body', 'attachments', 'attempt', 'extra_attempts', 'due_date', 'grade', 'score', 'late', 'submitted_at']]

# view results
submissions

Unnamed: 0,assignment_id,submission_id,submission_type,body,attachments,attempt,extra_attempts,due_date,grade,score,late,submitted_at
0,5499199,146683593,online_upload,,[1.10 Assignment.pdf],1,,2021-05-23T05:59:00Z,20,20.0,False,2021-05-21T20:01:53Z
1,5499195,146683425,discussion_topic,,[],1,,2021-05-16T05:59:00Z,complete,10.0,False,2021-05-12T23:01:43Z
2,5499196,146683467,discussion_topic,,[],1,,2021-05-30T05:59:00Z,20,20.0,False,2021-05-30T01:33:51Z
3,5499202,146683719,online_upload,,[Assignment_ Feldman Method Critique.pdf],1,,2021-06-03T05:59:00Z,35,35.0,False,2021-06-03T03:18:03Z
4,5499203,146683761,online_upload,,[4.18 Assignment.pdf],1,,2021-06-13T05:59:00Z,12.5,12.5,True,2021-06-13T05:59:45Z
...,...,...,...,...,...,...,...,...,...,...,...,...
1251,5322841,141469385,online_quiz,"<link rel=""stylesheet"" href=""https://instructu...",[],1,,,0,0.0,False,2021-02-06T03:59:21Z
1252,5322843,141469499,online_quiz,"<link rel=""stylesheet"" href=""https://instructu...",[],1,,2021-03-06T06:59:00Z,0,0.0,False,2021-03-06T00:22:07Z
1253,5322845,141469613,online_quiz,"<link rel=""stylesheet"" href=""https://instructu...",[],1,,2021-04-03T05:59:00Z,0,0.0,False,2021-04-03T03:44:15Z
1254,5322844,141469556,online_quiz,"<link rel=""stylesheet"" href=""https://instructu...",[],1,,2021-04-28T05:59:00Z,0,0.0,False,2021-04-27T20:49:16Z


### **Output All DataFrame Objects to CSV Files**

In [15]:
import os

courses_file = f'{USER_ID}_courses'
assignments_file = f'{USER_ID}_assignments'
submissions_file = f'{USER_ID}_submissions'

courses_file = os.path.join('test_files', courses_file)
assignments_file = os.path.join('test_files', assignments_file)
submissions_file = os.path.join('test_files', submissions_file)

courses.to_parquet(courses_file, index=False, compression='snappy')
course_assignments.to_parquet(assignments_file, index=False, compression='snappy')
submission.drop(columns=['attachments'], inplace=True)
submissions.to_parquet(submissions_file, index=False, compression='snappy')

ArrowInvalid: ('Could not convert File(_requester=<canvasapi.requester.Requester object at 0x000001572C778050>, id=106761986, uuid=uzYCe8YGuelowyPfsATkfwno29PRteA0EWenok2A, folder_id=4526141, folder_id_date=4526-05-21 00:00:00+00:00, display_name=1.10 Assignment.pdf, filename=1.10+Assignment.pdf, upload_status=success, content-type=application/pdf, url=https://uvu.instructure.com/files/106761986/download?download_frd=1&verifier=uzYCe8YGuelowyPfsATkfwno29PRteA0EWenok2A, size=38700, created_at=2021-05-21T20:01:52Z, created_at_date=2021-05-21 20:01:52+00:00, updated_at=2021-05-21T20:03:12Z, updated_at_date=2021-05-21 20:03:12+00:00, unlock_at=None, locked=False, hidden=False, lock_at=None, hidden_for_user=False, thumbnail_url=None, modified_at=2021-05-21T20:01:52Z, modified_at_date=2021-05-21 20:01:52+00:00, mime_class=pdf, media_entry_id=None, category=uncategorized, locked_for_user=False, preview_url=/api/v1/canvadoc_session?blob=%7B%22moderated_grading_allow_list%22:null,%22enable_annotations%22:true,%22enrollment_type%22:%22student%22,%22anonymous_instructor_annotations%22:false,%22submission_id%22:146683593,%22user_id%22:10120000001889694,%22attachment_id%22:106761986,%22type%22:%22canvadoc%22%7D&hmac=1094eaec1814011d044f71293b4c4c575fba642d) with type File: did not recognize Python value type when inferring an Arrow data type', 'Conversion failed for column attachments with type object')