# **Data Ingestion Notebook**

### **Step 0. Run ***pip install -r requirements.txt*** in your Terminal** 
#### (if "pip" is not on your PATH, run it through Python instead: "python -m pip install -r requirements.txt")

### **Step 1. Import Required Libraries**

In [None]:
# canvasapi used as API Wrapper
# Pandas used as Data Exploration and Manipulation Tool

from canvasapi import Canvas
import pandas as pd 

### **Step 2. Initialize Constants and Master Canvas Object**

In [None]:
# Canvas API URL
API_URL = "https://uvu.instructure.com"

# Canvas API key (follow instructions on README file)
API_KEY = 'INSERT TOKEN HERE'

# Canvas User ID: replace None with your numeric Canvas user ID before running the later cells
# (None is only a placeholder that keeps this cell syntactically valid until you edit it)
USER_ID = None  # USER_ID

# Initialize a new Canvas object
canvas = Canvas(API_URL, API_KEY)

### **Step 3. Initialize Lists**

In [None]:
# Temporary in-memory data stores; they serve as the blueprints for the tables to be used in the Transactional Database

# raw row dictionaries (later turned into DataFrames)
courses, course_assignments = list(), list()

# full canvasapi objects, kept so later cells can call methods on them
course_objects, course_assignment_objects = list(), list()

# submissions retrieved for the assignments
course_assignment_submissions = list()

### **Step 4. Load Data from All Courses into DataFrame Object**

In [None]:
# Fetch every available course and keep the full Course objects for the later cells.
# The list is rebuilt from scratch (instead of appended to) so that re-running this cell
# does not duplicate courses.
course_objects = list(canvas.get_courses())

# Build the DataFrame directly from the raw id/name values of each course.
# "courses" is assigned in one step, so this cell never has to append to a name that
# a previous run already turned into a DataFrame; columns= keeps the headers even
# when no courses are returned.
courses = pd.DataFrame(
    [{'course_id': course.id, 'course_name': course.name} for course in course_objects],
    columns=['course_id', 'course_name'],
)

# view results
courses

### **Step 5. Load All Assignments from All Courses into DataFrame Object**

In [None]:
# Column order of the assignments table
assignment_columns = ['course_id', 'assignment_id', 'assignment_name', 'description', 'submitted', 'points_possible', 'submission_types']

# Start from empty stores so that re-running this cell does not duplicate assignments
# (and does not try to append to a "course_assignments" that is already a DataFrame).
course_assignment_objects = []
assignment_rows = []

# OUTER FOR LOOP: iterate through all available courses
# INNER FOR LOOP: for each course, iterate over all available assignments, append full Assignment Objects to list, append the raw values to a list of dictionaries
for course in course_objects:
    for assignment in course.get_assignments():
        course_assignment_objects.append(assignment)
        assignment_rows.append({
            'course_id': course.id,
            'assignment_id': assignment.id,
            'assignment_name': assignment.name,
            'description': assignment.description,
            'submitted': assignment.has_submitted_submissions,
            'points_possible': assignment.points_possible,
            'submission_types': assignment.submission_types,
        })

# output list of dictionaries as Pandas DataFrame; columns= keeps the headers even when no assignments were found
course_assignments = pd.DataFrame(assignment_rows, columns=assignment_columns)

# view results
course_assignments

### **Step 6. Load All Assignment Submissions from All Assignments in All Courses into List**

In [None]:
# For each Assignment Object in the "course_assignment_objects" list, fetch the submission made by the user (you).
# That list already holds the assignments of every course, so one pass over it is enough:
# nesting it inside a loop over the courses would request each submission once per course
# and store the duplicates. Building a fresh list also keeps this cell safe to re-run.
course_assignment_submissions = [
    assignment.get_submission(USER_ID)
    for assignment in course_assignment_objects
]

### **Step 7. Load All Submission Data into DataFrame Object**

In [None]:
# Columns of the final submissions table, in output order
# ('attachments' is collected below but intentionally not part of the table)
submission_columns = ['assignment_id', 'submission_id', 'submission_type', 'body', 'attempt', 'extra_attempts', 'due_date', 'grade', 'score', 'late', 'submitted_at']

submission_info = []
for submission in course_assignment_submissions:
    # only include assignments that have been completed (an attempt was recorded)
    if submission.attempt is not None:
        submission_info.append({
            'assignment_id': submission.assignment_id,
            'attachments': submission.attachments,
            'attempt': submission.attempt,
            'body': submission.body,
            'due_date': submission.cached_due_date,
            'grade': submission.entered_grade,
            'score': submission.entered_score,
            'extra_attempts': submission.extra_attempts,
            'submission_id': submission.id,
            'late': submission.late,
            'submission_type': submission.submission_type,
            'submitted_at': submission.submitted_at,
        })

# output list of dictionaries as Pandas DataFrame
# columns= selects and orders the columns in one step and still yields the headers when
# there are no completed submissions (selecting them by name from an empty frame would raise KeyError)
submissions = pd.DataFrame(submission_info, columns=submission_columns)

# view results
submissions

### **Step 8. Output All DataFrame Objects to CSV Files**

In [None]:
# Replace the placeholder with your name; the same prefix is used for all three output files
YOUR_NAME = '[YOUR_NAME]'

courses_file = f'{YOUR_NAME}_courses.csv'
assignments_file = f'{YOUR_NAME}_assignments.csv'
submissions_file = f'{YOUR_NAME}_submissions.csv'

# index=False leaves the DataFrame row index out of the CSV files
courses.to_csv(courses_file, index=False)
course_assignments.to_csv(assignments_file, index=False)
submissions.to_csv(submissions_file, index=False)