# 01. Download

- Download all the files from a given Canvas course, into `_data/*`

- Structure of course data:
  - `_data/course-${course_id}` (`course_id` is the number in the Canvas course URL)
    - `files/`
      - < insert a download of all the files >
    - `posts/` (perhaps tbd)
      - < extract of all the posts, markdown perhaps? >
    - `modules/`, `announcements/`, etc. Each folder here is named for the part of Canvas its contents came from.


In [None]:
# TODO

## Setup & Imports

In [49]:
import os
import requests
from tai_index import example
from dotenv import load_dotenv
import urllib.request

In [50]:
# Run python-dotenv's loader first so that values it adds to the process
# environment are visible to the lookups below.
load_dotenv()

# Canvas connection settings read from the environment; each one is None
# when its variable is not set.
canvas_url = os.environ.get('CANVAS_URL')
canvas_token = os.environ.get('CANVAS_TOKEN')

## Download Functions

### Files Downloader

In [51]:
def download_files(course_id):
    """Download the files of a Canvas course into ``_data/course-{course_id}/files/``.

    Requests the ``/courses/{course_id}/files`` listing from the module-level
    ``canvas_url`` (authenticating with ``canvas_token``), follows the
    ``next`` pagination links until there are none left, and saves every
    listed file under its ``display_name``. Files that already exist on disk
    are not downloaded again.

    Args:
        course_id: Canvas course identifier; interpolated into both the
            request URL and the output directory name.

    Returns:
        None. Progress and request failures are reported with ``print``.
    """
    # Set up API request for files
    files_url = f'{canvas_url}/courses/{course_id}/files'
    headers = {'Authorization': f'Bearer {canvas_token}'}
    directory = f'_data/course-{course_id}/files/'

    # Loop to account for pagination!
    while files_url:
        files_response = requests.get(files_url, headers=headers)

        # A failed request does not carry a file list, so report it and stop
        # rather than trying to iterate over an error payload.
        if files_response.status_code != 200:
            print(f"Error fetching files. Status code: {files_response.status_code}")
            return

        # Create a directory to store the downloaded files
        os.makedirs(directory, exist_ok=True)

        # Process this page BEFORE deciding whether to continue, so that a
        # response without a Link header still has its files downloaded.
        for file_info in files_response.json():
            print("Downloading: " + file_info['display_name'] + "...")

            filepath = os.path.join(directory, file_info['display_name'])
            if not os.path.exists(filepath):
                # Download under a temporary name and rename on success, so an
                # interrupted transfer never leaves a truncated file that the
                # existence check above would skip on the next run.
                partial_path = filepath + '.part'
                urllib.request.urlretrieve(file_info['url'], partial_path)
                os.replace(partial_path, filepath)

        # requests exposes the parsed Link header as `.links` (empty when the
        # header is absent); no "next" entry means this was the last page.
        files_url = files_response.links.get('next', {}).get('url')


### Announcements Downloader

In [55]:
def download_announcements(course_id):
    """Save the announcements of a Canvas course as a single HTML file.

    Requests ``/courses/{course_id}/discussion_topics?only_announcements=true``
    from the module-level ``canvas_url`` (authenticating with
    ``canvas_token``), follows the ``next`` pagination links, and writes every
    announcement's title and message to
    ``_data/course-{course_id}/announcements/announcements.html``.

    Args:
        course_id: Canvas course identifier; interpolated into both the
            request URL and the output directory name.

    Returns:
        None. Success and request failures are reported with ``print``. If
        any page request fails, nothing is written.
    """
    import html  # function-scope import: only needed here, to escape titles

    # Define the API endpoint for discussions
    discussion_url = f'{canvas_url}/courses/{course_id}/discussion_topics?only_announcements=true'
    # Set up the headers with the authorization token
    headers = {
        "Authorization": f"Bearer {canvas_token}"
    }

    # Collect every page of results; a single request only returns the
    # first page of the listing.
    announcements = []
    while discussion_url:
        response = requests.get(discussion_url, headers=headers)
        if response.status_code != 200:
            print(f"Error fetching announcements. Status code: {response.status_code}")
            return
        announcements.extend(response.json())
        # `.links` is requests' parsed Link header; no "next" entry means
        # this was the last page.
        discussion_url = response.links.get('next', {}).get('url')

    # Generate HTML content. The charset declaration matches the UTF-8
    # encoding used when writing the file below.
    html_parts = ['<html><head><meta charset="utf-8"></head><body>']
    for announcement in announcements:
        # The title is inserted as text, so escape it to keep characters
        # such as "<" or "&" from breaking the markup.
        html_parts.append(f"<h2>{html.escape(str(announcement['title']))}</h2>")
        # NOTE(review): the message is inserted unescaped, as before --
        # presumably Canvas returns it as HTML markup; confirm.
        html_parts.append(f"<p>{announcement['message']}</p>")
    html_parts.append("</body></html>")

    # Write HTML content to a file
    directory = f'_data/course-{course_id}/announcements/'
    os.makedirs(directory, exist_ok=True)
    filepath = os.path.join(directory, "announcements.html")

    # Explicit encoding: the platform default may be unable to represent
    # non-ASCII characters in announcements.
    with open(filepath, "w", encoding="utf-8") as html_file:
        html_file.write("".join(html_parts))
    print("Announcements saved to announcements.html.")