### Installations

In [None]:
#%pip install tqdm

### Imports

In [3]:
import json
import os
from datetime import datetime
from time import sleep

import pandas as pd
import requests
from tqdm.notebook import tqdm, trange

### Globals

In [92]:
# API Constants
# Credentials are read from the environment so real secrets never have to be
# saved in the notebook. When a variable is unset, the original placeholder is
# used, exactly as before.
ACADEMY_ID = os.environ.get("SCHOOX_ACADEMY_ID", "<YOUR_ACADEMY_ID>")
API_KEY = os.environ.get("SCHOOX_API_KEY", "<YOUR_API_KEY>")
BASE_URL = "https://app.ca.schoox.com/api/v1"

# Query-string parameters sent with every API request.
PARAMS = {
    "apikey": API_KEY,
    "acadId": ACADEMY_ID
}

# Value of the `limit` query parameter used when listing users.
GET_USER_LIMIT = 500

# Output CSV paths (relative to the working directory) for each dataset.
progress_table = 'schoox_progress.csv'
users_table = 'schoox_users.csv'
courses_table = 'schoox_courses.csv'

# Box-drawing horizontal line character, used to draw section rules in the log output.
DASH = '\u2500'

### Function Definitions

In [None]:
# ========= Main api call function =========

def get_response_json(request, parameters=None):
    """GET ``{BASE_URL}/{request}`` and return the decoded JSON body.

    Args:
        request: Endpoint path appended to ``BASE_URL`` (e.g. ``"users"``).
        parameters: Optional dict of extra query parameters. It is not
            modified; on a key clash the values in ``PARAMS`` win.

    Returns:
        The parsed JSON payload on an HTTP 200 response. ``None`` when every
        attempt returned a non-200 status or when a
        ``requests.RequestException`` was raised (an error line is printed in
        both cases).

    Raises:
        json.JSONDecodeError: If a 200 response body is not valid JSON.
    """
    endpoint = f"{BASE_URL}/{request}"
    # Build a fresh dict so neither the caller's dict nor PARAMS is mutated.
    query = {**(parameters or {}), **PARAMS}

    max_retries = 5      # extra attempts after the first request
    retry_delay = 3      # seconds to wait before each retry
    timeout = 60         # seconds; without it a stalled connection hangs forever

    try:
        response = None
        for attempt in range(max_retries + 1):
            if attempt > 0:
                sleep(retry_delay)
            response = requests.get(endpoint, params=query, timeout=timeout)
            if response.status_code == 200:
                return json.loads(response.text)

        # All attempts exhausted: report the last response seen.
        print(f"❌ Error: {response.status_code}: {response.text}")

    except requests.RequestException as e:
        print(f"❌ Request failed: {e}")

    return None


# User Functions ================================================================================

def generate_all_units():
    """Fetch the names of all teams, companies, divisions and roles.

    Teams come from the ``units`` endpoint (names sorted in descending order);
    the other three lists come from the ``aboves`` endpoint, filtered by
    ``type_id``.

    Returns:
        dict with the keys ``'teams'``, ``'companies'``, ``'divisions'`` and
        ``'roles'``, each mapping to a list of names.
    """
    units_df = pd.DataFrame(get_response_json('units'))
    team_names = list(units_df['name'])
    team_names.sort(reverse=True)

    aboves_df = pd.DataFrame(get_response_json('aboves', {'limit': 1000}))

    def names_of_type(type_id):
        # Names of every "above" entry whose type_id equals the given id.
        matches = aboves_df['type_id'] == type_id
        return list(aboves_df[matches]['name'])

    # NOTE(review): the numeric type ids are presumably specific to this
    # academy's configuration -- confirm before reusing elsewhere.
    return {
        'teams': team_names,
        'companies': names_of_type(746464),
        'divisions': names_of_type(746475),
        'roles': names_of_type(746463),
    }


# Final user function
def get_all_users():
    """Fetch the user list and derive team/role/division/company columns.

    Each derived column holds the first known name (from
    ``generate_all_units``) that occurs as a substring of the string form of
    the user's ``units`` / ``above_units`` value, or ``''`` if none occurs.

    Returns:
        DataFrame with the columns id, firstname, lastname, email, company,
        division, team, role, username.
    """
    print("⚙️ Fetching user data...", end='\r')
    query = {"limit": GET_USER_LIMIT, **PARAMS}
    data_df = pd.DataFrame(get_response_json("users", query))

    all_units = generate_all_units()

    def first_match(candidates):
        # Build a per-cell mapper returning the first candidate found in str(cell).
        def pick(cell):
            text = str(cell)
            for name in candidates:
                if name in text:
                    return name
            return ''
        return pick

    # (new column, source column, key into all_units)
    derived = (
        ('team', 'units', 'teams'),
        ('role', 'above_units', 'roles'),
        ('division', 'above_units', 'divisions'),
        ('company', 'above_units', 'companies'),
    )
    for column, source, key in derived:
        data_df[column] = data_df[source].apply(first_match(all_units[key]))

    print(f"✅ Data of {len(data_df)} users loaded.")
    wanted = ['id', 'firstname', 'lastname', 'email', 'company',
              'division', 'team', 'role', 'username']
    return data_df[wanted]


# Courses Functions =============================================================================

def extract_names(dict_item):
    """Return ``"<firstname> <lastname>"`` built from the mapping's two keys."""
    first = dict_item['firstname']
    last = dict_item['lastname']
    return "{} {}".format(first, last)

def extract_categories(dict_list):
    """Return the ``'name'`` value of every dict in ``dict_list``, in order."""
    names = []
    for entry in dict_list:
        names.append(entry['name'])
    return names

# Final courses function
def get_all_courses():
    """Fetch every course, page by page, and return a tidy DataFrame.

    Pages of ``page_size`` courses are requested from the ``courses`` endpoint
    with an increasing ``start`` offset until an empty page is returned.

    Returns:
        DataFrame with the columns id, title, instructor, students, level,
        categories, scope. ``instructor`` is "<firstname> <lastname>",
        ``categories`` is a comma-separated string of category names (with
        ``-`` also turned into ``, ``), and ``scope`` is cast to ``str``.
        An empty frame with those columns is returned when there are no
        courses.

    Raises:
        RuntimeError: If a page request fails (``get_response_json`` returned
            ``None``). Failing loudly avoids saving a silently truncated list.
    """
    print("⚙️ Fetching course data...", end='\r')
    page_size = 100
    columns = ['id', 'title', 'instructor', 'students', 'level', 'categories', 'scope']
    parameters = {"limit": page_size, "start": 0}

    # Collect pages in a list and concatenate once: avoids re-copying the
    # accumulated frame on every iteration and never appends the empty
    # terminating page.
    pages = []
    while True:
        response_json = get_response_json("courses", parameters)
        if response_json is None:
            raise RuntimeError(
                f"Failed to fetch courses starting at offset {parameters['start']}."
            )
        if len(response_json) == 0:
            break
        pages.append(pd.DataFrame(response_json))
        parameters["start"] += page_size

    if not pages:
        print("✅ Data of 0 courses loaded.")
        return pd.DataFrame(columns=columns)

    data_df = pd.concat(pages, ignore_index=True)

    data_df['instructor'] = data_df['instructor'].apply(extract_names)
    data_df['categories'] = data_df['categories'].apply(
        lambda cats: ', '.join(extract_categories(cats)).replace('-', ', ')
    )
    # Explicit item assignment; attribute-style assignment only works by
    # accident when the column already exists.
    data_df['scope'] = data_df['scope'].astype(str)

    print(f"✅ Data of {len(data_df)} courses loaded.")

    return data_df[columns]


# Progress Functions ============================================================================

def get_user_details(user_id:str, parameters=None):
    """Fetch ``users/{user_id}`` and return the response as a DataFrame.

    Args:
        user_id: Identifier interpolated into the endpoint path.
        parameters: Optional dict of extra query parameters. It is not
            modified; on a key clash the values in ``PARAMS`` win.

    Returns:
        ``pd.DataFrame`` built directly from the JSON response.
    """
    # Merge into a new dict instead of update()-ing the caller's dict in place.
    query = None if parameters is None else {**parameters, **PARAMS}

    response_json = get_response_json(f"users/{user_id}", query)

    return pd.DataFrame(response_json)


def get_user_course_details(user_id:str, parameters=None):
    """Query the ``courses`` endpoint for one user and return a DataFrame.

    Args:
        user_id: Sent as the ``userId`` query parameter.
        parameters: Optional dict of extra query parameters. It is not
            modified. If it already contains ``userId`` that value is kept;
            on a key clash with ``PARAMS`` the ``PARAMS`` values win.

    Returns:
        ``pd.DataFrame`` built directly from the JSON response.
    """
    # Previously `user_id` was dropped whenever `parameters` was supplied, so
    # the request was not scoped to the user at all. Always include it, and
    # build a new dict rather than mutating the caller's.
    # NOTE(review): `userId` is the parameter name the original code used --
    # confirm it against the API documentation.
    query = {"userId": user_id}
    if parameters is not None:
        query.update(parameters)
        query.update(PARAMS)

    response_json = get_response_json("courses", query)

    return pd.DataFrame(response_json)


def get_user_progress(user_id):
    """Return the ``dashboard/users/{user_id}/courses`` response as a DataFrame.

    A ``user_id`` column holding the given id is added to every row.
    """
    endpoint = f"dashboard/users/{user_id}/courses"
    progress_df = pd.DataFrame(get_response_json(endpoint))
    return progress_df.assign(user_id=user_id)


# Final progress function
def get_all_progress():
    """Fetch course progress for every listed user and return one DataFrame.

    The user list is requested from the ``users`` endpoint; for each id the
    ``dashboard/users/{id}/courses`` endpoint is queried with
    ``dropped_out=false``. A failure for a single user is printed and skipped.

    Returns:
        DataFrame restricted to (and ordered by) the fixed column list below.
        Columns missing from the responses are filled with NaN. An empty frame
        with those columns is returned when no progress rows were found.

    Raises:
        RuntimeError: If the user list itself could not be fetched.
    """
    print("⚙️ Fetching progress data...", end='\r')
    cols = [
            'user_id','id','title','url','progress','total_time','due_date','is_due',
            'last_progress','completions_count','completed_by_admin','assignee_first_name',
            'assignee_last_name','enroll_date','timecompleted','certificates','enrolled',
            'archived','completed_as_equivalent','external_id','required','compliance_course'
            ]

    users_json = get_response_json("users", {'limit': GET_USER_LIMIT})
    if users_json is None:
        # Previously this surfaced as "TypeError: 'NoneType' object is not iterable".
        raise RuntimeError("Failed to fetch the user list; cannot collect progress data.")
    id_list = [user['id'] for user in users_json]

    # Collect per-user frames and concatenate once at the end; concatenating
    # inside the loop re-copies the accumulated data on every iteration.
    frames = []
    for user_id in tqdm(id_list):
        try:
            response = get_response_json(
                f"dashboard/users/{user_id}/courses", {'dropped_out': 'false'}
            )
            # Skip empty and failed (None) responses: they contribute no rows.
            if response:
                frames.append(pd.DataFrame(response).assign(user_id=user_id))
        except Exception as E:
            print(f"{user_id}: {E}")

    print(f"✅ Progress data of {len(id_list)} users loaded.")

    if not frames:
        return pd.DataFrame(columns=cols)
    return pd.concat(frames, ignore_index=True).reindex(columns=cols)


# Misc Functions ============================================================================

def save_data(df, filepath):
    """Write ``df`` to ``filepath`` as CSV (without the index).

    Progress and the outcome are printed. Errors raised while saving are
    reported, including the underlying message, instead of being raised.
    ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted, so the
    notebook can still be interrupted during a save.

    Args:
        df: Object exposing ``to_csv(path, index=False)`` (a DataFrame).
        filepath: Destination path of the CSV file.
    """
    try:
        print("⚙️ Saving Data...", end='\r')
        df.to_csv(filepath, index=False)
        print(f"✅ Data Saved to {filepath}")
    except Exception as e:
        # `except Exception` rather than a bare `except:` so interrupts propagate.
        print(f"❌ Error saving data. Please check the file path or format. ({e})")

### Main

In [None]:
# Current weekday name. It is not consulted below: every enabled dataset is
# refreshed on each run, regardless of the day.
day = datetime.today().strftime('%A')

# Trigger values. 1 to run and 0 to skip
user_trigger = 1
course_trigger = 1
progress_trigger = 1

# Fetch and save user data
print("USERS " + DASH * 29)
if user_trigger == 1:
    users = get_all_users()
    # Remove schoox admin users.
    # regex=False: match '@schoox.com' literally ('.' is not a wildcard).
    # na=False: a missing email counts as "not schoox"; without it the mask
    # contains NaN and `~` raises TypeError.
    is_schoox_email = users['email'].str.contains('@schoox.com', regex=False, na=False)
    users = users[(users['username'] != 'solutions.architect') & ~is_schoox_email]
    save_data(users, users_table)

# Fetch and save course data
print("\nCOURSES " + DASH * 27)
if course_trigger == 1:
    courses = get_all_courses()
    save_data(courses, courses_table)

# Fetch and save progress data
print("\nPROGRESS " + DASH * 26)
if progress_trigger == 1:
    progress = get_all_progress()
    save_data(progress, progress_table)

### Testing

### Archived Functions

In [91]:
def generate_groups():
    """Join the ``aboves`` entries with their type names.

    The ``types`` response has ``id``/``name`` renamed to
    ``type_id``/``type_name``; the first three columns of the ``aboves``
    response are merged with it on ``type_id``. Rows whose type name is
    "All Users", "Manager" or "Organization" are dropped.

    Returns:
        The filtered, merged DataFrame.
    """
    # Superset types
    type_columns = {"id": "type_id", "name": "type_name"}
    types_df = pd.DataFrame(get_response_json("types")).rename(columns=type_columns)

    # Supersets: keep only the first three columns
    aboves_df = pd.DataFrame(get_response_json("aboves", {"limit": 1000})).iloc[:, 0:3]

    merged = aboves_df.merge(types_df, on="type_id", suffixes=("_aboves", "_type"))

    excluded_types = ["All Users", "Manager", "Organization"]
    keep = ~merged["type_name"].isin(excluded_types)
    return merged[keep]


def extract_dept(dict_list):
    """Return the ``'name'`` of the first entry, or ``''`` for an empty list."""
    if len(dict_list) == 0:
        return ''
    first_entry = dict_list[0]
    return first_entry['name']

def extract_role(dict_list):
    """Return the name of the first job of the first entry, or ``''``.

    ``''`` is returned when ``dict_list`` is empty, when its first entry has
    no ``'jobs'`` key, or when that ``'jobs'`` value is empty.
    """
    if len(dict_list) == 0:
        return ''

    first_entry = dict_list[0]
    if 'jobs' not in first_entry:
        return ''

    jobs = first_entry['jobs']
    # A present-but-empty job list used to raise IndexError on jobs[0].
    if not jobs:
        return ''
    return jobs[0]['name']

def extract_worker_type(dict_list, groups_df):
    """Return the first entry name that is a 'Type (Safety)' group name.

    Args:
        dict_list: Iterable of dicts, each with a ``'name'`` key.
        groups_df: DataFrame with ``'type_name'`` and ``'name'`` columns.

    Returns:
        The first matching name in ``dict_list`` order, or ``None`` if no
        entry matches.
    """
    is_worker_type = groups_df['type_name'] == 'Type (Safety)'
    worker_types = list(groups_df[is_worker_type]['name'])

    # Read every name up front, so a malformed entry fails even after a match.
    entry_names = [entry['name'] for entry in dict_list]
    return next((name for name in entry_names if name in worker_types), None)

def extract_company(dict_list, groups_df):
    """Return the first entry name that is a 'Company' group name.

    Args:
        dict_list: Iterable of dicts, each with a ``'name'`` key.
        groups_df: DataFrame with ``'type_name'`` and ``'name'`` columns.

    Returns:
        The first matching name in ``dict_list`` order, or ``None`` if no
        entry matches.
    """
    is_company = groups_df['type_name'] == 'Company'
    companies = list(groups_df[is_company]['name'])

    # Read every name up front, so a malformed entry fails even after a match.
    entry_names = [entry['name'] for entry in dict_list]
    return next((name for name in entry_names if name in companies), None)

✅ Data Saved to depts/new_users.csv
