## Preparation & Data Fetching

In [None]:
import os
import json
from datetime import datetime
from prisma import Prisma
import pandas as pd

# set the python path correctly for module imports to work
import sys
sys.path.append('../../')

from src.modules.participant_analytics.compute_correctness import compute_correctness
from src.modules.participant_analytics.get_participant_responses import get_participant_responses
from src.modules.participant_analytics.aggregate_analytics import aggregate_analytics
from src.modules.participant_analytics.save_participant_analytics import save_participant_analytics
from src.modules.participant_analytics.compute_participant_analytics import compute_participant_analytics
from src.modules.participant_analytics.compute_participant_course_analytics import compute_participant_course_analytics


In [None]:
# Instantiate the Prisma client; the connection itself is opened by db.connect() below
db = Prisma()

# Unconditionally point DATABASE_URL at the target database before connecting
os.environ.update({'DATABASE_URL': 'postgresql://klicker:klicker@localhost:5432/klicker-prod'})

db.connect()

# Script settings: verbosity flag handed to the analytics functions
verbose = False

# Toggles selecting which analytics the following cells compute (all enabled)
compute_daily = compute_weekly = compute_monthly = compute_course = True

## Daily / Weekly / Monthly Analytics

In [None]:
# Build the daily / weekly / monthly evaluation dates from 2022-10-01 up to today.
# 'W' yields week-end dates (pandas anchors it on Sundays by default) and
# 'ME' yields month-end dates.
start_date = '2022-10-01'
end_date = datetime.now().strftime('%Y-%m-%d')
date_range_daily = pd.date_range(start=start_date, end=end_date, freq='D')
date_range_weekly = pd.date_range(start=start_date, end=end_date, freq='W')
date_range_monthly = pd.date_range(start=start_date, end=end_date, freq='ME')

# NOTE: the loops below use their own period_start / period_end names so that the
# overall start_date / end_date defining the ranges above are not overwritten.

if compute_daily:
    # Iterate over the date range and compute the participant analytics for each day
    for curr_date in date_range_daily:
        # Format the date first: nesting the same quote type inside an f-string
        # is a SyntaxError on Python versions before 3.12.
        specific_date = curr_date.strftime('%Y-%m-%d')
        print(f'Computing daily participant analytics for {specific_date}')

        # The period covers the full day, from the first to the last millisecond
        period_start = specific_date + 'T00:00:00.000Z'
        period_end = specific_date + 'T23:59:59.999Z'

        # Daily entries are timestamped with the start of the day
        timestamp = period_start
        compute_participant_analytics(db, period_start, period_end, timestamp, "DAILY", verbose)

if compute_weekly:
    # Iterate over the date range and compute the participant analytics for each week
    for curr_date in date_range_weekly:
        # The period ends on the week-end date and starts six days earlier (seven days in total)
        period_end = curr_date.strftime('%Y-%m-%d') + 'T23:59:59.999Z'
        period_start = (curr_date - pd.DateOffset(days=6)).strftime('%Y-%m-%d') + 'T00:00:00.000Z'
        print(f'Computing weekly participant analytics for {period_start} to {period_end}')

        # Weekly entries are timestamped with the end of the week
        timestamp = period_end
        compute_participant_analytics(db, period_start, period_end, timestamp, "WEEKLY", verbose)

if compute_monthly:
    # Iterate over the date range and compute the participant analytics for each month
    for curr_date in date_range_monthly:
        # The period ends on the month-end date; subtracting one MonthBegin offset
        # rolls back to the first day of that same month
        period_end = curr_date.strftime('%Y-%m-%d') + 'T23:59:59.999Z'
        period_start = (curr_date - pd.offsets.MonthBegin(1)).strftime('%Y-%m-%d') + 'T00:00:00.000Z'
        print(f'Computing monthly participant analytics for {period_start} to {period_end}')

        # Monthly entries are timestamped with the end of the month
        timestamp = period_end
        compute_participant_analytics(db, period_start, period_end, timestamp, "MONTHLY", verbose)


## Course-Wide Participant Analytics (update daily?)

In [None]:
# Compute course-wide participant analytics for every course that started on or before the cutoff date
if compute_course:
    curr_date = '2024-08-27'

    # Only courses whose start date lies on or before the end of the cutoff day are fetched.
    # Incremental scripts can additionally filter on 'endDate' to reduce the amount of
    # required computations, e.g.
    #     'endDate': {'gt': datetime.now().strftime('%Y-%m-%d') + 'T00:00:00.000Z'}
    started_filter = {'startDate': {'lte': curr_date + 'T23:59:59.999Z'}}
    courses = db.course.find_many(where=started_filter)

    # Convert the fetched course records into a DataFrame (one row per course)
    df_courses = pd.DataFrame([course.dict() for course in courses])
    print("Found {} courses with a start date before {}".format(len(df_courses), curr_date))

    # The analytics function reports back on the courses that had no responses
    courses_without_responses = compute_participant_course_analytics(db, df_courses, verbose)
    print("Found {} courses without any responses".format(courses_without_responses))
        

In [None]:
# Close the database connection that was opened with db.connect() during setup
db.disconnect()