## Preparation & Data Fetching

In [None]:
import os
import json
from datetime import datetime
from prisma import Prisma
import pandas as pd

# set the python path correctly for module imports to work
import sys
sys.path.append('../../')

from src.modules.participant_analytics.compute_correctness import compute_correctness
from src.modules.participant_analytics.get_participant_responses import get_participant_responses
from src.modules.participant_analytics.aggregate_analytics import aggregate_analytics
from src.modules.participant_analytics.save_participant_analytics import save_participant_analytics


In [None]:
# Instantiate the Prisma client used by every analytics step in this notebook
db = Prisma()

# Connection string of the database to analyse; adjust it to match your own setup.
# It is exported as DATABASE_URL before connect() is called.
database_url = 'postgresql://klicker:klicker@localhost:5432/klicker-prod'
os.environ['DATABASE_URL'] = database_url

db.connect()

# Script settings: this flag is passed through to the analytics helper functions
verbose = False

# Switches selecting which analytics granularities should be computed
compute_daily = True
compute_weekly = False
compute_monthly = False
compute_course = False

## Daily Analytics

In [None]:
# Build the range of days to process: one entry per calendar day from the start
# date up to and including today (as reported by datetime.now()).
# TODO: select this start date again (caution: computations take longer...)
# start_date = '2022-10-23'
start_date = '2024-06-20'
end_date = datetime.now().strftime('%Y-%m-%d')
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

if compute_daily:
    # Iterate over the date range and compute the participant analytics for each day
    for curr_date in date_range:
        # Format the day once and reuse it. Building the string outside the f-string
        # also avoids nesting identical quotes, which is a SyntaxError before Python 3.12.
        specific_date = curr_date.strftime('%Y-%m-%d')
        print(f'Computing participant analytics for {specific_date}')

        # Bounds of the current day. Separate names are used so that the
        # start_date / end_date values defining the overall range are not overwritten.
        day_start = specific_date + 'T00:00:00.000Z'
        day_end = specific_date + 'T23:59:59.999Z'

        # Fetch all question response detail entries for this specific day
        participant_response_details = get_participant_responses(db, day_start, day_end, verbose)

        # Compute the correctness of each question response detail
        df_details, df_element_instances = compute_correctness(db, participant_response_details, verbose)

        # No details for this day: there is nothing to aggregate or save
        if df_details is None:
            print(f'No participant responses found for {specific_date}')
            continue

        # Compute participant analytics (score/xp counts and correctness statistics)
        df_analytics = aggregate_analytics(df_details, verbose)
        if verbose:
            # A bare expression inside a loop is not rendered by the notebook,
            # so the preview has to be printed explicitly.
            print(df_analytics.head())

        # Save the aggregated analytics into the database, stamped with the start of the day
        timestamp = day_start
        save_participant_analytics(db, df_analytics, timestamp, 'DAILY')

        # Drop the per-day dataframes so no stale data can leak into the next iteration
        del df_details
        del df_element_instances
        del df_analytics


In [None]:
# Disconnect from the database: closes the connection opened by db.connect() in the
# setup cell. Run this once all analytics cells have finished.
db.disconnect()