In [None]:
import os
import json
from prisma import Prisma
import pandas as pd

In [None]:
# Connection string of the database to analyse. A DATABASE_URL that is already
# present in the environment takes precedence, so pointing the notebook at a
# different database does not require editing (or committing) credentials here;
# the literal below is only the fallback for a local setup.
os.environ.setdefault(
    'DATABASE_URL',
    'postgresql://klicker:klicker@localhost:5432/klicker-prod',
)

# Create the client only after the environment is configured, then open the
# connection used by all query cells below.
db = Prisma()
db.connect()

In [None]:
# Load at most 10,000 response-detail rows, each with its participation and
# its element instance (including that instance's element stack).
response_details = db.questionresponsedetail.find_many(
    take=10000,
    include={
        "participation": True,
        "elementInstance": {
            "include": {"elementStack": True},
        },
    },
)

# Show the first record to check the shape of the returned data.
print(response_details[0])

In [None]:
# Load at most 10,000 aggregated response rows, each with its participation
# and its element instance (including that instance's element stack).
responses = db.questionresponse.find_many(
    take=10000,
    include={
        "participation": True,
        "elementInstance": {
            "include": {"elementStack": True},
        },
    },
)

# Show the first record to check the shape of the returned data.
print(responses[0])

In [None]:
def map_response_details(response_details):
    """Convert one response record into a plain dict with ``elementStack`` hoisted.

    Args:
        response_details: A record exposing ``.dict()``. The resulting dict is
            expected to carry the included ``elementInstance`` relation.

    Returns:
        A new dict holding every key of ``response_details.dict()`` plus a
        top-level ``"elementStack"`` entry taken from
        ``elementInstance["elementStack"]``. The entry is ``None`` when the
        record has no element instance or the instance has no element stack.
    """
    as_dict = response_details.dict()

    # The relation may be missing or None; fall back to an empty mapping so a
    # single such record does not abort the mapping of the whole result set.
    element_instance = as_dict.get('elementInstance') or {}

    return {
        **as_dict,
        "elementStack": element_instance.get('elementStack'),
    }


# Turn every fetched response-detail record into its flattened-friendly dict.
response_details_mapped = [
    map_response_details(record) for record in response_details
]

response_details_mapped[0]

In [None]:
# The same mapper is reused for the aggregated response records.
responses_mapped = [map_response_details(record) for record in responses]

responses_mapped[0]

In [None]:
# Flatten the nested dicts into one wide frame; nested keys are joined with
# '_' (e.g. participation -> courseId becomes participation_courseId).
df_response_details = pd.json_normalize(data=response_details_mapped, sep='_')

print(df_response_details.columns, df_response_details.head(), sep='\n')

In [None]:
# Flatten the nested dicts into one wide frame; nested keys are joined with
# '_' (e.g. elementStack -> type becomes elementStack_type).
df_responses = pd.json_normalize(data=responses_mapped, sep='_')

print(df_responses.columns, df_responses.head(), sep='\n')

# Response Details and Activity Analytics

In [None]:
# Build the analytics frame in a single select -> assign -> rename chain.
# Assigning a column on the result of a column selection (as a separate
# statement) triggers pandas' SettingWithCopyWarning; `assign` returns a new
# frame instead, so the cell is warning-free and safe to re-run.
df_response_details_relevant = (
    df_response_details[
        [
            'id',
            'score',
            'pointsAwarded',
            'xpAwarded',
            'createdAt',
            'participantId',
            'participation_courseId',
        ]
    ]
    # Truncate the timestamp to its calendar day while keeping a datetime
    # dtype, which the later resampling on 'createdAt' relies on.
    .assign(createdAt=lambda frame: pd.to_datetime(frame['createdAt'].dt.date))
    .rename(columns={'participation_courseId': 'courseId'})
)

df_response_details_relevant.head()

In [None]:
# Daily activity per participant and course: the number of responses (count
# of 'id') together with the summed score, points and XP. The grouping keys
# stay in the index, which the resampling cells below select via
# level='createdAt'.
df_response_details_relevant_daily = (
    df_response_details_relevant
    .groupby(['participantId', 'courseId', 'createdAt'])
    .agg(
        {
            'id': 'count',
            'score': 'sum',
            'pointsAwarded': 'sum',
            'xpAwarded': 'sum',
        }
    )
)

df_response_details_relevant_daily.head()

In [None]:
# Print a concise summary (index, columns, dtypes) of the daily aggregate.
df_response_details_relevant_daily.info()

In [None]:
# Weekly totals: within each (participantId, courseId) group, resample the
# 'createdAt' index level into weekly ('W') bins and sum the daily values.
df_response_details_relevant_weekly = (
    df_response_details_relevant_daily
    .groupby(['participantId', 'courseId'])
    .resample('W', level='createdAt')
    .sum()
)

df_response_details_relevant_weekly.head()

In [None]:
# Print a concise summary (index, columns, dtypes) of the weekly aggregate.
df_response_details_relevant_weekly.info()

In [None]:
# Monthly totals: within each (participantId, courseId) group, resample the
# 'createdAt' index level into month-end ('ME') bins and sum the daily values.
df_response_details_relevant_monthly = (
    df_response_details_relevant_daily
    .groupby(['participantId', 'courseId'])
    .resample('ME', level='createdAt')
    .sum()
)

df_response_details_relevant_monthly.head()

In [None]:
# Print a concise summary (index, columns, dtypes) of the monthly aggregate.
df_response_details_relevant_monthly.info()

In [None]:
# Per-course overview: how many responses were recorded (count of 'id') and
# the average score, points and XP awarded per response.
df_response_details_relevant_mean = (
    df_response_details_relevant
    .groupby(['courseId'])
    .agg(
        {
            'id': 'count',
            'score': 'mean',
            'pointsAwarded': 'mean',
            'xpAwarded': 'mean',
        }
    )
)

df_response_details_relevant_mean.head()

In [None]:
# to_csv does not create missing directories, so make sure the output folder
# exists; otherwise the export fails on a fresh checkout.
os.makedirs('out', exist_ok=True)

# Persist every aggregate; the index (grouping keys) is written as well.
df_response_details_relevant_daily.to_csv('out/df_response_details_relevant_daily.csv')
df_response_details_relevant_weekly.to_csv('out/df_response_details_relevant_weekly.csv')
df_response_details_relevant_monthly.to_csv('out/df_response_details_relevant_monthly.csv')
df_response_details_relevant_mean.to_csv('out/df_response_details_relevant_mean.csv')

# Responses and Quiz Analytics

In [None]:
# List all flattened column names on one line to pick the relevant ones below.
', '.join(df_responses.columns)

In [None]:
# Build the quiz-analytics frame in a single select -> assign -> rename chain.
# Assigning columns on the result of a column selection (as separate
# statements) triggers pandas' SettingWithCopyWarning; `assign` returns a new
# frame instead, so the cell is warning-free and safe to re-run.
df_responses_relevant = (
    df_responses[
        [
            "id",
            "trialsCount",
            "totalScore",
            "totalPointsAwarded",
            "totalXpAwarded",
            "lastAwardedAt",
            "lastXpAwardedAt",
            "lastAnsweredAt",
            "correctCount",
            "correctCountStreak",
            "lastCorrectAt",
            "partialCorrectCount",
            "lastPartialCorrectAt",
            "wrongCount",
            "lastWrongAt",
            "eFactor",
            "interval",
            "nextDueAt",
            "createdAt",
            "updatedAt",
            "participantId",
            "participation_courseId",
            "elementStack_type",
            "elementStack_microLearningId",
            "elementStack_practiceQuizId",
            "elementStack_liveQuizId",
            "elementStack_groupActivityId",
        ]
    ]
    # Truncate both timestamps to their calendar day while keeping a
    # datetime dtype.
    .assign(
        createdAt=lambda frame: pd.to_datetime(frame["createdAt"].dt.date),
        updatedAt=lambda frame: pd.to_datetime(frame["updatedAt"].dt.date),
    )
    .rename(columns={"participation_courseId": "courseId"})
)

df_responses_relevant.head()

# Cleanup

In [None]:
# Close the database connection opened at the top of the notebook.
db.disconnect()