In [2]:
import pandas as pd
import numpy as np

# Data Loading

In [3]:
# Load the preprocessed event log into `df`; every later cell reads this frame.
# NOTE(review): the absolute '/content/...' path looks environment-specific — confirm before running elsewhere.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, presumably a saved index — consider index_col=0.
df = pd.read_csv('/content/sample_data/processed.csv')
df

Unnamed: 0,Institution,Course,actor.id,timestamp,verb.id,object.definition.type,result.score.scaled,result.success,result.completion,Teaching
0,UEF,Advanced Data Management Systems,0,2023-07-07 18:55:47,viewed,course,,False,False,Flipped classroom
1,UEF,Advanced Data Management Systems,0,2023-07-07 15:21:18,viewed,link,,False,False,Flipped classroom
2,UEF,Advanced Data Management Systems,0,2023-07-07 15:08:13,viewed,link,,False,False,Flipped classroom
3,UEF,Advanced Data Management Systems,0,2023-07-07 15:08:10,viewed,course,,False,False,Flipped classroom
4,UEF,Advanced Data Management Systems,0,2023-07-07 09:03:32,viewed,course,,False,False,Flipped classroom
...,...,...,...,...,...,...,...,...,...,...
299286,BMU,Computer Networks,820,2023-06-11 22:57:27,answered,cmi.interaction,1.000,True,True,Flipped classroom
299287,BMU,Computer Networks,799,2023-05-16 00:05:45,answered,cmi.interaction,0.000,False,True,Flipped classroom
299288,BMU,Computer Networks,788,2023-05-12 22:10:50,completed,assessment,0.375,True,True,Flipped classroom
299289,BMU,Computer Networks,788,2023-02-27 22:45:33,completed,module,,False,False,Flipped classroom


In [4]:
# Disabled: change_assessment is only defined in a later cell, and resume_actor /
# resume_course_or_institution apply this same relabeling to a copy of `df`.
#df['object.definition.type'] = df.apply(lambda row: change_assessment(row['verb.id'], row['object.definition.type']), axis=1)

In [5]:
# Number of logged events per object type, most frequent first.
df['object.definition.type'].value_counts()

course             102682
cmi.interaction     33546
resource            30790
discussion          25061
module              23838
quiz                21083
assessment          20319
attempt             14749
forum-topic         11768
link                10281
page                 2561
review               2322
meeting               197
survey                 88
lesson                  6
Name: object.definition.type, dtype: int64

In [28]:
# Number of logged events per (object type, verb) combination.
df.value_counts(subset=['object.definition.type', 'verb.id'])

object.definition.type  verb.id  
course                  viewed       102682
cmi.interaction         answered      33546
resource                viewed        30790
discussion              viewed        24130
attempt                 viewed        14749
quiz                    viewed        13171
module                  viewed        13111
forum-topic             viewed        11768
module                  completed     10727
link                    viewed        10281
assessment              viewed         6369
                        completed      5697
quiz                    receive        4260
assessment              scored         4236
page                    viewed         2561
assessment              start          2548
review                  receive        2322
quiz                    start          1852
                        completed      1800
assessment              submit         1469
discussion              create          931
meeting                 join            13

In [32]:
# Distribution of the success flag over the 'scored' assessment events.
df.loc[(df['object.definition.type'] == 'assessment') & (df['verb.id'] == 'scored'), 'result.success'].value_counts()

True     4227
False       9
Name: result.success, dtype: int64

In [6]:
# One colour per institution. The palette is repeated cyclically (np.resize) so
# the two columns always have the same length; a fixed 4-item list only works
# when there are exactly four institutions.
color_df = pd.DataFrame({
    'Institution': df['Institution'].unique(),
    'color': np.resize(['red', 'blue', 'green', 'yellow'], len(df['Institution'].unique()))
})
# Attach every course of each institution; 'Institution' is the only shared column.
color_df = color_df.merge(df[['Course', 'Institution']].drop_duplicates(), how='left', on='Institution')

In [21]:
# Which (verb, object type) combinations carry result.success == False.
df.loc[df['result.success'].eq(False), ['verb.id', 'object.definition.type']].value_counts()

verb.id    object.definition.type
viewed     course                    102682
           resource                   30790
           discussion                 24130
           attempt                    14749
           quiz                       13171
           module                     13111
           forum-topic                11768
completed  module                     10727
viewed     link                       10281
answered   cmi.interaction             9241
viewed     assessment                  6369
receive    quiz                        4260
completed  assessment                  3189
viewed     page                        2561
start      assessment                  2548
receive    review                      2322
start      quiz                        1852
submit     assessment                  1469
create     discussion                   931
join       meeting                      136
viewed     survey                        76
leave      meeting                       3

# Idea 2: Summaries per actor, course and institution

In [42]:
def change_assessment(verb, object_def_type):
  """Split the generic 'assessment' object type into 'homework' or 'test'.

  Any type other than 'assessment' is returned unchanged. For 'assessment',
  the verbs 'submit' and 'scored' yield 'homework', the verbs 'completed' and
  'start' yield 'test', and every other verb leaves it as 'assessment'.
  """
  is_assessment = object_def_type == 'assessment'
  if is_assessment and verb in ('submit', 'scored'):
    return 'homework'
  if is_assessment and verb in ('completed', 'start'):
    return 'test'
  return object_def_type

def get_count(actor_df, object_def_type, verb):
  """Count the rows of `actor_df` that have the given object type and verb.

  Returns the triple (object_def_type, verb, count); count is 0 when the
  combination does not occur in `actor_df`.
  """
  key = (object_def_type, verb)
  pair_counts = actor_df[['object.definition.type', 'verb.id']].value_counts()
  occurrences = pair_counts.loc[key] if key in pair_counts.index else 0
  return (*key, occurrences)

def get_score(actor_df, object_def_type, verb):
  """Return (min, mean, max) of 'result.score.scaled' for one (type, verb) pair.

  Args:
    actor_df: frame with the columns 'object.definition.type', 'verb.id' and
      'result.score.scaled'.
    object_def_type: object type to select.
    verb: verb to select.

  Returns:
    (None, None, None) when no row matches. Otherwise the minimum, mean and
    maximum of the matching scores, all three ignoring missing values; each
    is NaN when every matching score is missing.
  """
  selected = (actor_df['object.definition.type'] == object_def_type) & (actor_df['verb.id'] == verb)
  scores = actor_df.loc[selected, 'result.score.scaled']
  if scores.empty:
    return None, None, None

  # Series.min/max skip NaN just like Series.mean does. The builtin min()/max()
  # compare element by element, so a NaN could win or lose depending on row order.
  return scores.min(), scores.mean(), scores.max()

def calculate_score(df):
  """Mean scaled score over the successful rows of `df`, never below 0.

  Missing scores on successful rows count as 0. Returns 0 when `df` is empty,
  when it has no successful row, or when the mean is not positive.
  """
  if df.empty:
    return 0
  succeeded = df['result.success'] == True
  mean_score = df.loc[succeeded, 'result.score.scaled'].fillna(0).mean()
  # A NaN mean (no successful rows) fails the comparison and falls back to 0.
  return mean_score if mean_score > 0 else 0

def get_ratio(actor_df: pd.DataFrame, object_definition_type: str, verb_id1: str, verb_id2: str):
  """Ratio of `verb_id1` rows to `verb_id2` rows for one object type.

  Only rows whose 'object.definition.type' equals `object_definition_type`
  are counted. Returns None when there is no `verb_id2` row.
  """
  of_type = actor_df['object.definition.type'] == object_definition_type

  def rows_with(verb_id):
    return len(actor_df[of_type & (actor_df['verb.id'] == verb_id)])

  denominator = rows_with(verb_id2)
  if denominator == 0:
    return None
  return rows_with(verb_id1) / denominator

def _get_place_among(df, actor_id, column, value):
  """Rank `actor_id` among the actors that have a row with `df[column] == value`.

  Each actor is scored with `calculate_score` over all of that actor's
  'homework', 'quiz' and 'test' rows in `df`; actors without such rows score 0.
  NOTE(review): the score uses the actor's rows from the whole of `df`, not only
  the rows inside the selected course/institution — confirm that is intended.

  Returns:
    (place, total): place is the 1-based position of the actor's score among
    the distinct scores sorted descending (actors with equal scores share a
    place); total is the number of actors in the group.

  Raises:
    ValueError: if `actor_id` has no row in the group.
  """
  peers = set(df.loc[df[column] == value, 'actor.id'])
  if actor_id not in peers:
    # Without this check a missing actor would silently be ranked as score 0.
    raise ValueError(f'actor {actor_id!r} has no rows with {column} == {value!r}')

  # Select the graded rows once and split them per actor, instead of
  # re-filtering the whole frame for every actor.
  graded = df[df['actor.id'].isin(peers) & df['object.definition.type'].isin(['homework', 'quiz', 'test'])]
  scores = dict.fromkeys(peers, 0)
  for actor, actor_rows in graded.groupby('actor.id'):
    scores[actor] = calculate_score(actor_rows[['result.score.scaled', 'result.success']])

  distinct_scores = sorted(set(scores.values()), reverse=True)
  return distinct_scores.index(scores[actor_id]) + 1, len(scores)

def get_place_in_course(df, actor_id, course):
  """Return (place, number_of_actors) of `actor_id` among the actors of `course`.

  Raises ValueError when the actor has no row in that course.
  """
  return _get_place_among(df, actor_id, 'Course', course)

def get_place_in_institution(df, actor_id, instituiton):
  """Return (place, number_of_actors) of `actor_id` among the actors of an institution.

  The misspelt parameter name `instituiton` is kept so keyword callers keep working.
  Raises ValueError when the actor has no row in that institution.
  """
  return _get_place_among(df, actor_id, 'Institution', instituiton)

def get_successful_assessments(actor_df, type_of_assessment):
  """Count the successful rows for one (object type, verb) combination.

  Args:
    actor_df: frame with the columns 'object.definition.type', 'verb.id' and
      'result.success'.
    type_of_assessment: pair (object type, verb) selecting the rows.

  Returns:
    Number of selected rows whose 'result.success' is True, as an int.
  """
  object_def_type, verb = type_of_assessment[0], type_of_assessment[1]
  matching = (actor_df['object.definition.type'] == object_def_type) & (actor_df['verb.id'] == verb)
  # Only rows flagged as successful count; previously every matching row was
  # counted regardless of its 'result.success' value.
  succeeded = actor_df['result.success'] == True
  return int((matching & succeeded).sum())

def resume_actor(df, actor_id):
  """Build the summary ("resume") of a single actor.

  Works on a copy of `df` in which 'assessment' rows are relabelled by
  `change_assessment` and the object types 'page', 'review', 'meeting',
  'survey' and 'lesson' are dropped.

  Args:
    df: event log with the columns 'actor.id', 'verb.id',
      'object.definition.type', 'result.score.scaled', 'result.success',
      'Course' and 'Institution'.
    actor_id: value of 'actor.id' to summarise.

  Returns:
    A tuple (actions_df, scores_df, successful_assessments, place):
      actions_df: one row per (Type, Verb) pair found anywhere in the filtered
        log, with the actor's own Count, sorted by Count descending.
      scores_df: min/avg/max score for homework, test and quiz.
      successful_assessments: results of `get_successful_assessments` for
        quiz, homework and test, in that order.
      place: [place_in_course, place_in_institution], using the Course and
        Institution of the actor's first row.

  Raises:
    IndexError: if the actor has no rows left after the type filter. The
      exception type matches what the later `.iloc[0]` lookup used to raise.
  """
  excluded_types = ['page', 'review', 'meeting', 'survey',  'lesson']

  # Fail fast, before the full-frame relabeling. Relabeling only rewrites
  # 'assessment', so this mask selects exactly the rows that survive the filter.
  actor_has_rows = ((df['actor.id'] == actor_id) & ~df['object.definition.type'].isin(excluded_types)).any()
  if not actor_has_rows:
    raise IndexError(f'actor {actor_id!r} has no rows to summarise')

  df = df.copy()
  # Zipping the two columns avoids building a Series per row as df.apply(axis=1) does.
  df['object.definition.type'] = [change_assessment(verb, object_def_type)
                                  for verb, object_def_type in zip(df['verb.id'], df['object.definition.type'])]
  df = df[~df['object.definition.type'].isin(excluded_types)]

  # Every (type, verb) combination of the whole log, so combinations the actor
  # never produced still show up with a count of 0.
  type_verb_pairs = df.loc[df['object.definition.type'].notna(), ['object.definition.type', 'verb.id']].drop_duplicates()

  actor_df = df[df['actor.id'] == actor_id]

  items = [get_count(actor_df, object_def_type, verb)  # (object_def_type, verb, count)
           for object_def_type, verb in type_verb_pairs.itertuples(index=False, name=None)]

  # Stable sort keeps equal counts in a reproducible order.
  actions_df = pd.DataFrame(items, columns=['Type', 'Verb', 'Count']).sort_values(by='Count', ascending=False, kind='stable').reset_index(drop=True)

  scored_pairs = [('homework', 'scored'), ('test', 'completed'), ('quiz', 'completed')]
  score_stats = [get_score(actor_df, object_def_type, verb) for object_def_type, verb in scored_pairs]

  successful_assessments = [get_successful_assessments(actor_df, type_of_assessment) for type_of_assessment in [('quiz', 'completed'), ('homework', 'scored'), ('test', 'completed')]]

  scores_df = pd.DataFrame({'Type': [pair[0] for pair in scored_pairs],
                            'Min_Score': [stats[0] for stats in score_stats],
                            'Avg_Score': [stats[1] for stats in score_stats],
                            'Max_Score': [stats[2] for stats in score_stats]})

  course = actor_df['Course'].iloc[0]
  institution = actor_df['Institution'].iloc[0]

  place_in_course = get_place_in_course(df, actor_id, course)
  place_in_institution = get_place_in_institution(df, actor_id, institution)
  place = [place_in_course, place_in_institution]

  return actions_df, scores_df, successful_assessments, place

def display_actor(df, actor_id):
  """Print the summary that `resume_actor` builds for one actor.

  Sections, separated by a line of 50 asterisks: action counts, score
  statistics, the three successful-assessment counters, and the place list.
  """
  actions, scores, successes, ranking = resume_actor(df, actor_id)
  divider = '*'*50

  # TODO: display(actions_df) -- still to decide how exactly, and which parts, to show.
  # TODO: display(scores_df) -- still to decide how exactly to present it.
  print(actions)
  print(divider)
  print(scores)
  print(divider)
  print(f'Number of successful quizzes: {successes[0]}')
  print(f'Number of successful homeworks: {successes[1]}')
  print(f'Number of successful tests: {successes[2]}')
  print(divider)
  print(ranking)
  # TODO: display(place) -- again, still to decide how to do it.



In [43]:
def resume_course_or_institution(df, id):
  """Build the summary of one course or one institution.

  `id` is treated as an institution when it occurs in df['Institution'],
  otherwise as a course. (The parameter name shadows the builtin `id`; it is
  kept so keyword callers keep working.) Works on a copy of `df` in which
  'assessment' rows are relabelled by `change_assessment` and the object types
  'page', 'review', 'meeting', 'survey' and 'lesson' are dropped.

  Args:
    df: event log with the columns 'actor.id', 'verb.id',
      'object.definition.type', 'result.score.scaled', 'result.success',
      'Course' and 'Institution'.
    id: institution or course name.

  Returns:
    A tuple (actions_df, scores_df, successful_assessments, total_students,
    total_students_in_courses):
      actions_df: one row per (Type, Verb) pair found anywhere in the filtered
        log, with the Count inside the selected course/institution, sorted by
        Count descending.
      scores_df: min/avg/max score for homework, test and quiz.
      successful_assessments: results of `get_successful_assessments` for
        quiz, homework and test, in that order.
      total_students: number of distinct 'actor.id' values in the selection.
      total_students_in_courses: distinct actors per course (columns 'Course'
        and 'Count') for an institution, None for a course.
  """
  institutions = set(df['Institution'])
  type_object = 'Institution' if id in institutions else 'Course'

  df = df.copy()
  # Zipping the two columns avoids building a Series per row as df.apply(axis=1) does.
  df['object.definition.type'] = [change_assessment(verb, object_def_type)
                                  for verb, object_def_type in zip(df['verb.id'], df['object.definition.type'])]
  df = df[~df['object.definition.type'].isin(['page', 'review', 'meeting', 'survey',  'lesson'])]

  # Every (type, verb) combination of the whole log, so combinations missing
  # from the selection still show up with a count of 0.
  type_verb_pairs = df.loc[df['object.definition.type'].notna(), ['object.definition.type', 'verb.id']].drop_duplicates()

  object_df = df[df[type_object] == id]
  total_students = len(set(object_df['actor.id']))
  total_students_in_courses = None if type_object == 'Course' else object_df.groupby('Course')['actor.id'].nunique().reset_index().rename(columns = {'actor.id': 'Count'})

  items = [get_count(object_df, object_def_type, verb)  # (object_def_type, verb, count)
           for object_def_type, verb in type_verb_pairs.itertuples(index=False, name=None)]

  # Stable sort keeps equal counts in a reproducible order.
  actions_df = pd.DataFrame(items, columns=['Type', 'Verb', 'Count']).sort_values(by='Count', ascending=False, kind='stable').reset_index(drop=True)

  scored_pairs = [('homework', 'scored'), ('test', 'completed'), ('quiz', 'completed')]
  score_stats = [get_score(object_df, object_def_type, verb) for object_def_type, verb in scored_pairs]

  successful_assessments = [get_successful_assessments(object_df, type_of_assessment) for type_of_assessment in [('quiz', 'completed'), ('homework', 'scored'), ('test', 'completed')]]

  scores_df = pd.DataFrame({'Type': [pair[0] for pair in scored_pairs],
                            'Min_Score': [stats[0] for stats in score_stats],
                            'Avg_Score': [stats[1] for stats in score_stats],
                            'Max_Score': [stats[2] for stats in score_stats]})

  return actions_df, scores_df, successful_assessments, total_students, total_students_in_courses

def display_course_or_institution(df, name):
  """Print the summary that `resume_course_or_institution` builds for `name`.

  Sections, separated by a line of 50 asterisks: student total, students per
  course (only when that table is not None), action counts, the three
  successful-assessment counters, and the score statistics.
  """
  actions, scores, successes, student_total, students_per_course = resume_course_or_institution(df, name)
  divider = '*'*50
  # TODO: display(actions_df) -- still to decide how exactly, and which parts, to show.
  # TODO: display(scores_df) -- still to decide how exactly to present it.
  print(f'Number of students in {name}: {student_total}')
  print(divider)

  if students_per_course is not None:
    print(f"Number of students in {name}'s courses:\n{students_per_course}")
    print(divider)

  print(actions)
  print(divider)

  print(f'Number of successful quizzes: {successes[0]}')
  print(f'Number of successful homeworks: {successes[1]}')
  print(f'Number of successful tests: {successes[2]}')
  print(divider)

  print(scores)
  print(divider)


In [9]:
# An actor id that matches no row is expected to fail with IndexError; catch it
# so the demonstration does not interrupt "Run All".
try:
  resume_actor(df, None)
except IndexError as error:
  print(f'IndexError: {error}')

IndexError: ignored

In [18]:
# Distinct actor ids as a list; the order is arbitrary because it comes from a set.
a = list(set(df['actor.id']))

In [44]:
# Example: print the summary for the actor with id 4.
display_actor(df, 4)

               Type       Verb  Count
0            course     viewed    116
1              link     viewed     54
2   cmi.interaction   answered     10
3        assessment     viewed     10
4          homework     scored      8
5          homework     submit      7
6            module  completed      6
7          resource     viewed      5
8           attempt     viewed      4
9              test      start      3
10             test  completed      2
11           module     viewed      0
12             quiz  completed      0
13             quiz     viewed      0
14             quiz    receive      0
15             quiz      start      0
16       discussion     viewed      0
17       discussion     create      0
18      forum-topic     viewed      0
**************************************************
       Type  Min_Score  Avg_Score  Max_Score
0  homework        0.3     0.8025        1.0
1      test        1.0     1.0000        1.0
2      quiz        NaN        NaN        NaN
*********

In [45]:
# Example: print the summary for 'UEF', which occurs in the 'Institution' column.
display_course_or_institution(df, 'UEF')

Number of students in UEF: 341
**************************************************
Number of students in UEF's courses:
                                    Course  Count
0         Advanced Data Management Systems     82
1  Human Factors of Interactive Technology    259
**************************************************
               Type       Verb  Count
0            course     viewed  40826
1        discussion     viewed  23201
2   cmi.interaction   answered  18469
3           attempt     viewed  14749
4       forum-topic     viewed  10481
5          resource     viewed   9121
6            module     viewed   8849
7        assessment     viewed   6369
8            module  completed   4778
9              link     viewed   4190
10         homework     scored   2095
11             test      start   2078
12             test  completed   2039
13         homework     submit   1469
14       discussion     create    824
15             quiz  completed      0
16             quiz     viewed    