In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns()', None)

# Data Loading

In [5]:
df = pd.read_csv('/content/sample_data/ILEDA_anonymized.csv').drop_duplicates()
df

Unnamed: 0,Institution,Course,actor.id,timestamp,verb.id,verb.display,object.definition.name,object.definition.type,context,result.score.scaled,result.success,result.completion
0,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T18:55:47Z,http://id.tincanapi.com/verb/viewed,viewed,Advanced Data Management Systems (THJ),http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
1,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T15:21:18Z,http://id.tincanapi.com/verb/viewed,viewed,Assignment Instructions: ER & EER,http://adlnet.gov/expapi/activities/link,UEF Moodle > Advanced Data Management Systems ...,,False,False
2,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T15:08:13Z,http://id.tincanapi.com/verb/viewed,viewed,Recording: Intro lecture,http://adlnet.gov/expapi/activities/link,UEF Moodle > Advanced Data Management Systems ...,,False,False
3,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T15:08:10Z,http://id.tincanapi.com/verb/viewed,viewed,Advanced Data Management Systems (THJ),http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
4,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T09:03:32Z,http://id.tincanapi.com/verb/viewed,viewed,Advanced Data Management Systems (THJ),http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
306723,BMU,IT131,87441,2023-06-11T22:57:27Z,http://adlnet.gov/expapi/verbs/answered,answered,IT131-T05-Pitanje 13,http://adlnet.gov/expapi/activities/cmi.intera...,LAMS > test > Pitanje 13 > Attempt,1.000,True,True
306724,BMU,IT131,1c33c,2023-05-16T00:05:45Z,http://adlnet.gov/expapi/verbs/answered,answered,IT131-T03-Pitanje 15,http://adlnet.gov/expapi/activities/cmi.intera...,LAMS > test > Pitanje 15 > Attempt,0.000,False,True
306730,BMU,IT131,98607,2023-05-12T22:10:50Z,http://adlnet.gov/expapi/verbs/completed,completed,IT131-T03,http://adlnet.gov/expapi/activities/assessment,LAMS > test,0.375,True,True
306734,BMU,IT131,98607,2023-02-27T22:45:33Z,http://adlnet.gov/expapi/verbs/completed,completed,Lekcija 2 - Osnove umrežavanja - Tradicionalna...,http://id.tincanapi.com/activitytype/lms/module,LAMS > 217371,,False,False


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 299291 entries, 0 to 306739
Data columns (total 12 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   Institution             299291 non-null  object 
 1   Course                  299291 non-null  object 
 2   actor.id                299291 non-null  object 
 3   timestamp               299291 non-null  object 
 4   verb.id                 299291 non-null  object 
 5   verb.display            299291 non-null  object 
 6   object.definition.name  299291 non-null  object 
 7   object.definition.type  299291 non-null  object 
 8   context                 299291 non-null  object 
 9   result.score.scaled     11558 non-null   float64
 10  result.success          299291 non-null  bool   
 11  result.completion       299291 non-null  bool   
dtypes: bool(2), float64(1), object(9)
memory usage: 25.7+ MB


# Preprocessing

In [7]:
def change_format(x):
  return x.split('/')[-1]

df['verb.id'] = df['verb.id'].apply(change_format)
df

Unnamed: 0,Institution,Course,actor.id,timestamp,verb.id,verb.display,object.definition.name,object.definition.type,context,result.score.scaled,result.success,result.completion
0,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T18:55:47Z,viewed,viewed,Advanced Data Management Systems (THJ),http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
1,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T15:21:18Z,viewed,viewed,Assignment Instructions: ER & EER,http://adlnet.gov/expapi/activities/link,UEF Moodle > Advanced Data Management Systems ...,,False,False
2,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T15:08:13Z,viewed,viewed,Recording: Intro lecture,http://adlnet.gov/expapi/activities/link,UEF Moodle > Advanced Data Management Systems ...,,False,False
3,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T15:08:10Z,viewed,viewed,Advanced Data Management Systems (THJ),http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
4,UEF,Advanced Data Management Systems (THJ),aeb0c,2023-07-07T09:03:32Z,viewed,viewed,Advanced Data Management Systems (THJ),http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
306723,BMU,IT131,87441,2023-06-11T22:57:27Z,answered,answered,IT131-T05-Pitanje 13,http://adlnet.gov/expapi/activities/cmi.intera...,LAMS > test > Pitanje 13 > Attempt,1.000,True,True
306724,BMU,IT131,1c33c,2023-05-16T00:05:45Z,answered,answered,IT131-T03-Pitanje 15,http://adlnet.gov/expapi/activities/cmi.intera...,LAMS > test > Pitanje 15 > Attempt,0.000,False,True
306730,BMU,IT131,98607,2023-05-12T22:10:50Z,completed,completed,IT131-T03,http://adlnet.gov/expapi/activities/assessment,LAMS > test,0.375,True,True
306734,BMU,IT131,98607,2023-02-27T22:45:33Z,completed,completed,Lekcija 2 - Osnove umrežavanja - Tradicionalna...,http://id.tincanapi.com/activitytype/lms/module,LAMS > 217371,,False,False


In [8]:
courses_dict = {
    'Ihminen ja vuorovaikutteinen teknologia': 'Human Factors of Interactive Technology',
    'Advanced Data Management Systems (THJ)': 'Advanced Data Management Systems',
    'Проектиране на човеко-машинен интерфейс, летен семестър 2022/2023': 'Human-computer interaction',
    'Електронно обучение, летен семестър 2022/2023': 'e-Learning',
    'ARQUITECTURA DE COMPUTADORES': 'Computer Architecture',
    'APLICACIONES WEB': 'Web Applications',
    'CS120': 'Computer Networks',
    'IT131': 'Computer Organization'
}

df.replace({"Course": courses_dict}, inplace=True)

In [9]:
different_verbs = df[df['verb.id'] != df['verb.display']][['verb.id', 'verb.display']]

In [10]:
different_verbs.value_counts()

verb.id  verb.display      
receive  received              6582
start    started               4400
scored   attained grade for    4236
submit   submitted             1481
create   created                931
join     joined                 136
leave    left                    34
dtype: int64

1:1 mapping in the 2 columns. Remove the 2nd one.

In [11]:
df = df.drop('verb.display', axis=1)

In [12]:
actors = df['actor.id'].value_counts()
df[df['actor.id'].isin(actors[actors == 1].index)]

Unnamed: 0,Institution,Course,actor.id,timestamp,verb.id,object.definition.name,object.definition.type,context,result.score.scaled,result.success,result.completion
15773,UEF,Advanced Data Management Systems,0954e,2023-03-28T07:32:41Z,viewed,Advanced Data Management Systems (THJ),http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
21296,UEF,Human Factors of Interactive Technology,1f3e4,2023-06-05T10:40:27Z,viewed,Ihminen ja vuorovaikutteinen teknologia,http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
75157,UEF,Human Factors of Interactive Technology,de417,2023-05-05T09:01:17Z,viewed,Ihminen ja vuorovaikutteinen teknologia,http://id.tincanapi.com/activitytype/lms/course,UEF Moodle,,False,False
168316,SU,Human-computer interaction,e4f51,2023-02-22T08:53:54Z,viewed,Как искате да се провеждат лекциите: присъстве...,http://id.tincanapi.com/activitytype/lms/module,ILEDA Project > Проектиране на човеко-машинен ...,,False,False
169391,SU,Human-computer interaction,fb51a,2023-01-31T11:41:27Z,viewed,"Проектиране на човеко-машинен интерфейс, летен...",http://id.tincanapi.com/activitytype/lms/course,ILEDA Project,,False,False
185028,SU,e-Learning,6ed85,2023-03-28T13:40:16Z,completed,Тест за установяване на предварителното запозн...,http://id.tincanapi.com/activitytype/lms/module,"ILEDA Project > Електронно обучение, летен сем...",,False,False
189817,SU,e-Learning,cd331,2023-03-13T18:59:23Z,completed,Тест за установяване на предварителното запозн...,http://id.tincanapi.com/activitytype/lms/module,"ILEDA Project > Електронно обучение, летен сем...",,False,False
189818,SU,e-Learning,47d0e,2023-03-13T18:59:23Z,completed,Тест за установяване на предварителното запозн...,http://id.tincanapi.com/activitytype/lms/module,"ILEDA Project > Електронно обучение, летен сем...",,False,False
189819,SU,e-Learning,7dd01,2023-03-13T18:59:21Z,completed,Тест за установяване на предварителното запозн...,http://id.tincanapi.com/activitytype/lms/module,"ILEDA Project > Електронно обучение, летен сем...",,False,False
196531,UL,Computer Architecture,297c3,2023-08-10T06:51:25Z,viewed,ARQUITECTURA DE COMPUTADORES - Graduado en Ing...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False


In [13]:
df.sort_values(by='timestamp')

Unnamed: 0,Institution,Course,actor.id,timestamp,verb.id,object.definition.name,object.definition.type,context,result.score.scaled,result.success,result.completion
292146,UL,Web Applications,23aa8,2022-08-09T09:34:44Z,viewed,APLICACIONES WEB - Graduado en Ingeniería Info...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
292145,UL,Web Applications,13981,2022-08-09T20:51:57Z,viewed,APLICACIONES WEB - Graduado en Ingeniería Info...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
292144,UL,Web Applications,23aa8,2022-08-16T16:43:02Z,viewed,APLICACIONES WEB - Graduado en Ingeniería Info...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
292143,UL,Web Applications,23aa8,2022-08-17T12:47:30Z,viewed,APLICACIONES WEB - Graduado en Ingeniería Info...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
271397,UL,Computer Architecture,1757f,2022-08-17T22:44:48Z,viewed,ARQUITECTURA DE COMPUTADORES - Graduado en Ing...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
...,...,...,...,...,...,...,...,...,...,...,...
271400,UL,Web Applications,8d7d5,2023-08-11T07:51:13Z,scored,Práctica final - Entrega,http://adlnet.gov/expapi/activities/assessment,New Site > APLICACIONES WEB - Graduado en Inge...,0.087,True,True
271399,UL,Web Applications,e62e6,2023-08-11T07:51:13Z,scored,Práctica final - Entrega,http://adlnet.gov/expapi/activities/assessment,New Site > APLICACIONES WEB - Graduado en Inge...,0.087,True,True
271402,UL,Web Applications,54088,2023-08-11T07:51:13Z,scored,Práctica final - Entrega,http://adlnet.gov/expapi/activities/assessment,New Site > APLICACIONES WEB - Graduado en Inge...,0.087,True,True
271401,UL,Web Applications,c015e,2023-08-11T07:51:13Z,scored,Práctica final - Entrega,http://adlnet.gov/expapi/activities/assessment,New Site > APLICACIONES WEB - Graduado en Inge...,0.087,True,True


The data is from 09.08.2022 to 11.08.2023

In [14]:
def get_sec_day(x):
  return int(x)*24*60*60

def get_days_month(x):
  thirty_one = [1,3,5,7,8,10,12]
  thirty = [4,6,9,11]
  twenty_eight = [2]

  days = 0
  for i in range(1,int(x)+1):
    if i in thirty_one:
      days += 31
    if i in thirty:
      days += 30
    if i in twenty_eight:
      days += 28

  return days

def get_sec_month(x):
  x = get_days_month(x)
  return x*24*60*60

def get_sec_year(x):
  return int(x)*365*24*60*60

def get_sec_hour(x):
  return int(x)*60*60

def get_sec_minute(x):
  return int(x)*60

def timestamp_to_seconds(timestamp):
  date, hour = timestamp.split('-'), timestamp.split('-')[-1][3:-1]
  date[-1] = date[-1][:2]
  hour = hour.split(':')

  total_seconds = get_sec_year(date[0]) + get_sec_month(date[1]) + get_sec_day(date[2]) + get_sec_hour(hour[0]) + get_sec_minute(hour[1]) + int(hour[2])
  return total_seconds

df['timestamp'] = df['timestamp'].apply(timestamp_to_seconds)
df['timestamp'] = df['timestamp'] - min(df['timestamp'])

In [15]:
df.sort_values(by=['timestamp'])

Unnamed: 0,Institution,Course,actor.id,timestamp,verb.id,object.definition.name,object.definition.type,context,result.score.scaled,result.success,result.completion
292146,UL,Web Applications,23aa8,0,viewed,APLICACIONES WEB - Graduado en Ingeniería Info...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
292145,UL,Web Applications,13981,40633,viewed,APLICACIONES WEB - Graduado en Ingeniería Info...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
292144,UL,Web Applications,23aa8,630498,viewed,APLICACIONES WEB - Graduado en Ingeniería Info...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
292143,UL,Web Applications,23aa8,702766,viewed,APLICACIONES WEB - Graduado en Ingeniería Info...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
271397,UL,Computer Architecture,1757f,738604,viewed,ARQUITECTURA DE COMPUTADORES - Graduado en Ing...,http://id.tincanapi.com/activitytype/lms/course,New Site,,False,False
...,...,...,...,...,...,...,...,...,...,...,...
271400,UL,Web Applications,8d7d5,31702589,scored,Práctica final - Entrega,http://adlnet.gov/expapi/activities/assessment,New Site > APLICACIONES WEB - Graduado en Inge...,0.087,True,True
271399,UL,Web Applications,e62e6,31702589,scored,Práctica final - Entrega,http://adlnet.gov/expapi/activities/assessment,New Site > APLICACIONES WEB - Graduado en Inge...,0.087,True,True
271402,UL,Web Applications,54088,31702589,scored,Práctica final - Entrega,http://adlnet.gov/expapi/activities/assessment,New Site > APLICACIONES WEB - Graduado en Inge...,0.087,True,True
271401,UL,Web Applications,c015e,31702589,scored,Práctica final - Entrega,http://adlnet.gov/expapi/activities/assessment,New Site > APLICACIONES WEB - Graduado en Inge...,0.087,True,True


In [16]:
def change_object_definition(object_def):
  return object_def.split('/')[-1]

df['object.definition.type'] = df['object.definition.type'].apply(change_object_definition)

In [17]:
df['object.definition.type'].value_counts()

course             102682
cmi.interaction     33546
resource            30790
discussion          25061
module              23838
quiz                21083
assessment          20319
attempt             14749
forum-topic         11768
link                10281
page                 2561
review               2322
meeting               197
survey                 88
lesson                  6
Name: object.definition.type, dtype: int64

In [18]:
actors_dict = dict()
course_dict = dict()
institution_dict = dict()

for actor in set(df['actor.id']):
  actors_dict[actor] = df[df['actor.id'] == actor].sort_values(by='timestamp')

for course in set(df['Course']):
  course_dict[course] = df[df['Course'] == course].sort_values(by='timestamp')

for institution in set(df['Institution']):
  institution_dict[institution] = df[df['Institution'] == institution].sort_values(by='timestamp')

In [19]:
institution_dict['SU']

Unnamed: 0,Institution,Course,actor.id,timestamp,verb.id,object.definition.name,object.definition.type,context,result.score.scaled,result.success,result.completion
169433,SU,Human-computer interaction,b4324,14807381,viewed,"Проектиране на човеко-машинен интерфейс, летен...",course,ILEDA Project,,False,False
169432,SU,Human-computer interaction,b4324,14807413,viewed,"Проектиране на човеко-машинен интерфейс, летен...",course,ILEDA Project,,False,False
169431,SU,Human-computer interaction,190c5,14808024,viewed,"Проектиране на човеко-машинен интерфейс, летен...",course,ILEDA Project,,False,False
169430,SU,Human-computer interaction,b5cad,14808935,viewed,"Проектиране на човеко-машинен интерфейс, летен...",course,ILEDA Project,,False,False
169429,SU,Human-computer interaction,b5cad,14808939,viewed,Обявления,forum-topic,ILEDA Project > Проектиране на човеко-машинен ...,,False,False
...,...,...,...,...,...,...,...,...,...,...,...
153576,SU,Human-computer interaction,704aa,24872111,viewed,"Проектиране на човеко-машинен интерфейс, летен...",course,ILEDA Project,,False,False
153575,SU,Human-computer interaction,704aa,24872226,viewed,Записване за защитите на проекти по ПЧМИ - Мол...,link,ILEDA Project > Проектиране на човеко-машинен ...,,False,False
153574,SU,Human-computer interaction,704aa,24872275,viewed,"Проектиране на човеко-машинен интерфейс, летен...",course,ILEDA Project,,False,False
153573,SU,Human-computer interaction,704aa,24872336,viewed,"Проектиране на човеко-машинен интерфейс, летен...",course,ILEDA Project,,False,False


In [20]:
df['result.score.scaled'] = df['result.score.scaled'].fillna(-1)
df

Unnamed: 0,Institution,Course,actor.id,timestamp,verb.id,object.definition.name,object.definition.type,context,result.score.scaled,result.success,result.completion
0,UEF,Advanced Data Management Systems,aeb0c,28718463,viewed,Advanced Data Management Systems (THJ),course,UEF Moodle,-1.000,False,False
1,UEF,Advanced Data Management Systems,aeb0c,28705594,viewed,Assignment Instructions: ER & EER,link,UEF Moodle > Advanced Data Management Systems ...,-1.000,False,False
2,UEF,Advanced Data Management Systems,aeb0c,28704809,viewed,Recording: Intro lecture,link,UEF Moodle > Advanced Data Management Systems ...,-1.000,False,False
3,UEF,Advanced Data Management Systems,aeb0c,28704806,viewed,Advanced Data Management Systems (THJ),course,UEF Moodle,-1.000,False,False
4,UEF,Advanced Data Management Systems,aeb0c,28682928,viewed,Advanced Data Management Systems (THJ),course,UEF Moodle,-1.000,False,False
...,...,...,...,...,...,...,...,...,...,...,...
306723,BMU,Computer Organization,87441,26400163,answered,IT131-T05-Pitanje 13,cmi.interaction,LAMS > test > Pitanje 13 > Attempt,1.000,True,True
306724,BMU,Computer Organization,1c33c,24157861,answered,IT131-T03-Pitanje 15,cmi.interaction,LAMS > test > Pitanje 15 > Attempt,0.000,False,True
306730,BMU,Computer Organization,98607,23891766,completed,IT131-T03,assessment,LAMS > test,0.375,True,True
306734,BMU,Computer Organization,98607,17241049,completed,Lekcija 2 - Osnove umrežavanja - Tradicionalna...,module,LAMS > 217371,-1.000,False,False


In [None]:
df[df['object.definition.name'].str.contains('Recording')]

In [None]:
df[df['object.definition.type'] == 'link']

In [23]:
df[df['object.definition.type'] == 'cmi.interaction'][['verb.id', 'object.definition.type']].value_counts()

verb.id   object.definition.type
answered  cmi.interaction           33546
dtype: int64

In [24]:
df['verb.id'].value_counts()

viewed       229721
answered      33546
completed     18224
receive        6582
start          4400
scored         4236
submit         1481
create          931
join            136
leave            34
Name: verb.id, dtype: int64

cmi.interaction == answered

In [25]:
len(set(df['actor.id'])) # 829 people

829

In [26]:
df[['verb.id', 'object.definition.type']].value_counts()

verb.id    object.definition.type
viewed     course                    102682
answered   cmi.interaction            33546
viewed     resource                   30790
           discussion                 24130
           attempt                    14749
           quiz                       13171
           module                     13111
           forum-topic                11768
completed  module                     10727
viewed     link                       10281
           assessment                  6369
completed  assessment                  5697
receive    quiz                        4260
scored     assessment                  4236
viewed     page                        2561
start      assessment                  2548
receive    review                      2322
start      quiz                        1852
completed  quiz                        1800
submit     assessment                  1469
create     discussion                   931
join       meeting                      13

In [27]:
df['object.definition.name'].value_counts()

Ihminen ja vuorovaikutteinen teknologia                              34133
ARQUITECTURA DE COMPUTADORES - Graduado en Ingeniería Informática    30135
Attempt                                                              14749
Електронно обучение, летен семестър 2022/2023                        10880
Проектиране на човеко-машинен интерфейс, летен семестър 2022/2023    10551
                                                                     ...  
Questionnaire turn                                                       1
OpenMP                                                                   1
Запис на лекцията от миналата година                                     1
Qt and Pointers                                                          1
Проектиране на съвместна работа                                          1
Name: object.definition.name, Length: 1549, dtype: int64

In [2]:
!pip install googletrans==4.0.0-rc1
from googletrans import Translator



In [30]:
translator = Translator()

# Text to be translated
text_to_translate = "Hola, ¿cómo estás?"

# Translate to English
translated_text = translator.translate(text_to_translate, dest='en')

# Print the translated text
print("Original text:", text_to_translate)
print("Translated text:", translated_text.text)

Original text: Hola, ¿cómo estás?
Translated text: Hello how are you?


In [32]:
translated_object_def = [translator.translate(text).text for text in list(set(df['object.definition.name']))[0:10]]
translated_object_def

['CS120-T04-Walking 3',
 'What of the following are the following criteria for learning?',
 'Presentation: Maxim Piles',
 'Practical Delivery 7 - Animal Crossing Cards Axios',
 'Exercise 11- Accessibility, Adaptation and Personalization',
 'IT131-T01-Walk 46',
 'Cyrillic',
 'Exercise 4 - Ideas and Prototypes',
 'Upload images demonstrating that you have been able to execute the program on your computer with the new file ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Upload images showing that you have aable to run the program on your computer with the new fille.',
 'Prior evaluation']

Not gonna work ;(. Google иска пари за това

In [35]:
df[['context', 'object.definition.type']].value_counts()

context                                                                                                    object.definition.type
UEF Moodle                                                                                                 course                    40826
New Site                                                                                                   course                    40425
ILEDA Project                                                                                              course                    21431
UEF Moodle > Ihminen ja vuorovaikutteinen teknologia > RyhmÃ¤tyÃ¶alue                                      discussion                20830
UEF Moodle > Ihminen ja vuorovaikutteinen teknologia                                                       module                    13013
                                                                                                                                     ...  
New Site > ARQUITECTURA DE COMPUTADO

In [37]:
df[df['context'].str.contains('Attempt')]['context'].value_counts()

UEF Moodle > Ihminen ja vuorovaikutteinen teknologia > TehtÃ¤vÃ¤ 9 > Attempt                                                                                    6226
UEF Moodle > Ihminen ja vuorovaikutteinen teknologia > TehtÃ¤vÃ¤ 14 > Attempt                                                                                   3520
UEF Moodle > Ihminen ja vuorovaikutteinen teknologia > TehtÃ¤vÃ¤ 4 > Attempt                                                                                    2236
UEF Moodle > Ihminen ja vuorovaikutteinen teknologia > TehtÃ¤vÃ¤ 12 > Attempt                                                                                   2166
New Site > ARQUITECTURA DE COMPUTADORES - Graduado en Ingeniería Informática > Exercise 1.1- Comparing Computer Performance (deadline: 21.04.2023) > Attempt    1890
                                                                                                                                                                ... 
LAMS > tes

In [39]:
def change_attempt_context(context):
  return ' > '.join(context.split(' > ')[-2:])
#df[df['context'].str.contains('Attempt')]['context'] = df[df['context'].str.contains('Attempt')]['context'].apply(change_attempt_context)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[df['context'].str.contains('Attempt')]['context'] = df[df['context'].str.contains('Attempt')]['context'].apply(change_attempt_context)


In [41]:
df['context'].loc[df['context'].str.contains('Attempt')] = df[df['context'].str.contains('Attempt')]['context'].apply(change_attempt_context)
df['context']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['context'].loc[df['context'].str.contains('Attempt')] = df[df['context'].str.contains('Attempt')]['context'].apply(change_attempt_context)


0                                                UEF Moodle
1         UEF Moodle > Advanced Data Management Systems ...
2         UEF Moodle > Advanced Data Management Systems ...
3                                                UEF Moodle
4                                                UEF Moodle
                                ...                        
306723                                 Pitanje 13 > Attempt
306724                                 Pitanje 15 > Attempt
306730                                          LAMS > test
306734                                        LAMS > 217371
306739                                 Pitanje 20 > Attempt
Name: context, Length: 299291, dtype: object

In [42]:
df[df['context'].str.contains('Attempt')]['context']

763         Self-assessment: SQL > Attempt
891       Self-assessment: NoSQL > Attempt
892       Self-assessment: NoSQL > Attempt
893       Self-assessment: NoSQL > Attempt
894       Self-assessment: NoSQL > Attempt
                        ...               
306715                Pitanje 36 > Attempt
306718                Pitanje 33 > Attempt
306723                Pitanje 13 > Attempt
306724                Pitanje 15 > Attempt
306739                Pitanje 20 > Attempt
Name: context, Length: 39928, dtype: object

In [43]:
set(df['context'])

{'3.1. Multilevel caching > Attempt',
 'CS120-T01 > Attempt',
 'CS120-T02 > Attempt',
 'CS120-T03 > Attempt',
 'CS120-T04 > Attempt',
 'CS120-T05 > Attempt',
 'Defence quizz Lab Assignment #3 > Attempt',
 'Defence quizz lab Assignment #4 > Attempt',
 'Defence quizz lab Assignment #6 > Attempt',
 'Exercise 1.1- Comparing Computer Performance (deadline: 21.04.2023) > Attempt',
 'Exercise 1.2- CPU Performance Equation (deadline: 21.04.2023) > Attempt',
 'Exercise 2.1. Pipelining > Attempt',
 'ILEDA Project',
 'ILEDA Project > Електронно обучение, летен семестър 2022/2023',
 'ILEDA Project > Електронно обучение, летен семестър 2022/2023 > Задание 3.1: Критерии за оценка на курс',
 'ILEDA Project > Електронно обучение, летен семестър 2022/2023 > Комбиниран тест за основни знания върху Тема 3 и Тема 5',
 'ILEDA Project > Електронно обучение, летен семестър 2022/2023 > Обявления от преподавателите',
 'ILEDA Project > Електронно обучение, летен семестър 2022/2023 > ПОВТОРЕН Тест Електронно обу

# TODO: Try to categorize the activities' s context