# Análise dos intervalos de tempo dos assessments

In [0]:
import re

import pandas as pd
import numpy as np

from pprint import pprint

In [0]:
# Load the assessments information CSV into df and preview its first five rows.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/My Drive/PBICT-UEA-2019 2020/arquivos_de_mineracao/datasets_csv/informacoes_sobre_os_assessments.csv"
)
df.head(n=5)

## Modificando os tipos de dados das colunas

In [0]:
# Print the column summary (dtypes and non-null counts) before converting types.
df.info()

In [0]:
# Parse the start_date column into datetimes using the "YYYY-MM-DD HH:MM" layout.
df['start_date'] = pd.to_datetime(
    arg=df['start_date'],
    format='%Y-%m-%d %H:%M',
)

In [0]:
# Parse the end_date column into datetimes using the "YYYY-MM-DD HH:MM" layout.
df['end_date'] = pd.to_datetime(
    arg=df['end_date'],
    format='%Y-%m-%d %H:%M',
)

## Identificando o intervalo de tempo de cada assessment

In [0]:
# Duration of each assessment: end_date minus start_date.
df['time_diff'] = df['end_date'].sub(df['start_date'])

In [0]:
# Duration in weeks, computed from the whole-day component of time_diff only
# (hours and minutes beyond full days are not counted).
df['time_weeks'] = df['time_diff'].dt.days / 7

## Análise dos intervalos de tempo

In [0]:
# Every assessment of class 109, ordered by start date.
df.loc[df['class'].eq(109)].sort_values(by='start_date')

In [0]:
# Homework assessments of class 109, ordered by start date.
df.loc[
    df['class'].eq(109) & df['assessment_type'].eq('homework')
].sort_values(by='start_date')

In [0]:
# Descriptive statistics of the duration (in weeks) for class 109.
df.loc[df['class'] == 109, 'time_weeks'].describe()

In [0]:
# Sum of time_weeks over all assessments of class 109.
df.loc[df['class'] == 109, 'time_weeks'].sum()

In [0]:
# Sum of time_weeks over all assessments of class 111.
df.loc[df['class'] == 111, 'time_weeks'].sum()

In [0]:
# Sum of time_weeks over all assessments of class 205.
df.loc[df['class'] == 205, 'time_weeks'].sum()

In [0]:
# Homework assessments of class 205, ordered by start date.
df.loc[
    df['class'].eq(205) & df['assessment_type'].eq('homework')
].sort_values(by='start_date')

## Quantidade de exercícios por turma

In [0]:
# Count the non-null assessment_type entries for each (semester, class) pair;
# margins=True also adds the overall total.
df.pivot_table(
    values='assessment_type',
    index=["semester", "class"],
    aggfunc='count',
    margins=True,
)

In [0]:
# Keep only the homework assessments, ordered by class and then by start date.
df2 = df.loc[df['assessment_type'].eq('homework')].sort_values(
    by=['class', 'start_date']
)

In [0]:
# Display the homework-only DataFrame (df2).
df2

## Filtrando os assessments do tipo homework e laboratório

In [0]:
def has_lab(row_full):
    """Tell whether a text mentions "lab".

    Args:
        row_full: String to inspect (the notebook passes an assessment title).

    Returns:
        True when the substring "lab" occurs anywhere in ``row_full``
        (case-insensitive), False otherwise.
    """
    return re.search(r'lab', row_full, re.IGNORECASE) is not None

In [0]:

# Boolean mask: True for homework rows whose title mentions "lab".
# has_lab only needs the title, so it is applied to that column directly
# instead of materialising every full row with DataFrame.apply(axis=1).
filter_by_lab = df2['title'].apply(has_lab)

In [0]:
# Keep only the homework rows flagged by filter_by_lab (title mentions "lab").
df3 = df2[filter_by_lab]

In [0]:
# Distinct class identifiers that have lab homework, in order of appearance.
pd.unique(df3['class'])

In [0]:
# Lab homework assessments of class 102.
df3.loc[df3['class'].eq(102)]

In [0]:
# Lab homework assessments of class 103.
df3.loc[df3['class'].eq(103)]

In [0]:
# Lab homework assessments of class 105.
df3.loc[df3['class'].eq(105)]

In [0]:
# Lab homework assessments of class 106.
df3.loc[df3['class'].eq(106)]

In [0]:
# Lab homework assessments of class 107.
df3.loc[df3['class'].eq(107)]

In [0]:
# Lab homework assessments of class 108.
df3.loc[df3['class'].eq(108)]

In [0]:
# Lab homework assessments of class 109.
df3.loc[df3['class'].eq(109)]

In [0]:
# Classes to inspect next: 180, 181, 183, 185, 186.
# Lab homework assessments of class 180.
df3.loc[df3['class'].eq(180)]

In [0]:
# Lab homework assessments of class 181.
df3.loc[df3['class'].eq(181)]

In [0]:
# Lab homework assessments of class 183.
df3.loc[df3['class'].eq(183)]

In [0]:
# Lab homework assessments of class 185.
df3.loc[df3['class'].eq(185)]

In [0]:
# Lab homework assessments of class 186.
df3.loc[df3['class'].eq(186)]

In [0]:
# Lab homework assessments of class 220.
df3.loc[df3['class'].eq(220)]

In [0]:
# Lab homework assessments of class 221.
df3.loc[df3['class'].eq(221)]

In [0]:
# Lab homework assessments of class 222.
df3.loc[df3['class'].eq(222)]

In [0]:
# Lab homework assessments of class 223.
df3.loc[df3['class'].eq(223)]

In [0]:
# Lab homework assessments of class 224.
df3.loc[df3['class'].eq(224)]

In [0]:
# Lab homework assessments of class 206.
df3.loc[df3['class'].eq(206)]

## Gerando um dataset sem os laboratórios 0, pois geralmente possuem mais dias que o normal

### Criando um filtro para laboratórios do tipo zero

In [0]:
# Distinct lab homework titles, used to see how "lab 0" entries are written.
pd.unique(df3['title'])

In [0]:
# Rows whose title is exactly the "lab 0" title below.
df3.loc[df3['title'].eq('Laboratório de Codificação 0 – Primeiros passos')]

In [0]:
def lab_zero(row_title):
    """Tell whether a title should be KEPT, i.e. is not a "lab zero" title.

    A title counts as "lab zero" when it contains either a ``0`` with a
    whitespace character on both sides, or the text ``lab0`` followed by a
    space (case-insensitive).

    NOTE(review): a title that ends right after the ``0`` (no trailing
    whitespace) is not matched by this pattern -- confirm no such titles
    exist in the dataset.

    Args:
        row_title: Assessment title string.

    Returns:
        False for a "lab zero" title, True for every other title, so the
        result can be used directly as a keep-mask.
    """
    return re.search(r'\s0\s|lab0 ', row_title, re.IGNORECASE) is None

In [0]:
# Boolean keep-mask: True for lab rows that are NOT "lab 0" (see lab_zero).
# lab_zero only needs the title, so it is applied to that column directly
# instead of materialising every full row with DataFrame.apply(axis=1).
filter_lab_zero = df3['title'].apply(lab_zero)

In [0]:
# Keep the lab rows for which lab_zero returned True, i.e. drop the "lab 0" entries.
df4 = df3[filter_lab_zero]

In [0]:
# Print the column summary (dtypes and non-null counts) of the filtered lab rows.
df4.info()

In [0]:
# Display the lab homework DataFrame without the "lab 0" entries (df4).
df4

## Filtrando os três primeiros assessments do tipo homework laboratório

In [0]:
def first_three(row_title):
    """Tell whether a title belongs to one of the first three assessments.

    The first run of digits found in the title is taken as the assessment
    number. Note that a number of 0 also satisfies the ``<= 3`` check.

    Args:
        row_title: Assessment title string.

    Returns:
        True when the first number in the title is at most 3; False when it
        is greater than 3 or when the title contains no number at all.
    """
    number_match = re.search(r'\d+', row_title)
    if number_match is None:
        # Always return a real boolean: an implicit None here would make the
        # resulting pandas mask non-boolean and break the row selection.
        return False
    return int(number_match.group(0)) <= 3

In [0]:
# Mask selecting the rows whose title number is at most 3 (see first_three).
# first_three only needs the title, so it is applied to that column directly
# instead of materialising every full row with DataFrame.apply(axis=1).
filter_first_three = df4['title'].apply(first_three)

In [0]:
# Keep only the rows selected by filter_first_three (first number in the title <= 3).
df5 = df4[filter_first_three]

In [0]:
# Preview the first ten rows of the "first three labs" selection.
df5.head(n=10)

## Exportando os exercícios do tipo homework para um novo dataset

In [0]:
# Export every homework assessment (df2) to CSV without the index column.
df2.to_csv(
    path_or_buf='/content/drive/My Drive/PBICT-UEA-2019 2020/arquivos_de_mineracao/arquivos_dataset/dados_dos_exercícios_tipo_homework.csv',
    index=False,
)

In [0]:
# Export the lab homework rows without "lab 0" (df4) to CSV without the index column.
df4.to_csv(
    path_or_buf='/content/drive/My Drive/PBICT-UEA-2019 2020/arquivos_de_mineracao/arquivos_dataset/dados_dos_exercícios_tipo_homework_filtrados.csv',
    index=False,
)

In [0]:
# Export the rows whose title number is at most 3 (df5) to CSV without the index column.
df5.to_csv(
    path_or_buf='/content/drive/My Drive/PBICT-UEA-2019 2020/arquivos_de_mineracao/arquivos_dataset/dados_dos_exercícios_tipo_homework_filtrados_3_semanas.csv',
    index=False,
)