In [1]:
import codigo
from codigo import utils
from codigo import model_utils

In [2]:
import pandas as pd
import numpy as np
pd.set_option('mode.chained_assignment',None)
import matplotlib.pyplot as plt
import seaborn as sns
import math
from pickle import dump

%matplotlib inline
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Importar Dataset depurado

In [3]:
# Load the cleaned glucose dataset; the info() output further down shows the columns ID, Date and Glucose level.
df_Inicial = pd.read_csv(r'dataset/generado/datasetDepurado.csv')

# Revisamos las columnas que contiene el Dataset

In [4]:
df_Inicial.head()

Unnamed: 0,ID,Date,Glucose level
0,LIB193263,2020-06-09 19:08:00,99.0
1,LIB193263,2020-06-09 19:23:00,92.0
2,LIB193263,2020-06-09 19:38:00,86.0
3,LIB193263,2020-06-09 19:53:00,85.0
4,LIB193263,2020-06-09 20:08:00,85.0


In [5]:
df_Inicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2966685 entries, 0 to 2966684
Data columns (total 3 columns):
 #   Column         Dtype  
---  ------         -----  
 0   ID             object 
 1   Date           object 
 2   Glucose level  float64
dtypes: float64(1), object(2)
memory usage: 67.9+ MB


### Cambiamos el formato de la columna Date de object a datetime64

In [6]:
# Convert the textual 'Date' column to datetime64 (dtype is confirmed by the next info() call).
df_Inicial = df_Inicial.assign(Date=pd.to_datetime(df_Inicial['Date']))

In [7]:
df_Inicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2966685 entries, 0 to 2966684
Data columns (total 3 columns):
 #   Column         Dtype         
---  ------         -----         
 0   ID             object        
 1   Date           datetime64[ns]
 2   Glucose level  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 67.9+ MB


In [8]:
df_Inicial['Date'].min()

Timestamp('2018-01-06 00:31:00')

In [9]:
df_Inicial['Date'].max()

Timestamp('2022-03-20 23:58:00')

# Generar DataSet de cada Paciente

In [10]:
from pickle import load

# Parameters forwarded unchanged to model_utils.GenDataSet in every patient section.
# NOTE(review): presumably train_share/val_share are cumulative cut points (train up to 80 %,
# validation up to 90 %, test the remainder) -- confirm against model_utils.GenDataSet.
train_share = 0.8
val_share = 0.9
lag = 30
n_ahead = 1
# NOTE(review): `min` shadows the built-in min() for the rest of the notebook. The name is kept
# because every later GenDataSet call passes `min`; rename it everywhere in one go if desired.
min = 15

featuresObj = ['Glucose level', 'hour', 'min', 'pod_id', 'level_id']


def _load_pickle(path):
    """Return the object pickled at `path`, closing the file handle afterwards."""
    # pickle can execute arbitrary code while loading: only use on files produced by this project.
    with open(path, 'rb') as fh:
        return load(fh)


# Previously each scaler was read with load(open(...)), which never closed the file.
scalerGlucosa = _load_pickle('dataset/generado/scalerGlucosa.scaler')
scalerHours = _load_pickle('dataset/generado/scalerHours.scaler')
scalerMin = _load_pickle('dataset/generado/scalerMin.scaler')
scalerPodId = _load_pickle('dataset/generado/scalerPodId.scaler')
scalerLevelId = _load_pickle('dataset/generado/scalerLevelId.scaler')

## Paciente 1: LIB193399

In [11]:
# Keep only the rows that belong to the patient handled in this section.
ID_Paciente = 'LIB193399'
mascara_paciente = df_Inicial['ID'].eq(ID_Paciente)
dfpaciente = df_Inicial.loc[mascara_paciente]

In [12]:
dfpaciente

Unnamed: 0,ID,Date,Glucose level
2964772,LIB193399,2020-09-30 18:15:00,105.0
2964773,LIB193399,2020-09-30 18:30:00,101.0
2964774,LIB193399,2020-09-30 18:45:00,98.0
2964775,LIB193399,2020-09-30 19:00:00,99.0
2964776,LIB193399,2020-09-30 19:15:00,108.0
...,...,...,...
2966680,LIB193399,2020-10-20 20:50:00,83.0
2966681,LIB193399,2020-10-20 21:05:00,76.0
2966682,LIB193399,2020-10-20 21:20:00,67.0
2966683,LIB193399,2020-10-20 21:35:00,63.0


Seleccionamos los últimos 6 eventos registrados y comprobamos que estén separados por intervalos de 15 min

In [13]:
# Hold out the 6 most recent readings of this patient for the final evaluation.
dfpacienteEvaluacion = dfpaciente.tail(6)
# Enforce what was previously only verified by eye: consecutive held-out readings are 15 minutes apart.
assert dfpacienteEvaluacion['Date'].diff().dropna().eq(pd.Timedelta(minutes=15)).all(), \
    'Los eventos de evaluación no están separados por 15 minutos'
dfpacienteEvaluacion

Unnamed: 0,ID,Date,Glucose level
2966679,LIB193399,2020-10-20 20:35:00,87.0
2966680,LIB193399,2020-10-20 20:50:00,83.0
2966681,LIB193399,2020-10-20 21:05:00,76.0
2966682,LIB193399,2020-10-20 21:20:00,67.0
2966683,LIB193399,2020-10-20 21:35:00,63.0
2966684,LIB193399,2020-10-20 21:50:00,63.0


Guardamos los valores de glucosa 

In [14]:
# Glucose values of the held-out readings as a plain NumPy array.
arrayEvaluacion = dfpacienteEvaluacion['Glucose level'].values
arrayEvaluacion

array([87., 83., 76., 67., 63., 63.])

In [15]:
# Persist the held-out glucose values and show how many were written.
np.save('dataset/generado/paciente1/arrayEvaluacion.npy', arrayEvaluacion)
len(arrayEvaluacion)

6

El resto del dataset lo vamos a utilizar para el entrenamiento y validación de cada modelo

In [16]:
# Training/validation data: every reading of this patient except the 6 held out for evaluation.
# Re-derived from df_Inicial (instead of re-slicing dfpaciente) so that running this cell more
# than once does not discard another 6 rows each time.
dfpaciente = df_Inicial[df_Inicial['ID'] == ID_Paciente].iloc[:-6]
dfpaciente

Unnamed: 0,ID,Date,Glucose level
2964772,LIB193399,2020-09-30 18:15:00,105.0
2964773,LIB193399,2020-09-30 18:30:00,101.0
2964774,LIB193399,2020-09-30 18:45:00,98.0
2964775,LIB193399,2020-09-30 19:00:00,99.0
2964776,LIB193399,2020-09-30 19:15:00,108.0
...,...,...,...
2966674,LIB193399,2020-10-20 19:19:00,127.0
2966675,LIB193399,2020-10-20 19:34:00,117.0
2966676,LIB193399,2020-10-20 19:49:00,113.0
2966677,LIB193399,2020-10-20 20:04:00,106.0


Realizamos el preprocesamiento y obtenemos el dataset completo

In [17]:
# First preprocessing pass over the patient's readings (held-out rows already removed).
# The arrays unpacked here are reassigned by the second GenDataSet call before anything is saved.
(
    dfPacienteScaled,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfpaciente, featuresObj, [ID_Paciente], min,
    train_share, val_share, lag, n_ahead,
    scalerHours, scalerMin, scalerGlucosa, scalerPodId, scalerLevelId,
    fillNullData=True, resample=True, normalized=True,
)
dfPacienteScaled.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1929 entries, 2020-09-30 18:15:00 to 2020-10-20 20:15:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  1929 non-null   float64
 1   hour           1929 non-null   float64
 2   min            1929 non-null   float64
 3   pod_id         1929 non-null   float64
 4   level_id       1929 non-null   float64
dtypes: float64(5)
memory usage: 90.4 KB


Eliminamos los registros de los eventos hipoglucémicos y fijos

In [18]:
# Load the pickled collection of event timestamps to remove for patient 1.
# The context manager closes the file; load(open(...)) left the handle open.
# NOTE: pickle can execute arbitrary code on load -- only open files generated by this project.
with open('dataset/generado/paciente1/eventos.list', 'rb') as f:
    eventosEliminar = load(f)
print(len(eventosEliminar))

24


In [19]:
# Remove all listed events with a single drop() call instead of copying the frame once per event.
# drop() still raises KeyError if any timestamp is missing from the index.
dfPacienteScaled = dfPacienteScaled.drop(index=[pd.Timestamp(evento) for evento in eventosEliminar])
dfPacienteScaled.info()
print('Eliminados', len(eventosEliminar))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1905 entries, 2020-09-30 18:15:00 to 2020-10-20 20:15:00
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  1905 non-null   float64
 1   hour           1905 non-null   float64
 2   min            1905 non-null   float64
 3   pod_id         1905 non-null   float64
 4   level_id       1905 non-null   float64
dtypes: float64(5)
memory usage: 89.3 KB
Eliminados 24


Generamos un dataset en formato inicial con los datos restantes

In [20]:
# Rebuild a frame in the raw layout (Date, Glucose level, ID) from the rows that survived the drop;
# glucose is mapped back through the glucose scaler's inverse_transform.
glucosa_original = scalerGlucosa.inverse_transform(dfPacienteScaled[['Glucose level']].values)
dfPacienteInicial = pd.DataFrame({'Date': dfPacienteScaled.index})
dfPacienteInicial[['Glucose level']] = glucosa_original
dfPacienteInicial['ID'] = ID_Paciente

In [21]:
dfPacienteInicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1905 entries, 0 to 1904
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           1905 non-null   datetime64[ns]
 1   Glucose level  1905 non-null   float64       
 2   ID             1905 non-null   object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 44.8+ KB


Generamos el conjunto de datos de entrenamiento a partir del dataset depurado

In [22]:
# Second preprocessing pass, now on the rebuilt frame; these arrays are the ones saved below.
(
    dfPacienteFinal,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfPacienteInicial, featuresObj, [ID_Paciente], min,
    train_share, val_share, lag, n_ahead,
    scalerHours, scalerMin, scalerGlucosa, scalerPodId, scalerLevelId,
    fillNullData=True, resample=True, normalized=True,
)

In [23]:
dfPacienteFinal.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1929 entries, 2020-09-30 18:15:00 to 2020-10-20 20:15:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  1929 non-null   float64
 1   hour           1929 non-null   float64
 2   min            1929 non-null   float64
 3   pod_id         1929 non-null   float64
 4   level_id       1929 non-null   float64
dtypes: float64(5)
memory usage: 90.4 KB


Guardamos todos los arrays de entrenamiento, validación y pruebas generados

In [24]:
# Persist the training inputs for patient 1.
with open('dataset/generado/paciente1/array_Xtrain.npy', 'wb') as f:
    np.save(f, array_Xtrain)
# Report the length of the array that was just saved (the cell used to display len(array_Ytrain)).
len(array_Xtrain)

1519

In [25]:
# Persist the training targets for patient 1 and show their count.
np.save('dataset/generado/paciente1/array_Ytrain.npy', array_Ytrain)
len(array_Ytrain)

1519

In [26]:
# Persist the validation inputs for patient 1 and show their count.
np.save('dataset/generado/paciente1/array_Xval.npy', array_Xval)
len(array_Xval)

190

In [27]:
# Persist the validation targets for patient 1 and show their count.
np.save('dataset/generado/paciente1/array_Yval.npy', array_Yval)
len(array_Yval)

190

In [28]:
# Persist the test inputs for patient 1 and show their count.
np.save('dataset/generado/paciente1/array_Xtest.npy', array_Xtest)
len(array_Xtest)

190

In [29]:
# Persist the test targets for patient 1 and show their count.
np.save('dataset/generado/paciente1/array_Ytest.npy', array_Ytest)
len(array_Ytest)

190

## Paciente 2: LIB193353

In [97]:
# Keep only the rows that belong to the patient handled in this section.
ID_Paciente = 'LIB193353'
mascara_paciente = df_Inicial['ID'].eq(ID_Paciente)
dfpaciente = df_Inicial.loc[mascara_paciente]
dfpaciente.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13083 entries, 1967092 to 1980174
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   ID             13083 non-null  object        
 1   Date           13083 non-null  datetime64[ns]
 2   Glucose level  13083 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 408.8+ KB


Seleccionamos los últimos 6 eventos registrados y comprobamos que estén separados por intervalos de 15 min

In [98]:
# Hold out the 6 most recent readings of this patient for the final evaluation.
dfpacienteEvaluacion = dfpaciente.tail(6)
# Enforce what was previously only verified by eye: consecutive held-out readings are 15 minutes apart.
assert dfpacienteEvaluacion['Date'].diff().dropna().eq(pd.Timedelta(minutes=15)).all(), \
    'Los eventos de evaluación no están separados por 15 minutos'
dfpacienteEvaluacion

Unnamed: 0,ID,Date,Glucose level
1980169,LIB193353,2021-10-19 10:00:00,124.0
1980170,LIB193353,2021-10-19 10:15:00,116.0
1980171,LIB193353,2021-10-19 10:30:00,117.0
1980172,LIB193353,2021-10-19 10:45:00,114.0
1980173,LIB193353,2021-10-19 11:00:00,101.0
1980174,LIB193353,2021-10-19 11:15:00,89.0


Guardamos los niveles de glucosa 

In [99]:
# Glucose values of the held-out readings as a plain NumPy array.
arrayEvaluacion = dfpacienteEvaluacion['Glucose level'].values
arrayEvaluacion

array([124., 116., 117., 114., 101.,  89.])

In [100]:
# Persist the held-out glucose values and show how many were written.
np.save('dataset/generado/paciente2/arrayEvaluacion.npy', arrayEvaluacion)
len(arrayEvaluacion)

6

El resto del dataset lo vamos a utilizar para el entrenamiento y validación de cada modelo

In [101]:
# Training/validation data: every reading of this patient except the 6 held out for evaluation.
# Re-derived from df_Inicial (instead of re-slicing dfpaciente) so that running this cell more
# than once does not discard another 6 rows each time.
dfpaciente = df_Inicial[df_Inicial['ID'] == ID_Paciente].iloc[:-6]
dfpaciente

Unnamed: 0,ID,Date,Glucose level
1967092,LIB193353,2021-04-20 18:25:00,180.0
1967093,LIB193353,2021-04-20 18:40:00,176.0
1967094,LIB193353,2021-04-20 18:55:00,178.0
1967095,LIB193353,2021-04-20 19:11:00,180.0
1967096,LIB193353,2021-04-20 19:25:00,172.0
...,...,...,...
1980164,LIB193353,2021-10-19 08:45:00,180.0
1980165,LIB193353,2021-10-19 09:00:00,171.0
1980166,LIB193353,2021-10-19 09:15:00,176.0
1980167,LIB193353,2021-10-19 09:30:00,182.0


Realizamos el preprocesamiento y obtenemos el dataset completo

In [102]:
# First preprocessing pass over the patient's readings (held-out rows already removed).
# The arrays unpacked here are reassigned by the second GenDataSet call before anything is saved.
(
    dfPacienteScaled,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfpaciente, featuresObj, [ID_Paciente], min,
    train_share, val_share, lag, n_ahead,
    scalerHours, scalerMin, scalerGlucosa, scalerPodId, scalerLevelId,
    fillNullData=True, resample=True, normalized=True,
)
dfPacienteScaled.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17439 entries, 2021-04-20 18:15:00 to 2021-10-19 09:45:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  17439 non-null  float64
 1   hour           17439 non-null  float64
 2   min            17439 non-null  float64
 3   pod_id         17439 non-null  float64
 4   level_id       17439 non-null  float64
dtypes: float64(5)
memory usage: 817.5 KB


Eliminamos los registros de los eventos hipoglucémicos y fijos

In [103]:
# Load the pickled collection of event timestamps to remove for patient 2.
# The context manager closes the file; load(open(...)) left the handle open.
# NOTE: pickle can execute arbitrary code on load -- only open files generated by this project.
with open('dataset/generado/paciente2/eventos.list', 'rb') as f:
    eventosEliminar = load(f)
print(len(eventosEliminar))

24


In [104]:
# Remove all listed events with a single drop() call instead of copying the frame once per event.
# drop() still raises KeyError if any timestamp is missing from the index.
dfPacienteScaled = dfPacienteScaled.drop(index=[pd.Timestamp(evento) for evento in eventosEliminar])
dfPacienteScaled.info()
print('Eliminados', len(eventosEliminar))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17415 entries, 2021-04-20 18:15:00 to 2021-10-19 09:45:00
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  17415 non-null  float64
 1   hour           17415 non-null  float64
 2   min            17415 non-null  float64
 3   pod_id         17415 non-null  float64
 4   level_id       17415 non-null  float64
dtypes: float64(5)
memory usage: 816.3 KB
Eliminados 24


Generamos un dataset en formato inicial con los datos restantes

In [105]:
# Rebuild a frame in the raw layout (Date, Glucose level, ID) from the rows that survived the drop;
# glucose is mapped back through the glucose scaler's inverse_transform.
glucosa_original = scalerGlucosa.inverse_transform(dfPacienteScaled[['Glucose level']].values)
dfPacienteInicial = pd.DataFrame({'Date': dfPacienteScaled.index})
dfPacienteInicial[['Glucose level']] = glucosa_original
dfPacienteInicial['ID'] = ID_Paciente

In [106]:
dfPacienteInicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17415 entries, 0 to 17414
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           17415 non-null  datetime64[ns]
 1   Glucose level  17415 non-null  float64       
 2   ID             17415 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 408.3+ KB


Generamos el conjunto de datos de entrenamiento a partir del dataset depurado

In [107]:
# Second preprocessing pass, now on the rebuilt frame; these arrays are the ones saved below.
(
    dfPacienteFinal,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfPacienteInicial, featuresObj, [ID_Paciente], min,
    train_share, val_share, lag, n_ahead,
    scalerHours, scalerMin, scalerGlucosa, scalerPodId, scalerLevelId,
    fillNullData=True, resample=True, normalized=True,
)

In [108]:
dfPacienteFinal.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17439 entries, 2021-04-20 18:15:00 to 2021-10-19 09:45:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  17439 non-null  float64
 1   hour           17439 non-null  float64
 2   min            17439 non-null  float64
 3   pod_id         17439 non-null  float64
 4   level_id       17439 non-null  float64
dtypes: float64(5)
memory usage: 817.5 KB


Guardamos todos los arrays de entrenamiento, validación y pruebas generados

In [109]:
# Persist the training inputs for patient 2.
with open('dataset/generado/paciente2/array_Xtrain.npy', 'wb') as f:
    np.save(f, array_Xtrain)
# Report the length of the array that was just saved (the cell used to display len(array_Ytrain)).
len(array_Xtrain)

13927

In [110]:
# Persist the training targets for patient 2 and show their count.
np.save('dataset/generado/paciente2/array_Ytrain.npy', array_Ytrain)
len(array_Ytrain)

13927

In [111]:
# Persist the validation inputs for patient 2 and show their count.
np.save('dataset/generado/paciente2/array_Xval.npy', array_Xval)
len(array_Xval)

1741

In [112]:
# Persist the validation targets for patient 2 and show their count.
np.save('dataset/generado/paciente2/array_Yval.npy', array_Yval)
len(array_Yval)

1741

In [113]:
# Persist the test inputs for patient 2 and show their count.
np.save('dataset/generado/paciente2/array_Xtest.npy', array_Xtest)
len(array_Xtest)

1741

In [114]:
# Persist the test targets for patient 2 and show their count.
np.save('dataset/generado/paciente2/array_Ytest.npy', array_Ytest)
len(array_Ytest)

1741

## Paciente 3: LIB193315

In [116]:
# Keep only the rows that belong to the patient handled in this section.
ID_Paciente = 'LIB193315'
mascara_paciente = df_Inicial['ID'].eq(ID_Paciente)
dfpaciente = df_Inicial.loc[mascara_paciente]
dfpaciente.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 31592 entries, 1166570 to 1198161
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   ID             31592 non-null  object        
 1   Date           31592 non-null  datetime64[ns]
 2   Glucose level  31592 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 987.2+ KB


Seleccionamos los últimos 6 eventos registrados y comprobamos que estén separados por intervalos de 15 min

In [117]:
# Hold out the 6 most recent readings of this patient for the final evaluation.
dfpacienteEvaluacion = dfpaciente.tail(6)
# Enforce what was previously only verified by eye: consecutive held-out readings are 15 minutes apart.
assert dfpacienteEvaluacion['Date'].diff().dropna().eq(pd.Timedelta(minutes=15)).all(), \
    'Los eventos de evaluación no están separados por 15 minutos'
dfpacienteEvaluacion

Unnamed: 0,ID,Date,Glucose level
1198156,LIB193315,2022-03-18 05:56:00,189.0
1198157,LIB193315,2022-03-18 06:11:00,188.0
1198158,LIB193315,2022-03-18 06:26:00,182.0
1198159,LIB193315,2022-03-18 06:41:00,171.0
1198160,LIB193315,2022-03-18 06:56:00,167.0
1198161,LIB193315,2022-03-18 07:11:00,166.0


Guardamos los niveles de glucosa 

In [118]:
# Glucose values of the held-out readings as a plain NumPy array.
arrayEvaluacion = dfpacienteEvaluacion['Glucose level'].values
arrayEvaluacion

array([189., 188., 182., 171., 167., 166.])

In [119]:
# Persist the held-out glucose values and show how many were written.
np.save('dataset/generado/paciente3/arrayEvaluacion.npy', arrayEvaluacion)
len(arrayEvaluacion)

6

El resto del dataset lo vamos a utilizar para el entrenamiento y validación de cada modelo

In [120]:
# Training/validation data: every reading of this patient except the 6 held out for evaluation.
# Re-derived from df_Inicial (instead of re-slicing dfpaciente) so that running this cell more
# than once does not discard another 6 rows each time.
dfpaciente = df_Inicial[df_Inicial['ID'] == ID_Paciente].iloc[:-6]
dfpaciente

Unnamed: 0,ID,Date,Glucose level
1166570,LIB193315,2021-03-09 12:04:00,99.0
1166571,LIB193315,2021-03-09 12:19:00,89.0
1166572,LIB193315,2021-03-09 12:34:00,80.0
1166573,LIB193315,2021-03-09 12:49:00,87.0
1166574,LIB193315,2021-03-09 13:05:00,102.0
...,...,...,...
1198151,LIB193315,2022-03-18 04:41:00,221.0
1198152,LIB193315,2022-03-18 04:56:00,216.0
1198153,LIB193315,2022-03-18 05:11:00,206.0
1198154,LIB193315,2022-03-18 05:26:00,198.0


Realizamos el preprocesamiento y obtenemos el dataset completo

In [53]:
# First preprocessing pass over the patient's readings (held-out rows already removed).
# The arrays unpacked here are reassigned by the second GenDataSet call before anything is saved.
(
    dfPacienteScaled,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfpaciente, featuresObj, [ID_Paciente], min,
    train_share, val_share, lag, n_ahead,
    scalerHours, scalerMin, scalerGlucosa, scalerPodId, scalerLevelId,
    fillNullData=True, resample=True, normalized=True,
)
dfPacienteScaled.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35879 entries, 2021-03-09 12:00:00 to 2022-03-18 05:30:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  35879 non-null  float64
 1   hour           35879 non-null  float64
 2   min            35879 non-null  float64
 3   pod_id         35879 non-null  float64
 4   level_id       35879 non-null  float64
dtypes: float64(5)
memory usage: 1.6 MB


Eliminamos los registros de los eventos hipoglucémicos y fijos

In [54]:
# Load the pickled collection of event timestamps to remove for patient 3.
# The context manager closes the file; load(open(...)) left the handle open.
# NOTE: pickle can execute arbitrary code on load -- only open files generated by this project.
with open('dataset/generado/paciente3/eventos.list', 'rb') as f:
    eventosEliminar = load(f)
print(len(eventosEliminar))

24


In [55]:
# Remove all listed events with a single drop() call instead of copying the frame once per event.
# drop() still raises KeyError if any timestamp is missing from the index.
dfPacienteScaled = dfPacienteScaled.drop(index=[pd.Timestamp(evento) for evento in eventosEliminar])
dfPacienteScaled.info()
print('Eliminados', len(eventosEliminar))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35855 entries, 2021-03-09 12:00:00 to 2022-03-18 05:30:00
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  35855 non-null  float64
 1   hour           35855 non-null  float64
 2   min            35855 non-null  float64
 3   pod_id         35855 non-null  float64
 4   level_id       35855 non-null  float64
dtypes: float64(5)
memory usage: 1.6 MB
Eliminados 24


Generamos un dataset en formato inicial con los datos restantes

In [56]:
# Rebuild a frame in the raw layout (Date, Glucose level, ID) from the rows that survived the drop;
# glucose is mapped back through the glucose scaler's inverse_transform.
glucosa_original = scalerGlucosa.inverse_transform(dfPacienteScaled[['Glucose level']].values)
dfPacienteInicial = pd.DataFrame({'Date': dfPacienteScaled.index})
dfPacienteInicial[['Glucose level']] = glucosa_original
dfPacienteInicial['ID'] = ID_Paciente

In [57]:
dfPacienteInicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35855 entries, 0 to 35854
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           35855 non-null  datetime64[ns]
 1   Glucose level  35855 non-null  float64       
 2   ID             35855 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 840.5+ KB


Generamos el conjunto de datos de entrenamiento a partir del dataset depurado

In [58]:
# Second preprocessing pass, now on the rebuilt frame; these arrays are the ones saved below.
(
    dfPacienteFinal,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfPacienteInicial, featuresObj, [ID_Paciente], min,
    train_share, val_share, lag, n_ahead,
    scalerHours, scalerMin, scalerGlucosa, scalerPodId, scalerLevelId,
    fillNullData=True, resample=True, normalized=True,
)

In [59]:
dfPacienteFinal.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35879 entries, 2021-03-09 12:00:00 to 2022-03-18 05:30:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  35879 non-null  float64
 1   hour           35879 non-null  float64
 2   min            35879 non-null  float64
 3   pod_id         35879 non-null  float64
 4   level_id       35879 non-null  float64
dtypes: float64(5)
memory usage: 1.6 MB


Guardamos todos los arrays de entrenamiento, validación y pruebas generados

In [60]:
# Persist the training inputs for patient 3.
with open('dataset/generado/paciente3/array_Xtrain.npy', 'wb') as f:
    np.save(f, array_Xtrain)
# Report the length of the array that was just saved (the cell used to display len(array_Ytrain)).
len(array_Xtrain)

28679

In [61]:
# Persist the training targets for patient 3 and show their count.
np.save('dataset/generado/paciente3/array_Ytrain.npy', array_Ytrain)
len(array_Ytrain)

28679

In [62]:
# Persist the validation inputs for patient 3 and show their count.
np.save('dataset/generado/paciente3/array_Xval.npy', array_Xval)
len(array_Xval)

3585

In [63]:
# Persist the validation targets for patient 3 and show their count.
np.save('dataset/generado/paciente3/array_Yval.npy', array_Yval)
len(array_Yval)

3585

In [64]:
# Persist the test inputs for patient 3 and show their count.
np.save('dataset/generado/paciente3/array_Xtest.npy', array_Xtest)
len(array_Xtest)

3585

In [65]:
# Persist the test targets for patient 3 and show their count.
np.save('dataset/generado/paciente3/array_Ytest.npy', array_Ytest)
len(array_Ytest)

3585

## Paciente 4: LIB193304

In [121]:
# Keep only the rows that belong to the patient handled in this section.
ID_Paciente = 'LIB193304'
mascara_paciente = df_Inicial['ID'].eq(ID_Paciente)
dfpaciente = df_Inicial.loc[mascara_paciente]
dfpaciente.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 59786 entries, 667394 to 727179
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   ID             59786 non-null  object        
 1   Date           59786 non-null  datetime64[ns]
 2   Glucose level  59786 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 1.8+ MB


Seleccionamos los últimos 6 eventos registrados y comprobamos que estén separados por intervalos de 15 min

In [122]:
# Hold out the 6 most recent readings of this patient for the final evaluation.
dfpacienteEvaluacion = dfpaciente.tail(6)
# Enforce what was previously only verified by eye: consecutive held-out readings are 15 minutes apart.
assert dfpacienteEvaluacion['Date'].diff().dropna().eq(pd.Timedelta(minutes=15)).all(), \
    'Los eventos de evaluación no están separados por 15 minutos'
dfpacienteEvaluacion

Unnamed: 0,ID,Date,Glucose level
727174,LIB193304,2022-03-18 22:08:00,86.0
727175,LIB193304,2022-03-18 22:23:00,100.0
727176,LIB193304,2022-03-18 22:38:00,89.0
727177,LIB193304,2022-03-18 22:53:00,92.0
727178,LIB193304,2022-03-18 23:08:00,106.0
727179,LIB193304,2022-03-18 23:23:00,124.0


Guardamos los niveles de glucosa 

In [123]:
# Glucose values of the held-out readings as a plain NumPy array.
arrayEvaluacion = dfpacienteEvaluacion['Glucose level'].values
arrayEvaluacion

array([ 86., 100.,  89.,  92., 106., 124.])

In [124]:
# Persist the held-out glucose values and show how many were written.
np.save('dataset/generado/paciente4/arrayEvaluacion.npy', arrayEvaluacion)
len(arrayEvaluacion)

6

El resto del dataset lo vamos a utilizar para el entrenamiento y validación de cada modelo

In [125]:
# Training/validation data: every reading of this patient except the 6 held out for evaluation.
# Re-derived from df_Inicial (instead of re-slicing dfpaciente) so that running this cell more
# than once does not discard another 6 rows each time.
dfpaciente = df_Inicial[df_Inicial['ID'] == ID_Paciente].iloc[:-6]
dfpaciente

Unnamed: 0,ID,Date,Glucose level
667394,LIB193304,2020-06-10 17:59:00,192.0
667395,LIB193304,2020-06-10 18:14:00,187.0
667396,LIB193304,2020-06-10 18:29:00,178.0
667397,LIB193304,2020-06-10 18:44:00,170.0
667398,LIB193304,2020-06-10 19:00:00,153.0
...,...,...,...
727169,LIB193304,2022-03-18 20:53:00,123.0
727170,LIB193304,2022-03-18 21:08:00,136.0
727171,LIB193304,2022-03-18 21:23:00,137.0
727172,LIB193304,2022-03-18 21:38:00,95.0


Realizamos el preprocesamiento y obtenemos el dataset completo

In [126]:
# First preprocessing pass over the patient's readings (held-out rows already removed).
# The arrays unpacked here are reassigned by the second GenDataSet call before anything is saved.
(
    dfPacienteScaled,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfpaciente, featuresObj, [ID_Paciente], min,
    train_share, val_share, lag, n_ahead,
    scalerHours, scalerMin, scalerGlucosa, scalerPodId, scalerLevelId,
    fillNullData=True, resample=True, normalized=True,
)
dfPacienteScaled.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 62033 entries, 2020-06-10 17:45:00 to 2022-03-18 21:45:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  62033 non-null  float64
 1   hour           62033 non-null  float64
 2   min            62033 non-null  float64
 3   pod_id         62033 non-null  float64
 4   level_id       62033 non-null  float64
dtypes: float64(5)
memory usage: 2.8 MB


Eliminamos los registros de los eventos hipoglucémicos y fijos

In [127]:
# Load the pickled collection of event timestamps to remove for patient 4.
# The context manager closes the file; load(open(...)) left the handle open.
# NOTE: pickle can execute arbitrary code on load -- only open files generated by this project.
with open('dataset/generado/paciente4/eventos.list', 'rb') as f:
    eventosEliminar = load(f)
print(len(eventosEliminar))

24


In [128]:
# Remove all listed events with a single drop() call instead of copying the frame once per event.
# drop() still raises KeyError if any timestamp is missing from the index.
dfPacienteScaled = dfPacienteScaled.drop(index=[pd.Timestamp(evento) for evento in eventosEliminar])
dfPacienteScaled.info()
print('Eliminados', len(eventosEliminar))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 62009 entries, 2020-06-10 17:45:00 to 2022-03-18 21:45:00
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  62009 non-null  float64
 1   hour           62009 non-null  float64
 2   min            62009 non-null  float64
 3   pod_id         62009 non-null  float64
 4   level_id       62009 non-null  float64
dtypes: float64(5)
memory usage: 2.8 MB
Eliminados 24


Generamos un dataset en formato inicial con los datos restantes

In [129]:
# Rebuild a frame in the raw layout (Date, Glucose level, ID) from the rows that survived the drop;
# glucose is mapped back through the glucose scaler's inverse_transform.
glucosa_original = scalerGlucosa.inverse_transform(dfPacienteScaled[['Glucose level']].values)
dfPacienteInicial = pd.DataFrame({'Date': dfPacienteScaled.index})
dfPacienteInicial[['Glucose level']] = glucosa_original
dfPacienteInicial['ID'] = ID_Paciente

In [130]:
dfPacienteInicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62009 entries, 0 to 62008
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           62009 non-null  datetime64[ns]
 1   Glucose level  62009 non-null  float64       
 2   ID             62009 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 1.4+ MB


Generamos el conjunto de datos de entrenamiento a partir del dataset depurado

In [131]:
# Second preprocessing pass, now on the rebuilt frame; these arrays are the ones saved below.
(
    dfPacienteFinal,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfPacienteInicial, featuresObj, [ID_Paciente], min,
    train_share, val_share, lag, n_ahead,
    scalerHours, scalerMin, scalerGlucosa, scalerPodId, scalerLevelId,
    fillNullData=True, resample=True, normalized=True,
)

In [132]:
dfPacienteFinal.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 62033 entries, 2020-06-10 17:45:00 to 2022-03-18 21:45:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  62033 non-null  float64
 1   hour           62033 non-null  float64
 2   min            62033 non-null  float64
 3   pod_id         62033 non-null  float64
 4   level_id       62033 non-null  float64
dtypes: float64(5)
memory usage: 2.8 MB


Guardamos todos los arrays de entrenamiento, validación y pruebas generados

In [133]:
# Persist the training inputs for patient 4.
with open('dataset/generado/paciente4/array_Xtrain.npy', 'wb') as f:
    np.save(f, array_Xtrain)
# Report the length of the array that was just saved (the cell used to display len(array_Ytrain)).
len(array_Xtrain)

49602

In [134]:
# Persist the training targets for patient 4 and show their count.
np.save('dataset/generado/paciente4/array_Ytrain.npy', array_Ytrain)
len(array_Ytrain)

49602

In [135]:
# Persist the validation inputs for patient 4 and show their count.
np.save('dataset/generado/paciente4/array_Xval.npy', array_Xval)
len(array_Xval)

6200

In [136]:
# Persist the validation targets for patient 4 and show their count.
np.save('dataset/generado/paciente4/array_Yval.npy', array_Yval)
len(array_Yval)

6200

In [137]:
# Persist the test inputs for patient 4 and show their count.
np.save('dataset/generado/paciente4/array_Xtest.npy', array_Xtest)
len(array_Xtest)

6201

In [138]:
# Persist the test targets for patient 4 and show their count.
np.save('dataset/generado/paciente4/array_Ytest.npy', array_Ytest)
len(array_Ytest)

6201