In [1]:
import codigo
from codigo import utils
from codigo import model_utils

In [2]:
import math
import warnings
from pickle import dump
from pickle import load

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
pd.set_option('mode.chained_assignment',None)
warnings.simplefilter(action='ignore', category=FutureWarning)

# Importar Dataset depurado

In [3]:
# Load the cleaned ("depurado") dataset CSV; the path is relative to the working directory.
df_Inicial = pd.read_csv(r'dataset/generado/datasetDepurado.csv')

# Revisamos las columnas que contiene el Dataset

In [4]:
# Preview the first rows to see which columns the dataset contains.
df_Inicial.head()

Unnamed: 0,ID,Date,Glucose level
0,LIB193263,2020-06-09 19:08:00,99.0
1,LIB193263,2020-06-09 19:23:00,92.0
2,LIB193263,2020-06-09 19:38:00,86.0
3,LIB193263,2020-06-09 19:53:00,85.0
4,LIB193263,2020-06-09 20:08:00,85.0


In [5]:
# Column dtypes and memory footprint as loaded ('Date' is still an object column here).
df_Inicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2966685 entries, 0 to 2966684
Data columns (total 3 columns):
 #   Column         Dtype  
---  ------         -----  
 0   ID             object 
 1   Date           object 
 2   Glucose level  float64
dtypes: float64(1), object(2)
memory usage: 67.9+ MB


### Cambiamos el formato de la columna Date de object a datetime64

In [6]:
# Replace the object-typed 'Date' column with parsed datetime64 values.
df_Inicial['Date']=pd.to_datetime(df_Inicial['Date'])

In [7]:
# Confirm that 'Date' is now datetime64[ns].
df_Inicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2966685 entries, 0 to 2966684
Data columns (total 3 columns):
 #   Column         Dtype         
---  ------         -----         
 0   ID             object        
 1   Date           datetime64[ns]
 2   Glucose level  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 67.9+ MB


In [8]:
# Earliest timestamp across the whole dataset (all patient IDs).
df_Inicial['Date'].min()

Timestamp('2018-01-06 00:31:00')

In [9]:
# Latest timestamp across the whole dataset (all patient IDs).
df_Inicial['Date'].max()

Timestamp('2022-03-20 23:58:00')

# Generar DataSet de cada Paciente

In [10]:
# Parameters forwarded unchanged to model_utils.GenDataSet in every patient section.
# NOTE(review): the split sizes printed later are roughly 80/10/10, so train_share
# and val_share look like cumulative cut points -- confirm in model_utils.GenDataSet.
train_share = 0.8
val_share = 0.9
# NOTE(review): presumably the input window length and forecast horizon, in
# resampled steps -- confirm against GenDataSet.
lag = 30
n_ahead = 1
# NOTE(review): this shadows the builtin min() for the rest of the notebook. The
# name is kept because every later GenDataSet call passes `min`; rename it
# everywhere in one go if the builtin is ever needed.
min = 15

# Column names passed to GenDataSet as the feature list.
featuresObj = ['Glucose level', 'hour', 'min', 'pod_id', 'level_id']


def _cargar_scaler(ruta):
    """Unpickle one previously saved scaler and close its file handle.

    The previous `load(open(...))` form never closed the five files it opened.
    Pickle files must come from a trusted source: unpickling executes arbitrary
    code.
    """
    with open(ruta, 'rb') as fichero:
        return load(fichero)


scalerGlucosa = _cargar_scaler('dataset/generado/scalerGlucosa.scaler')
scalerHours = _cargar_scaler('dataset/generado/scalerHours.scaler')
scalerMin = _cargar_scaler('dataset/generado/scalerMin.scaler')
scalerPodId = _cargar_scaler('dataset/generado/scalerPodId.scaler')
scalerLevelId = _cargar_scaler('dataset/generado/scalerLevelId.scaler')

## Paciente 1: LIB193399

In [11]:
# Choose the patient to process and keep only that patient's rows.
ID_Paciente = 'LIB193399'
filas_paciente = df_Inicial['ID'].eq(ID_Paciente)
dfpaciente = df_Inicial.loc[filas_paciente]

Seleccionamos los últimos 6 eventos registrados y comprobamos que estén espaciados cada 15 min

In [12]:
# Hold out the patient's last 6 readings and display them so the spacing
# between their timestamps can be checked by eye.
dfpacienteEvaluacion = dfpaciente.iloc[-6:]
dfpacienteEvaluacion

Unnamed: 0,ID,Date,Glucose level
2966679,LIB193399,2020-10-20 20:35:00,87.0
2966680,LIB193399,2020-10-20 20:50:00,83.0
2966681,LIB193399,2020-10-20 21:05:00,76.0
2966682,LIB193399,2020-10-20 21:20:00,67.0
2966683,LIB193399,2020-10-20 21:35:00,63.0
2966684,LIB193399,2020-10-20 21:50:00,63.0


Guardamos los niveles de glucosa 

In [13]:
# Pull the held-out glucose readings out as a NumPy array and display them.
serie_glucosa = dfpacienteEvaluacion['Glucose level']
arrayEvaluacion = serie_glucosa.to_numpy()
arrayEvaluacion

array([87., 83., 76., 67., 63., 63.])

In [14]:
# Persist the held-out readings for patient 1, then show how many were saved.
ruta_salida = 'dataset/generado/paciente1/arrayEvaluacion.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, arrayEvaluacion)
len(arrayEvaluacion)

6

El resto del dataset lo vamos a utilizar para el entrenamiento y validación de cada modelo

In [15]:
# Everything except the last 6 held-out readings is used for training/validation.
# The frame is rebuilt from df_Inicial instead of slicing dfpaciente in place, so
# re-running this cell does not strip another six rows each time.
dfpaciente = df_Inicial[df_Inicial['ID'] == ID_Paciente].iloc[:-6]
dfpaciente

Unnamed: 0,ID,Date,Glucose level
2964772,LIB193399,2020-09-30 18:15:00,105.0
2964773,LIB193399,2020-09-30 18:30:00,101.0
2964774,LIB193399,2020-09-30 18:45:00,98.0
2964775,LIB193399,2020-09-30 19:00:00,99.0
2964776,LIB193399,2020-09-30 19:15:00,108.0
...,...,...,...
2966674,LIB193399,2020-10-20 19:19:00,127.0
2966675,LIB193399,2020-10-20 19:34:00,117.0
2966676,LIB193399,2020-10-20 19:49:00,113.0
2966677,LIB193399,2020-10-20 20:04:00,106.0


Realizamos el preprocesamiento y obtenemos el dataset completo

In [16]:
# First preprocessing pass over the patient's rows. Only dfPacienteScaled is
# consumed by the next cells; the split arrays are overwritten by the second
# GenDataSet call further down.
(
    dfPacienteScaled,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfpaciente,
    featuresObj,
    [ID_Paciente],
    min,
    train_share,
    val_share,
    lag,
    n_ahead,
    scalerHours,
    scalerMin,
    scalerGlucosa,
    scalerPodId,
    scalerLevelId,
    fillNullData=True,
    resample=True,
    normalized=True,
)
dfPacienteScaled.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1929 entries, 2020-09-30 18:15:00 to 2020-10-20 20:15:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  1929 non-null   float64
 1   hour           1929 non-null   float64
 2   min            1929 non-null   float64
 3   pod_id         1929 non-null   float64
 4   level_id       1929 non-null   float64
dtypes: float64(5)
memory usage: 90.4 KB


Eliminamos los registros de los eventos hipoglucémicos y fijos

In [17]:
# Load the pickled list of events to remove for patient 1. The `with` block
# closes the file handle, which the bare load(open(...)) form left open.
with open('dataset/generado/paciente1/eventos.list', 'rb') as fichero_eventos:
    eventosEliminar = load(fichero_eventos)
print(len(eventosEliminar))

24


In [18]:
# Remove every listed event in a single drop() call instead of copying the whole
# frame once per event. A missing timestamp still raises KeyError, so re-running
# this cell after the rows are gone fails just as the loop did.
filas_antes = len(dfPacienteScaled)
marcas_eliminar = [pd.Timestamp(evento) for evento in eventosEliminar]
dfPacienteScaled = dfPacienteScaled.drop(index=marcas_eliminar)
dfPacienteScaled.info()
# Report the rows actually removed rather than the length of the request list.
print('Eliminados', filas_antes - len(dfPacienteScaled))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1905 entries, 2020-09-30 18:15:00 to 2020-10-20 20:15:00
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  1905 non-null   float64
 1   hour           1905 non-null   float64
 2   min            1905 non-null   float64
 3   pod_id         1905 non-null   float64
 4   level_id       1905 non-null   float64
dtypes: float64(5)
memory usage: 89.3 KB
Eliminados 24


Generamos un dataset en formato inicial con los datos restantes

In [19]:
# Rebuild a frame in the raw (Date, Glucose level, ID) layout from the rows that
# remain, undoing the glucose scaling so it can be fed to GenDataSet again.
glucosa_original = scalerGlucosa.inverse_transform(
    dfPacienteScaled[['Glucose level']].values
)
dfPacienteInicial = pd.DataFrame({
    'Date': dfPacienteScaled.index,
    'Glucose level': glucosa_original.ravel(),
    'ID': ID_Paciente,
})

In [20]:
# Check the row count and dtypes of the rebuilt raw-format frame.
dfPacienteInicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1905 entries, 0 to 1904
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           1905 non-null   datetime64[ns]
 1   Glucose level  1905 non-null   float64       
 2   ID             1905 non-null   object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 44.8+ KB


Generamos el conjunto de datos de entrenamiento a partir del dataset depurado

In [21]:
# Second preprocessing pass, this time over the frame with the events removed.
# The split arrays produced here are the ones saved to disk below.
(
    dfPacienteFinal,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfPacienteInicial,
    featuresObj,
    [ID_Paciente],
    min,
    train_share,
    val_share,
    lag,
    n_ahead,
    scalerHours,
    scalerMin,
    scalerGlucosa,
    scalerPodId,
    scalerLevelId,
    fillNullData=True,
    resample=True,
    normalized=True,
)

In [22]:
# Inspect the regenerated frame (index range, row count, dtypes).
# NOTE(review): the row count (1929) equals the count before the 24 events were
# dropped, which suggests the resample/fill step re-creates the removed time
# slots -- confirm that this is the intended effect of the removal.
dfPacienteFinal.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1929 entries, 2020-09-30 18:15:00 to 2020-10-20 20:15:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  1929 non-null   float64
 1   hour           1929 non-null   float64
 2   min            1929 non-null   float64
 3   pod_id         1929 non-null   float64
 4   level_id       1929 non-null   float64
dtypes: float64(5)
memory usage: 90.4 KB


Guardamos todos los arrays de entrenamiento, validación y pruebas generados

In [23]:
# Persist array_Xtrain (training split) for patient 1.
with open('dataset/generado/paciente1/array_Xtrain.npy', 'wb') as f:
    np.save(f, array_Xtrain)
# Show the length of the array that was just written; the cell previously
# displayed len(array_Ytrain) by copy-paste mistake.
len(array_Xtrain)

1519

In [24]:
# Persist array_Ytrain (training split) for patient 1, then show its length.
ruta_salida = 'dataset/generado/paciente1/array_Ytrain.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Ytrain)
len(array_Ytrain)

1519

In [25]:
# Persist array_Xval (validation split) for patient 1, then show its length.
ruta_salida = 'dataset/generado/paciente1/array_Xval.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Xval)
len(array_Xval)

190

In [26]:
# Persist array_Yval (validation split) for patient 1, then show its length.
ruta_salida = 'dataset/generado/paciente1/array_Yval.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Yval)
len(array_Yval)

190

In [27]:
# Persist array_Xtest (test split) for patient 1, then show its length.
ruta_salida = 'dataset/generado/paciente1/array_Xtest.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Xtest)
len(array_Xtest)

190

In [28]:
# Persist array_Ytest (test split) for patient 1, then show its length.
ruta_salida = 'dataset/generado/paciente1/array_Ytest.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Ytest)
len(array_Ytest)

190

## Paciente 2: LIB193271

In [29]:
# Choose the patient to process and keep only that patient's rows.
ID_Paciente = 'LIB193271'
filas_paciente = df_Inicial['ID'].eq(ID_Paciente)
dfpaciente = df_Inicial.loc[filas_paciente]

Seleccionamos los últimos 6 eventos registrados y comprobamos que estén espaciados cada 15 min

In [30]:
# Hold out the patient's last 6 readings and display them so the spacing
# between their timestamps can be checked by eye.
dfpacienteEvaluacion = dfpaciente.iloc[-6:]
dfpacienteEvaluacion

Unnamed: 0,ID,Date,Glucose level
401817,LIB193271,2022-03-20 22:40:00,175.0
401818,LIB193271,2022-03-20 22:55:00,172.0
401819,LIB193271,2022-03-20 23:12:00,172.0
401820,LIB193271,2022-03-20 23:27:00,179.0
401821,LIB193271,2022-03-20 23:42:00,179.0
401822,LIB193271,2022-03-20 23:57:00,172.0


Guardamos los niveles de glucosa 

In [31]:
# Pull the held-out glucose readings out as a NumPy array and display them.
serie_glucosa = dfpacienteEvaluacion['Glucose level']
arrayEvaluacion = serie_glucosa.to_numpy()
arrayEvaluacion

array([175., 172., 172., 179., 179., 172.])

In [32]:
# Persist the held-out readings for patient 2, then show how many were saved.
ruta_salida = 'dataset/generado/paciente2/arrayEvaluacion.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, arrayEvaluacion)
len(arrayEvaluacion)

6

El resto del dataset lo vamos a utilizar para el entrenamiento y validación de cada modelo

In [33]:
# Everything except the last 6 held-out readings is used for training/validation.
# The frame is rebuilt from df_Inicial instead of slicing dfpaciente in place, so
# re-running this cell does not strip another six rows each time.
dfpaciente = df_Inicial[df_Inicial['ID'] == ID_Paciente].iloc[:-6]
dfpaciente

Unnamed: 0,ID,Date,Glucose level
386964,LIB193271,2020-09-24 17:59:00,125.0
386965,LIB193271,2020-09-24 18:14:00,109.0
386966,LIB193271,2020-09-24 18:29:00,111.0
386967,LIB193271,2020-09-24 18:44:00,119.0
386968,LIB193271,2020-09-24 18:59:00,114.0
...,...,...,...
401812,LIB193271,2022-03-20 21:25:00,184.0
401813,LIB193271,2022-03-20 21:40:00,190.0
401814,LIB193271,2022-03-20 21:55:00,200.0
401815,LIB193271,2022-03-20 22:10:00,200.0


Realizamos el preprocesamiento y obtenemos el dataset completo

In [34]:
# First preprocessing pass over the patient's rows. Only dfPacienteScaled is
# consumed by the next cells; the split arrays are overwritten by the second
# GenDataSet call further down.
(
    dfPacienteScaled,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfpaciente,
    featuresObj,
    [ID_Paciente],
    min,
    train_share,
    val_share,
    lag,
    n_ahead,
    scalerHours,
    scalerMin,
    scalerGlucosa,
    scalerPodId,
    scalerLevelId,
    fillNullData=True,
    resample=True,
    normalized=True,
)
dfPacienteScaled.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 52051 entries, 2020-09-24 17:45:00 to 2022-03-20 22:15:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  52051 non-null  float64
 1   hour           52051 non-null  float64
 2   min            52051 non-null  float64
 3   pod_id         52051 non-null  float64
 4   level_id       52051 non-null  float64
dtypes: float64(5)
memory usage: 2.4 MB


Eliminamos los registros de los eventos hipoglucémicos y fijos

In [35]:
# Load the pickled list of events to remove for patient 2. The `with` block
# closes the file handle, which the bare load(open(...)) form left open.
with open('dataset/generado/paciente2/eventos.list', 'rb') as fichero_eventos:
    eventosEliminar = load(fichero_eventos)
print(len(eventosEliminar))

24


In [36]:
# Remove every listed event in a single drop() call instead of copying the whole
# frame once per event. A missing timestamp still raises KeyError, so re-running
# this cell after the rows are gone fails just as the loop did.
filas_antes = len(dfPacienteScaled)
marcas_eliminar = [pd.Timestamp(evento) for evento in eventosEliminar]
dfPacienteScaled = dfPacienteScaled.drop(index=marcas_eliminar)
dfPacienteScaled.info()
# Report the rows actually removed rather than the length of the request list.
print('Eliminados', filas_antes - len(dfPacienteScaled))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 52027 entries, 2020-09-24 17:45:00 to 2022-03-20 22:15:00
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  52027 non-null  float64
 1   hour           52027 non-null  float64
 2   min            52027 non-null  float64
 3   pod_id         52027 non-null  float64
 4   level_id       52027 non-null  float64
dtypes: float64(5)
memory usage: 2.4 MB
Eliminados 24


Generamos un dataset en formato inicial con los datos restantes

In [37]:
# Rebuild a frame in the raw (Date, Glucose level, ID) layout from the rows that
# remain, undoing the glucose scaling so it can be fed to GenDataSet again.
glucosa_original = scalerGlucosa.inverse_transform(
    dfPacienteScaled[['Glucose level']].values
)
dfPacienteInicial = pd.DataFrame({
    'Date': dfPacienteScaled.index,
    'Glucose level': glucosa_original.ravel(),
    'ID': ID_Paciente,
})

In [38]:
# Check the row count and dtypes of the rebuilt raw-format frame.
dfPacienteInicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52027 entries, 0 to 52026
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           52027 non-null  datetime64[ns]
 1   Glucose level  52027 non-null  float64       
 2   ID             52027 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 1.2+ MB


Generamos el conjunto de datos de entrenamiento a partir del dataset depurado

In [39]:
# Second preprocessing pass, this time over the frame with the events removed.
# The split arrays produced here are the ones saved to disk below.
(
    dfPacienteFinal,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfPacienteInicial,
    featuresObj,
    [ID_Paciente],
    min,
    train_share,
    val_share,
    lag,
    n_ahead,
    scalerHours,
    scalerMin,
    scalerGlucosa,
    scalerPodId,
    scalerLevelId,
    fillNullData=True,
    resample=True,
    normalized=True,
)

In [40]:
# Inspect the regenerated frame (index range, row count, dtypes).
# NOTE(review): the row count (52051) equals the count before the 24 events were
# dropped, which suggests the resample/fill step re-creates the removed time
# slots -- confirm that this is the intended effect of the removal.
dfPacienteFinal.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 52051 entries, 2020-09-24 17:45:00 to 2022-03-20 22:15:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  52051 non-null  float64
 1   hour           52051 non-null  float64
 2   min            52051 non-null  float64
 3   pod_id         52051 non-null  float64
 4   level_id       52051 non-null  float64
dtypes: float64(5)
memory usage: 2.4 MB


Guardamos todos los arrays de entrenamiento, validación y pruebas generados

In [41]:
# Persist array_Xtrain (training split) for patient 2.
with open('dataset/generado/paciente2/array_Xtrain.npy', 'wb') as f:
    np.save(f, array_Xtrain)
# Show the length of the array that was just written; the cell previously
# displayed len(array_Ytrain) by copy-paste mistake.
len(array_Xtrain)

41616

In [42]:
# Persist array_Ytrain (training split) for patient 2, then show its length.
ruta_salida = 'dataset/generado/paciente2/array_Ytrain.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Ytrain)
len(array_Ytrain)

41616

In [43]:
# Persist array_Xval (validation split) for patient 2, then show its length.
ruta_salida = 'dataset/generado/paciente2/array_Xval.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Xval)
len(array_Xval)

5202

In [44]:
# Persist array_Yval (validation split) for patient 2, then show its length.
ruta_salida = 'dataset/generado/paciente2/array_Yval.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Yval)
len(array_Yval)

5202

In [45]:
# Persist array_Xtest (test split) for patient 2, then show its length.
ruta_salida = 'dataset/generado/paciente2/array_Xtest.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Xtest)
len(array_Xtest)

5203

In [46]:
# Persist array_Ytest (test split) for patient 2, then show its length.
ruta_salida = 'dataset/generado/paciente2/array_Ytest.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Ytest)
len(array_Ytest)

5203

## Paciente 3: LIB193315

In [47]:
# Choose the patient to process and keep only that patient's rows.
ID_Paciente = 'LIB193315'
filas_paciente = df_Inicial['ID'].eq(ID_Paciente)
dfpaciente = df_Inicial.loc[filas_paciente]

Seleccionamos los últimos 6 eventos registrados y comprobamos que estén espaciados cada 15 min

In [48]:
# Hold out the patient's last 6 readings and display them so the spacing
# between their timestamps can be checked by eye.
dfpacienteEvaluacion = dfpaciente.iloc[-6:]
dfpacienteEvaluacion

Unnamed: 0,ID,Date,Glucose level
1198156,LIB193315,2022-03-18 05:56:00,189.0
1198157,LIB193315,2022-03-18 06:11:00,188.0
1198158,LIB193315,2022-03-18 06:26:00,182.0
1198159,LIB193315,2022-03-18 06:41:00,171.0
1198160,LIB193315,2022-03-18 06:56:00,167.0
1198161,LIB193315,2022-03-18 07:11:00,166.0


Guardamos los niveles de glucosa 

In [49]:
# Pull the held-out glucose readings out as a NumPy array and display them.
serie_glucosa = dfpacienteEvaluacion['Glucose level']
arrayEvaluacion = serie_glucosa.to_numpy()
arrayEvaluacion

array([189., 188., 182., 171., 167., 166.])

In [50]:
# Persist the held-out readings for patient 3, then show how many were saved.
ruta_salida = 'dataset/generado/paciente3/arrayEvaluacion.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, arrayEvaluacion)
len(arrayEvaluacion)

6

El resto del dataset lo vamos a utilizar para el entrenamiento y validación de cada modelo

In [51]:
# Everything except the last 6 held-out readings is used for training/validation.
# The frame is rebuilt from df_Inicial instead of slicing dfpaciente in place, so
# re-running this cell does not strip another six rows each time.
dfpaciente = df_Inicial[df_Inicial['ID'] == ID_Paciente].iloc[:-6]
dfpaciente

Unnamed: 0,ID,Date,Glucose level
1166570,LIB193315,2021-03-09 12:04:00,99.0
1166571,LIB193315,2021-03-09 12:19:00,89.0
1166572,LIB193315,2021-03-09 12:34:00,80.0
1166573,LIB193315,2021-03-09 12:49:00,87.0
1166574,LIB193315,2021-03-09 13:05:00,102.0
...,...,...,...
1198151,LIB193315,2022-03-18 04:41:00,221.0
1198152,LIB193315,2022-03-18 04:56:00,216.0
1198153,LIB193315,2022-03-18 05:11:00,206.0
1198154,LIB193315,2022-03-18 05:26:00,198.0


Realizamos el preprocesamiento y obtenemos el dataset completo

In [52]:
# First preprocessing pass over the patient's rows. Only dfPacienteScaled is
# consumed by the next cells; the split arrays are overwritten by the second
# GenDataSet call further down.
(
    dfPacienteScaled,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfpaciente,
    featuresObj,
    [ID_Paciente],
    min,
    train_share,
    val_share,
    lag,
    n_ahead,
    scalerHours,
    scalerMin,
    scalerGlucosa,
    scalerPodId,
    scalerLevelId,
    fillNullData=True,
    resample=True,
    normalized=True,
)
dfPacienteScaled.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35879 entries, 2021-03-09 12:00:00 to 2022-03-18 05:30:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  35879 non-null  float64
 1   hour           35879 non-null  float64
 2   min            35879 non-null  float64
 3   pod_id         35879 non-null  float64
 4   level_id       35879 non-null  float64
dtypes: float64(5)
memory usage: 1.6 MB


Eliminamos los registros de los eventos hipoglucémicos y fijos

In [53]:
# Load the pickled list of events to remove for patient 3. The `with` block
# closes the file handle, which the bare load(open(...)) form left open.
with open('dataset/generado/paciente3/eventos.list', 'rb') as fichero_eventos:
    eventosEliminar = load(fichero_eventos)
print(len(eventosEliminar))

24


In [54]:
# Remove every listed event in a single drop() call instead of copying the whole
# frame once per event. A missing timestamp still raises KeyError, so re-running
# this cell after the rows are gone fails just as the loop did.
filas_antes = len(dfPacienteScaled)
marcas_eliminar = [pd.Timestamp(evento) for evento in eventosEliminar]
dfPacienteScaled = dfPacienteScaled.drop(index=marcas_eliminar)
dfPacienteScaled.info()
# Report the rows actually removed rather than the length of the request list.
print('Eliminados', filas_antes - len(dfPacienteScaled))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35855 entries, 2021-03-09 12:00:00 to 2022-03-18 05:30:00
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  35855 non-null  float64
 1   hour           35855 non-null  float64
 2   min            35855 non-null  float64
 3   pod_id         35855 non-null  float64
 4   level_id       35855 non-null  float64
dtypes: float64(5)
memory usage: 1.6 MB
Eliminados 24


Generamos un dataset en formato inicial con los datos restantes

In [55]:
# Rebuild a frame in the raw (Date, Glucose level, ID) layout from the rows that
# remain, undoing the glucose scaling so it can be fed to GenDataSet again.
glucosa_original = scalerGlucosa.inverse_transform(
    dfPacienteScaled[['Glucose level']].values
)
dfPacienteInicial = pd.DataFrame({
    'Date': dfPacienteScaled.index,
    'Glucose level': glucosa_original.ravel(),
    'ID': ID_Paciente,
})

In [56]:
# Check the row count and dtypes of the rebuilt raw-format frame.
dfPacienteInicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35855 entries, 0 to 35854
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           35855 non-null  datetime64[ns]
 1   Glucose level  35855 non-null  float64       
 2   ID             35855 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 840.5+ KB


Generamos el conjunto de datos de entrenamiento a partir del dataset depurado

In [57]:
# Second preprocessing pass, this time over the frame with the events removed.
# The split arrays produced here are the ones saved to disk below.
(
    dfPacienteFinal,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfPacienteInicial,
    featuresObj,
    [ID_Paciente],
    min,
    train_share,
    val_share,
    lag,
    n_ahead,
    scalerHours,
    scalerMin,
    scalerGlucosa,
    scalerPodId,
    scalerLevelId,
    fillNullData=True,
    resample=True,
    normalized=True,
)

In [58]:
# Inspect the regenerated frame (index range, row count, dtypes).
# NOTE(review): the row count (35879) equals the count before the 24 events were
# dropped, which suggests the resample/fill step re-creates the removed time
# slots -- confirm that this is the intended effect of the removal.
dfPacienteFinal.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35879 entries, 2021-03-09 12:00:00 to 2022-03-18 05:30:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  35879 non-null  float64
 1   hour           35879 non-null  float64
 2   min            35879 non-null  float64
 3   pod_id         35879 non-null  float64
 4   level_id       35879 non-null  float64
dtypes: float64(5)
memory usage: 1.6 MB


Guardamos todos los arrays de entrenamiento, validación y pruebas generados

In [59]:
# Persist array_Xtrain (training split) for patient 3.
with open('dataset/generado/paciente3/array_Xtrain.npy', 'wb') as f:
    np.save(f, array_Xtrain)
# Show the length of the array that was just written; the cell previously
# displayed len(array_Ytrain) by copy-paste mistake.
len(array_Xtrain)

28679

In [60]:
# Persist array_Ytrain (training split) for patient 3, then show its length.
ruta_salida = 'dataset/generado/paciente3/array_Ytrain.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Ytrain)
len(array_Ytrain)

28679

In [61]:
# Persist array_Xval (validation split) for patient 3, then show its length.
ruta_salida = 'dataset/generado/paciente3/array_Xval.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Xval)
len(array_Xval)

3585

In [62]:
# Persist array_Yval (validation split) for patient 3, then show its length.
ruta_salida = 'dataset/generado/paciente3/array_Yval.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Yval)
len(array_Yval)

3585

In [63]:
# Persist array_Xtest (test split) for patient 3, then show its length.
ruta_salida = 'dataset/generado/paciente3/array_Xtest.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Xtest)
len(array_Xtest)

3585

In [64]:
# Persist array_Ytest (test split) for patient 3, then show its length.
ruta_salida = 'dataset/generado/paciente3/array_Ytest.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Ytest)
len(array_Ytest)

3585

## Paciente 4: LIB193304

In [65]:
# Choose the patient to process and keep only that patient's rows.
ID_Paciente = 'LIB193304'
filas_paciente = df_Inicial['ID'].eq(ID_Paciente)
dfpaciente = df_Inicial.loc[filas_paciente]

Seleccionamos los últimos 6 eventos registrados y comprobamos que estén espaciados cada 15 min

In [66]:
# Hold out the patient's last 6 readings and display them so the spacing
# between their timestamps can be checked by eye.
dfpacienteEvaluacion = dfpaciente.iloc[-6:]
dfpacienteEvaluacion

Unnamed: 0,ID,Date,Glucose level
727174,LIB193304,2022-03-18 22:08:00,86.0
727175,LIB193304,2022-03-18 22:23:00,100.0
727176,LIB193304,2022-03-18 22:38:00,89.0
727177,LIB193304,2022-03-18 22:53:00,92.0
727178,LIB193304,2022-03-18 23:08:00,106.0
727179,LIB193304,2022-03-18 23:23:00,124.0


Guardamos los niveles de glucosa 

In [67]:
# Pull the held-out glucose readings out as a NumPy array and display them.
serie_glucosa = dfpacienteEvaluacion['Glucose level']
arrayEvaluacion = serie_glucosa.to_numpy()
arrayEvaluacion

array([ 86., 100.,  89.,  92., 106., 124.])

In [68]:
# Persist the held-out readings for patient 4, then show how many were saved.
ruta_salida = 'dataset/generado/paciente4/arrayEvaluacion.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, arrayEvaluacion)
len(arrayEvaluacion)

6

El resto del dataset lo vamos a utilizar para el entrenamiento y validación de cada modelo

In [69]:
# Everything except the last 6 held-out readings is used for training/validation.
# The frame is rebuilt from df_Inicial instead of slicing dfpaciente in place, so
# re-running this cell does not strip another six rows each time.
dfpaciente = df_Inicial[df_Inicial['ID'] == ID_Paciente].iloc[:-6]
dfpaciente

Unnamed: 0,ID,Date,Glucose level
667394,LIB193304,2020-06-10 17:59:00,192.0
667395,LIB193304,2020-06-10 18:14:00,187.0
667396,LIB193304,2020-06-10 18:29:00,178.0
667397,LIB193304,2020-06-10 18:44:00,170.0
667398,LIB193304,2020-06-10 19:00:00,153.0
...,...,...,...
727169,LIB193304,2022-03-18 20:53:00,123.0
727170,LIB193304,2022-03-18 21:08:00,136.0
727171,LIB193304,2022-03-18 21:23:00,137.0
727172,LIB193304,2022-03-18 21:38:00,95.0


Realizamos el preprocesamiento y obtenemos el dataset completo

In [70]:
# First preprocessing pass over the patient's rows. Only dfPacienteScaled is
# consumed by the next cells; the split arrays are overwritten by the second
# GenDataSet call further down.
(
    dfPacienteScaled,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfpaciente,
    featuresObj,
    [ID_Paciente],
    min,
    train_share,
    val_share,
    lag,
    n_ahead,
    scalerHours,
    scalerMin,
    scalerGlucosa,
    scalerPodId,
    scalerLevelId,
    fillNullData=True,
    resample=True,
    normalized=True,
)
dfPacienteScaled.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 62033 entries, 2020-06-10 17:45:00 to 2022-03-18 21:45:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  62033 non-null  float64
 1   hour           62033 non-null  float64
 2   min            62033 non-null  float64
 3   pod_id         62033 non-null  float64
 4   level_id       62033 non-null  float64
dtypes: float64(5)
memory usage: 2.8 MB


Eliminamos los registros de los eventos hipoglucémicos y fijos

In [71]:
# Load the pickled list of events to remove for patient 4. The `with` block
# closes the file handle, which the bare load(open(...)) form left open.
with open('dataset/generado/paciente4/eventos.list', 'rb') as fichero_eventos:
    eventosEliminar = load(fichero_eventos)
print(len(eventosEliminar))

24


In [72]:
# Remove every listed event in a single drop() call instead of copying the whole
# frame once per event. A missing timestamp still raises KeyError, so re-running
# this cell after the rows are gone fails just as the loop did.
filas_antes = len(dfPacienteScaled)
marcas_eliminar = [pd.Timestamp(evento) for evento in eventosEliminar]
dfPacienteScaled = dfPacienteScaled.drop(index=marcas_eliminar)
dfPacienteScaled.info()
# Report the rows actually removed rather than the length of the request list.
print('Eliminados', filas_antes - len(dfPacienteScaled))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 62009 entries, 2020-06-10 17:45:00 to 2022-03-18 21:45:00
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  62009 non-null  float64
 1   hour           62009 non-null  float64
 2   min            62009 non-null  float64
 3   pod_id         62009 non-null  float64
 4   level_id       62009 non-null  float64
dtypes: float64(5)
memory usage: 2.8 MB
Eliminados 24


Generamos un dataset en formato inicial con los datos restantes

In [73]:
# Rebuild a frame in the raw (Date, Glucose level, ID) layout from the rows that
# remain, undoing the glucose scaling so it can be fed to GenDataSet again.
glucosa_original = scalerGlucosa.inverse_transform(
    dfPacienteScaled[['Glucose level']].values
)
dfPacienteInicial = pd.DataFrame({
    'Date': dfPacienteScaled.index,
    'Glucose level': glucosa_original.ravel(),
    'ID': ID_Paciente,
})

In [74]:
# Check the row count and dtypes of the rebuilt raw-format frame.
dfPacienteInicial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62009 entries, 0 to 62008
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           62009 non-null  datetime64[ns]
 1   Glucose level  62009 non-null  float64       
 2   ID             62009 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 1.4+ MB


Generamos el conjunto de datos de entrenamiento a partir del dataset depurado

In [75]:
# Second preprocessing pass, this time over the frame with the events removed.
# The split arrays produced here are the ones saved to disk below.
(
    dfPacienteFinal,
    array_Xtrain, array_Ytrain,
    array_Xval, array_Yval,
    array_Xtest, array_Ytest,
) = model_utils.GenDataSet(
    dfPacienteInicial,
    featuresObj,
    [ID_Paciente],
    min,
    train_share,
    val_share,
    lag,
    n_ahead,
    scalerHours,
    scalerMin,
    scalerGlucosa,
    scalerPodId,
    scalerLevelId,
    fillNullData=True,
    resample=True,
    normalized=True,
)

In [76]:
# Inspect the regenerated frame (index range, row count, dtypes).
# NOTE(review): the row count (62033) equals the count before the 24 events were
# dropped, which suggests the resample/fill step re-creates the removed time
# slots -- confirm that this is the intended effect of the removal.
dfPacienteFinal.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 62033 entries, 2020-06-10 17:45:00 to 2022-03-18 21:45:00
Freq: 15T
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Glucose level  62033 non-null  float64
 1   hour           62033 non-null  float64
 2   min            62033 non-null  float64
 3   pod_id         62033 non-null  float64
 4   level_id       62033 non-null  float64
dtypes: float64(5)
memory usage: 2.8 MB


Guardamos todos los arrays de entrenamiento, validación y pruebas generados

In [77]:
# Persist array_Xtrain (training split) for patient 4.
with open('dataset/generado/paciente4/array_Xtrain.npy', 'wb') as f:
    np.save(f, array_Xtrain)
# Show the length of the array that was just written; the cell previously
# displayed len(array_Ytrain) by copy-paste mistake.
len(array_Xtrain)

49602

In [78]:
# Persist array_Ytrain (training split) for patient 4, then show its length.
ruta_salida = 'dataset/generado/paciente4/array_Ytrain.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Ytrain)
len(array_Ytrain)

49602

In [79]:
# Persist array_Xval (validation split) for patient 4, then show its length.
ruta_salida = 'dataset/generado/paciente4/array_Xval.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Xval)
len(array_Xval)

6200

In [80]:
# Persist array_Yval (validation split) for patient 4, then show its length.
ruta_salida = 'dataset/generado/paciente4/array_Yval.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Yval)
len(array_Yval)

6200

In [81]:
# Persist array_Xtest (test split) for patient 4, then show its length.
ruta_salida = 'dataset/generado/paciente4/array_Xtest.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Xtest)
len(array_Xtest)

6201

In [82]:
# Persist array_Ytest (test split) for patient 4, then show its length.
ruta_salida = 'dataset/generado/paciente4/array_Ytest.npy'
with open(ruta_salida, 'wb') as destino:
    np.save(destino, array_Ytest)
len(array_Ytest)

6201