In [158]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import ast
from PIL import Image
from datetime import datetime

In [159]:
cgm_test = pd.read_csv('633FinalData/cgm_test.csv')
cgm_train = pd.read_csv('633FinalData/cgm_train.csv')
img_train = pd.read_csv('633FinalData/img_train.csv')
img_test = pd.read_csv('633FinalData/img_test.csv')

In [160]:
cgm_test['cgm'] = cgm_test['CGM Data'].apply(lambda x: ast.literal_eval(x))
cgm_train['cgm'] = cgm_train['CGM Data'].apply(lambda x: ast.literal_eval(x))

In [161]:
def get_img(img):
    # Convert the string representation of the image to a list
    img = ast.literal_eval(img)
    
    # Convert the list to a NumPy array
    img = np.array(img)
    
    # Convert the NumPy array to a PIL Image
    img = Image.fromarray(np.uint8(img))
    
    # Resize the image
    img = img.resize((64,64))
    
    # Convert the resized image back to a NumPy array
    img = np.array(img)
    
    if len(img.shape) == 2:
        img = np.stack((img,)*3, axis=-1)
    return img

In [162]:
img_train['img_b'] = img_train['Image Before Breakfast'].apply(get_img)
img_train['img_l'] = img_train['Image Before Lunch'].apply(get_img)
img_test['img_b'] = img_test['Image Before Breakfast'].apply(get_img)
img_test['img_l'] = img_test['Image Before Lunch'].apply(get_img)

In [165]:
def to_step(t):
    date_obj = datetime.strptime(t, '%Y-%m-%d %H:%M:%S')
    return (date_obj.hour*60 + date_obj.minute)//5

def cgm_to_steps(cgm):
    steps = [0 for _ in range(288)]
    for t,value in cgm:
        steps[to_step(t)] = value
    return steps

def time_to_step(t1,t2):
    if t1 == '{}' or t2 == '{}':
        return [0 for _ in range(288)]
    steps = [0 for _ in range(288)]
    steps[to_step(t1)] = 1
    steps[to_step(t2)] = 1
    return steps

In [166]:
# Drop rows with NaT values in 'start_time' or 'end_time'
# cgm_test = cgm_test.dropna()
cgm_train = cgm_train.dropna()
# Drop rows containing the string '{}' in any column
# cgm_test = cgm_test[~cgm_test.apply(lambda row: row.astype(str).str.contains('{}').any(), axis=1)]
cgm_train = cgm_train[~cgm_train.apply(lambda row: row.astype(str).str.contains('{}').any(), axis=1)]

cgm_test['cgm_sequential'] = cgm_test['cgm'].apply(cgm_to_steps)
cgm_train['cgm_sequential'] = cgm_train['cgm'].apply(cgm_to_steps)
cgm_train['when_to_eat'] = cgm_train[['Breakfast Time', 'Lunch Time']].apply(lambda x: time_to_step(x['Breakfast Time'], x['Lunch Time']), axis=1)
cgm_test['when_to_eat'] = cgm_test[['Breakfast Time', 'Lunch Time']].apply(lambda x: time_to_step(x['Breakfast Time'], x['Lunch Time']), axis=1)

In [167]:
viome_test = pd.read_csv('633FinalData/demo_viome_test.csv')
viome_train = pd.read_csv('633FinalData/demo_viome_train.csv')

In [168]:
viome_test= pd.get_dummies(viome_test, columns=['Race'])
viome_train= pd.get_dummies(viome_train, columns=['Race'])

In [169]:
viome_test['viome_sequential'] = viome_test['Viome'].apply(lambda x :[float(x) for x in x.split(',')])
viome_train['viome_sequential'] = viome_train['Viome'].apply(lambda x :[float(x) for x in x.split(',')])

In [170]:
combined_train = pd.merge(cgm_train, img_train, on=['Subject ID', 'Day'])
combined_train = pd.merge(combined_train, viome_train, on=['Subject ID'])
combined_test = pd.merge(cgm_test, img_test, on=['Subject ID', 'Day'])
combined_test = pd.merge(combined_test, viome_test, on=['Subject ID'])

In [171]:
combined_train.columns

Index(['Subject ID', 'Day', 'Breakfast Time', 'Lunch Time', 'CGM Data', 'cgm',
       'cgm_sequential', 'when_to_eat', 'Image Before Breakfast',
       'Image Before Lunch', 'img_b', 'img_l', 'Age', 'Gender', 'Weight',
       'Height', 'Diabetes Status', 'A1C', 'Baseline Fasting Glucose',
       'Insulin', 'Triglycerides', 'Cholesterol', 'HDL', 'Non-HDL', 'LDL',
       'VLDL', 'CHO/HDL Ratio', 'HOMA-IR', 'BMI', 'Viome',
       'Race_African American', 'Race_Hispanic/Latino', 'Race_White',
       'viome_sequential'],
      dtype='object')

In [172]:
to_drop_train = ['Subject ID','Day','Breakfast Time','Lunch Time','CGM Data','Image Before Breakfast','Image Before Lunch','Viome']
combined_train = combined_train.drop(to_drop_train, axis=1)

In [173]:
to_drop_test = ['Subject ID','Day','Breakfast Time','Lunch Time','CGM Data','Image Before Breakfast','Image Before Lunch','Viome']

In [174]:
combined_test = combined_test.drop(to_drop_train, axis=1)

In [175]:
combined_train.columns

Index(['cgm', 'cgm_sequential', 'when_to_eat', 'img_b', 'img_l', 'Age',
       'Gender', 'Weight', 'Height', 'Diabetes Status', 'A1C',
       'Baseline Fasting Glucose', 'Insulin', 'Triglycerides', 'Cholesterol',
       'HDL', 'Non-HDL', 'LDL', 'VLDL', 'CHO/HDL Ratio', 'HOMA-IR', 'BMI',
       'Race_African American', 'Race_Hispanic/Latino', 'Race_White',
       'viome_sequential'],
      dtype='object')

In [None]:
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]

        # Extract sequential data
        cgm_sequential = np.array(row['cgm_sequential'])
        when_to_eat = np.array(row['when_to_eat'])

        # Stack cgm_sequential and when_to_eat to create a sequential data array with 2 features
        sequential_data = np.stack((cgm_sequential, when_to_eat), axis=-1)

        sequential_data = (sequential_data - np.mean(sequential_data)) / np.std(sequential_data)

        # Extract image data
        img_b = row['img_b']
        img_l = row['img_l']

        # Convert image data to PIL Image and then to tensor
        img_b = Image.fromarray(np.uint8(img_b))
        img_l = Image.fromarray(np.uint8(img_l))

        img_b = (img_b - np.mean(img_b)) / np.std(img_b)
        img_l = (img_l - np.mean(img_l)) / np.std(img_l)

        img_b = torch.tensor(np.array(img_b), dtype=torch.float32).permute(2, 0, 1)  # Convert to CxHxW
        img_l = torch.tensor(np.array(img_l), dtype=torch.float32).permute(2, 0, 1)  # Convert to CxHxW

        # Extract numeric data
        numeric_data = row[['Age', 'Gender', 'Weight', 'Height', 'Diabetes Status', 'A1C',
                            'Baseline Fasting Glucose', 'Insulin', 'Triglycerides', 'Cholesterol',
                            'HDL', 'Non-HDL', 'LDL', 'VLDL', 'CHO/HDL Ratio', 'HOMA-IR', 'BMI',
                            'Race_African American', 'Race_Hispanic/Latino', 'Race_White']].values.astype(np.float32)
        numeric_data = (numeric_data - np.mean(numeric_data)) / np.std(numeric_data)
        numeric_data = torch.tensor(numeric_data, dtype=torch.float32)

        viome_sequential = np.array(row['viome_sequential']).reshape(27, 1)
        viome_sequential = (viome_sequential - np.mean(viome_sequential)) / np.std(viome_sequential)

        return {
            'sequential_data': torch.tensor(sequential_data, dtype=torch.float32),
            'viome_sequential': torch.tensor(viome_sequential, dtype=torch.float32),
            'img_b': img_b,
            'img_l': img_l,
            'numeric_data': numeric_data
        }

In [214]:
dataset_train = CustomDataset(combined_train)
dataset_test = CustomDataset(combined_test)

In [215]:
combined_train['img_b'].apply(lambda x: x.shape).unique()

array([(64, 64, 3)], dtype=object)

In [216]:
for i in range(1):
    sample = dataset_train[i]
    print(f"Sample {i}:")
    print(f"  sequential_data shape: {sample['sequential_data'].shape}")
    print(f"  viome_sequential shape: {sample['viome_sequential'].shape}")
    print(f"  img_b shape: {sample['img_b'].shape}")
    print(f"  img_l shape: {sample['img_l'].shape}")
    print(f"  numeric_data shape: {sample['numeric_data'].shape}")

Sample 0:
  sequential_data shape: torch.Size([288, 2])
  viome_sequential shape: torch.Size([27, 1])
  img_b shape: torch.Size([3, 64, 64])
  img_l shape: torch.Size([3, 64, 64])
  numeric_data shape: torch.Size([20])


In [217]:
len(combined_test)

73

In [218]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.models as models

class CombinedModel(nn.Module):
    def __init__(self):
        super(CombinedModel, self).__init__()
        
        # Sequential data sub-network
        self.seq_net = nn.LSTM(input_size=2, hidden_size=64, num_layers=2, batch_first=True)
        self.seq_fc = nn.Linear(64, 32)
        
        # Viome sequential data sub-network
        self.viome_net = nn.LSTM(input_size=1, hidden_size=64, num_layers=2, batch_first=True)
        self.viome_fc = nn.Linear(64, 32)
        
        # Image data sub-network using pre-trained ResNet
        self.img_net = models.resnet18(pretrained=True)
        self.img_net.fc = nn.Linear(self.img_net.fc.in_features, 32)  # Modify the final layer
        
        # Numeric data sub-network
        self.num_net = nn.Sequential(
            nn.Linear(20, 32),
            nn.ReLU(),
            nn.Linear(32, 16)
        )
        
        # Combined fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(32 + 32 + 32 + 32 + 16, 64),  # Adjust input size to include both sequential outputs
            nn.ReLU(),
            nn.Linear(64, 1)  # Assuming a regression task
        )
    
    def forward(self, sequential_data, img_b, img_l, numeric_data, viome_sequential):
        # Process sequential data
        seq_out, _ = self.seq_net(sequential_data)
        seq_out = self.seq_fc(seq_out[:, -1, :])  # Take the last output of the LSTM and pass through a linear layer
        
        # Process viome sequential data
        viome_out, _ = self.viome_net(viome_sequential)
        viome_out = self.viome_fc(viome_out[:, -1, :])  # Take the last output of the LSTM and pass through a linear layer
        
        # Process image data
        img_b_out = self.img_net(img_b)
        img_l_out = self.img_net(img_l)
        
        # Process numeric data
        num_out = self.num_net(numeric_data)
        
        # Concatenate all outputs
        combined = torch.cat((seq_out, viome_out, img_b_out, img_l_out, num_out), dim=1)
        
        # Pass through fully connected layers
        out = self.fc(combined)
        
        return out

In [219]:
class RMSRELoss(nn.Module):
    def __init__(self):
        super(RMSRELoss, self).__init__()

    def forward(self, y_pred, y_true):
        epsilon = 1e-6  # Small value to avoid division by zero
        relative_error = (y_pred - y_true) / (y_true + epsilon)
        return torch.sqrt(torch.mean(relative_error ** 2))

In [220]:
train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=32, shuffle=False)

In [221]:
model = CombinedModel()
criterion = RMSRELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.0001)



In [222]:
num_epochs = 500
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        sequential_data = batch['sequential_data']
        img_b = batch['img_b']
        img_l = batch['img_l']
        numeric_data = batch['numeric_data']
        viome_sequential = batch['viome_sequential']
        
        # Forward pass
        outputs = model(sequential_data, img_b, img_l, numeric_data, viome_sequential)
        loss = criterion(outputs, torch.zeros_like(outputs))  # Replace with actual target values
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

print("Training complete.")

  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_b = (img_b - np.mean(img_b)) / np.std(img_b)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_b = Image.fromarray(np.uint8(img_b))
  img_l = Image.fromarray(np.uint8(img_l))
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_b = (img_b - np.mean(img_b)) / np.std(img_b)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_b = Image.fromarray(np.uint8(img_b))
  img_l = Image.fromarray(np.uint8(img_l))
  cgm_sequential = (cgm_sequential - np.mean(cgm_sequential)) / np.std(cgm_sequential)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_b = (img_b - np.mean(img_b)) / np.std(img_b)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_b = Image.fromarray(np.uint8(img_b))
  img_l = Image.

Epoch [1/500], Loss: nan


  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  cgm_sequential = (cgm_sequential - np.mean(cgm_sequential)) / np.std(cgm_sequential)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_b = (img_b - np.mean(img_b)) / np.std(img_b)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_b = Image.fromarray(np.uint8(img_b))
  img_l = Image.fromarray(np.uint8(img_l))
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_b = (img_b - np.mean(img_b)) / np.std(img_b)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_b = Image.fromarray(np.uint8(img_b))
  img_l = Image.fromarray(np.uint8(img_l))
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.

Epoch [2/500], Loss: nan


  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  cgm_sequential = (cgm_sequential - np.mean(cgm_sequential)) / np.std(cgm_sequential)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))
  img_b = (img_b - np.mean(img_b)) / np.std(img_b)
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_b = Image.fromarray(np.uint8(img_b))
  img_l = Image.fromarray(np.uint8(img_l))
  img_l = (img_l - np.mean(img_l)) / np.std(img_l)
  img_l = Image.fromarray(np.uint8(img_l))


KeyboardInterrupt: 