In [1]:
import pandas as pd
import numpy as np
import torchvision.transforms as transforms
from PIL import Image
from io import BytesIO
import ast
import re
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_sequence
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torchvision.models as models
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch.optim as optim

In [2]:
img_train = pd.read_csv('633FinalData/img_train.csv')
cgm_train = pd.read_csv('633FinalData/cgm_train.csv')
demo_viome_train = pd.read_csv('633FinalData/demo_viome_train.csv')
label_train = pd.read_csv('633FinalData/label_train.csv')
img_test = pd.read_csv('633FinalData/img_test.csv')
cgm_test = pd.read_csv('633FinalData/cgm_test.csv')
demo_viome_test= pd.read_csv('633FinalData/demo_viome_test.csv')
label_test = pd.read_csv('633FinalData/label_test_breakfast_only.csv')

In [3]:
def convert_image(image_str):
    image_data = ast.literal_eval(image_str)
    image_array = np.array(image_data, dtype=np.uint8)
    return image_array.astype(np.float32) / 255.0


In [4]:
def convert_str(image_str):
    image_data = ast.literal_eval(image_str)
    image_array = np.array(image_data, dtype=np.float64)
    return image_array

In [5]:
def get_time(step):
    time_bar = np.zeros((288, 1))
    time_bar[step - 1, 0] = 1
    return time_bar

In [6]:
scaler_global = MinMaxScaler()

In [None]:
def data_preprocess(img_train,cgm_train,label_train,demo_viome_train,train=0):
    img_train = img_train.drop('Subject ID', axis=1)
    cgm_train = cgm_train.drop('Subject ID', axis=1)
    label_train = label_train.drop('Subject ID', axis=1)
    demo_viome_train = demo_viome_train.drop('Subject ID', axis=1)
    img_train = img_train.drop('Day', axis=1)
    cgm_train = cgm_train.drop('Day', axis=1)
    label_train = label_train.drop('Day', axis=1)
    
    repeated_demo_viome_train = demo_viome_train.loc[demo_viome_train.index.repeat(9)].reset_index(drop=True)
    
    
    combined_data = pd.concat([img_train, cgm_train, label_train,repeated_demo_viome_train], axis=1)
    combined_data = combined_data.dropna(subset=['Image Before Breakfast'])
    
    idx = []
    for i in range (combined_data.shape[0]):
        for j in range (combined_data.shape[1]):
            cell = combined_data.iloc[i, j]
            if isinstance(cell, str) and len(cell) == 2:  # Checking string length
                idx.append(i)
    
    combined_data.drop(idx, inplace=True)
    if train:   
        combined_data['Breakfast Time'] = pd.to_datetime(combined_data['Breakfast Time'])
        
        combined_data['Step'] = np.round((combined_data['Breakfast Time'].dt.hour*60+combined_data['Breakfast Time'].dt.minute)/5).astype(int)
        combined_data['Breakfast minute'] = combined_data['Step'].apply(get_time)

        combined_data['Lunch Time'] = pd.to_datetime(combined_data['Lunch Time'])
        combined_data['Step'] = np.round((combined_data['Lunch Time'].dt.hour*60+combined_data['Lunch Time'].dt.minute)/5).astype(int)
        combined_data['Lunch minute'] = combined_data['Step'].apply(get_time)
    
    
    else:
        combined_data['Breakfast Time'] = pd.to_datetime(combined_data['Breakfast Time'])     
        combined_data['Step'] = np.round((combined_data['Breakfast Time'].dt.hour*60+combined_data['Breakfast Time'].dt.minute)/5).astype(int)
        combined_data['Breakfast minute'] = combined_data['Step'].apply(get_time)
    
        
    combined_data['cgm_numbers'] = combined_data['CGM Data'].apply(lambda x: [float(num) for num in re.findall(r",\s([\d\.]+)\)", x)])


    combined_data['gcm_start'] = combined_data['CGM Data'].apply(lambda x: ast.literal_eval(x)[0][0])

    combined_data['gcm_start_step']  = ((combined_data['gcm_start'].astype('datetime64[ns]').dt.hour*60  + combined_data['gcm_start'].astype('datetime64[ns]').dt.minute)/5).astype(int)
    
    combined_data['repeated_front'] = combined_data.apply(lambda row: [row['cgm_numbers'][0]] * row['gcm_start_step']+ row['cgm_numbers'][1:],axis=1)
    
    
    combined_data['gcm_number_bar'] = combined_data.apply(lambda row: row['repeated_front'][:-1] + [row['repeated_front'][-1]] * (289-len(row['repeated_front'])),axis=1)
    
    combined_data['Race'] = pd.Categorical(combined_data['Race'], categories=['Hispanic/Latino', 'White', 'Other'])
    
    # If needed, convert the categories into numerical codes
    combined_data['Race_Categorical'] = combined_data['Race'].cat.codes
    
    
    combined_data = combined_data.drop(['Step','Breakfast Time','Lunch Time','CGM Data','Race','Step','cgm_numbers','gcm_start','gcm_start_step','repeated_front'], axis=1)
    
    # Print all column names
    print(combined_data.columns.tolist())
    
    
    
    combined_data['Viome'] = combined_data['Viome'].apply(convert_str)
    if train:
        img_b = combined_data[['Image Before Breakfast']]

        img_l = combined_data[['Image Before Lunch']]
        rest = combined_data.drop(columns=['Image Before Breakfast', 'Image Before Lunch'])
    else:
        img_b = combined_data[['Image Before Breakfast']]

        rest = combined_data.drop(columns=['Image Before Breakfast', 'Image Before Lunch'])
    
    if train:
        label_b = combined_data[['Breakfast Calories']]

        label_l = combined_data[['Lunch Calories']]
        rest = rest.drop(columns=['Breakfast Calories', 'Lunch Calories', 'Breakfast Carbs',
                                  'Lunch Carbs', 'Breakfast Fat', 'Lunch Fat',
                                  'Breakfast Protein', 'Lunch Protein',])
    else:
        label_b = combined_data[['Breakfast Calories']]
        rest = rest.drop(columns=['Breakfast Calories', 'Breakfast Carbs','Breakfast Fat', 
                                  'Breakfast Protein'])
        

    
    catagorical = combined_data[['Gender','Diabetes Status','Race_Categorical']]
    rest = rest.drop(columns=['Gender','Diabetes Status','Race_Categorical'])

    if train:       
        time_set  = combined_data[['gcm_number_bar','Viome','Breakfast minute']]
        time_set_l = combined_data[['Lunch minute']]
        continues = rest.drop(columns=['gcm_number_bar','Viome','Breakfast minute','Lunch minute'])
    else:
        time_set  = combined_data[['gcm_number_bar','Viome','Breakfast minute']]
        continues = rest.drop(columns=['gcm_number_bar','Viome','Breakfast minute'])
    scaler1 = MinMaxScaler()  # Or StandardScaler for standardization
    scaler2 = MinMaxScaler()
    scaler3 = MinMaxScaler()
    scaler4 = MinMaxScaler()
    scaler5 = MinMaxScaler()





    ###
    # Convert list of sequences to a NumPy array
    gcm_number_array = np.array(time_set['gcm_number_bar'].tolist())  
    gcm_number_array = gcm_number_array.reshape(-1,288)
    # Fit and transform the array
    scaled_gcm_number_array = scaler1.fit_transform(gcm_number_array)
    scaled_gcm_number_array_expand = np.expand_dims(scaled_gcm_number_array, axis=-1)
    gcm_number_tensors = torch.tensor(scaled_gcm_number_array_expand, dtype=torch.float32)


    
    Viome_array = np.array(time_set['Viome'].tolist())
    # Fit and transform the array
    scaled_Viome_sequences = scaler2.fit_transform(Viome_array)
    scaled_Viome_sequences_expand = np.expand_dims(scaled_Viome_sequences, axis=-1)
    Viome_tensors = torch.tensor(scaled_Viome_sequences_expand, dtype=torch.float32)

    if train:        
        minute_array = np.array(time_set['Breakfast minute'].tolist())
        minute_array = minute_array.reshape(-1,288)
        # print(minute_array.shape)
        # Fit and transform the array
        scaled_minute_array = scaler3.fit_transform(minute_array)
        scaled_minute_array_expand = np.expand_dims(scaled_minute_array, axis=-1)
        minute_tensors = torch.tensor(scaled_minute_array_expand, dtype=torch.float32)




        minute_array_l = np.array(time_set_l['Lunch minute'].tolist())
        minute_array_l = minute_array_l.reshape(-1,288)
        # print(minute_array.shape)
        # Fit and transform the array
        scaled_minute_l_array = scaler3.fit_transform(minute_array_l)
        scaled_minute_l_array_expand = np.expand_dims(scaled_minute_l_array, axis=-1)
        minute_l_tensors = torch.tensor(scaled_minute_l_array_expand, dtype=torch.float32)
    else:
        minute_array = np.array(time_set['Breakfast minute'].tolist())      
        # Fit and transform the array
        minute_array = minute_array.reshape(-1,288)
        scaled_minute_array = scaler3.fit_transform(minute_array)
        scaled_minute_array_expand = np.expand_dims(scaled_minute_array, axis=-1)
        minute_tensors = torch.tensor(scaled_minute_array_expand, dtype=torch.float32)
    
    # Convert the scaled array back to a tensor


    
    # Pad the variable-length sequences

    # print(len(fixed_sequence_tensors[0]))
    # print(len(padded_variable_sequences[0]))
    transform = transforms.Compose([
    transforms.ToTensor(),  # Convert NumPy array to Tensor
    transforms.Resize((224, 224)),  # Resize to 224x224
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
    ])


    

    if train:  
        img_b['Image Before Breakfast'] = img_b['Image Before Breakfast'].apply(convert_image)
        # img_set['Image Before Lunch'] = img_set['Image Before Lunch'].apply(convert_image)    
        # Convert numpy arrays into tensors and stack them
        # img_b_tensors = torch.stack([torch.tensor(img) for img in img_b['Image Before Breakfast']])
        # img_tensors_lunch = torch.stack([torch.tensor(img) for img in img_set['Image Before Lunch']])
        img_b_tensors = torch.stack(img_b['Image Before Breakfast'].apply(lambda x: transform(x)).tolist())




        img_l['Image Before Lunch'] = img_l['Image Before Lunch'].apply(convert_image)
        # img_set['Image Before Lunch'] = img_set['Image Before Lunch'].apply(convert_image)    
        # Convert numpy arrays into tensors and stack them
        # img_l_tensors = torch.stack([torch.tensor(img) for img in img_l['Image Before Lunch']])
        img_l_tensors = torch.stack(img_l['Image Before Lunch'].apply(lambda x: transform(x)).tolist())

        # img_tensors_lunch = torch.stack([torch.tensor(img) for img in img_set['Image Before Lunch']])
    else:
        img_b['Image Before Breakfast'] = img_b['Image Before Breakfast'].apply(convert_image)
        # img_set['Image Before Lunch'] = img_set['Image Before Lunch'].apply(convert_image)    
        # Convert numpy arrays into tensors and stack them
        # img_b_tensors = torch.stack([torch.tensor(img) for img in img_b['Image Before Breakfast']])
        img_b_tensors = torch.stack(img_b['Image Before Breakfast'].apply(lambda x: transform(x)).tolist())

        # img_tensors_lunch = torch.stack([torch.tensor(img) for img in img_set['Image Before Lunch']])
    
    
    continuous_scaled = scaler4.fit_transform(continues)
    continuous_tensor = torch.tensor(continuous_scaled, dtype=torch.float32)

        
    if train:
        # Ensure labels are numeric and then convert to tensor
        label_l = label_l.apply(pd.to_numeric, errors='coerce')  # Convert to numeric, coercing errors
        # print(label_l.shape)
        label_l_scaled = scaler_global.fit_transform(label_l)
        label_l_tensor = torch.tensor(label_l_scaled, dtype=torch.float32)

    label_b = label_b.apply(pd.to_numeric, errors='coerce')  # Convert to numeric, coercing errors
    label_b_scaled = scaler5.fit_transform(label_b)
    label_b_tensor = torch.tensor(label_b_scaled, dtype=torch.float32)

    # Ensure categorical data is numeric and then convert to tensor
    catagorical = catagorical.apply(pd.to_numeric, errors='coerce')
    catagorical_tensor = torch.tensor(catagorical.values, dtype=torch.float32)
    
    # print(len(continuous_tensor[0]))
    # print(len(catagorical_tensor[0]))
    # print(img_tensors_breakfast[0].shape)
    # print(img_tensors_lunch[0].shape)

    if train:
        return img_b_tensors,minute_tensors,gcm_number_tensors,Viome_tensors, catagorical_tensor, continuous_tensor,label_b_tensor,label_l_tensor,img_l_tensors,minute_l_tensors
    else:
        return img_b_tensors,minute_tensors,gcm_number_tensors,Viome_tensors, catagorical_tensor, continuous_tensor,label_b_tensor

In [8]:
img_b_train,minute_b_train,gcm_number_train,Viome_train, catagorical_train, continuous_train,label_b_train,label_l_train,img_l_train,minute_l_train= data_preprocess(img_train,cgm_train,label_train,demo_viome_train,train=1)
img_b_test,minute_b_test,gcm_number_test,Viome_test, catagorical_test, continuous_test,label_b_test= data_preprocess(img_test,cgm_test,label_test,demo_viome_test,train=0)

['Image Before Breakfast', 'Image Before Lunch', 'Breakfast Calories', 'Lunch Calories', 'Breakfast Carbs', 'Lunch Carbs', 'Breakfast Fat', 'Lunch Fat', 'Breakfast Protein', 'Lunch Protein', 'Age', 'Gender', 'Weight', 'Height', 'Diabetes Status', 'A1C', 'Baseline Fasting Glucose', 'Insulin', 'Triglycerides', 'Cholesterol', 'HDL', 'Non-HDL', 'LDL', 'VLDL', 'CHO/HDL Ratio', 'HOMA-IR', 'BMI', 'Viome', 'Breakfast minute', 'Lunch minute', 'gcm_number_bar', 'Race_Categorical']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  img_b['Image Before Breakfast'] = img_b['Image Before Breakfast'].apply(convert_image)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  img_l['Image Before Lunch'] = img_l['Image Before Lunch'].apply(convert_image)


['Image Before Breakfast', 'Image Before Lunch', 'Breakfast Calories', 'Breakfast Carbs', 'Breakfast Fat', 'Breakfast Protein', 'Age', 'Gender', 'Weight', 'Height', 'Diabetes Status', 'A1C', 'Baseline Fasting Glucose', 'Insulin', 'Triglycerides', 'Cholesterol', 'HDL', 'Non-HDL', 'LDL', 'VLDL', 'CHO/HDL Ratio', 'HOMA-IR', 'BMI', 'Viome', 'Breakfast minute', 'gcm_number_bar', 'Race_Categorical']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  img_b['Image Before Breakfast'] = img_b['Image Before Breakfast'].apply(convert_image)


In [9]:
minute_b_train.shape

torch.Size([271, 288, 1])

In [10]:
class CustomTrainDataset(Dataset):
    def __init__(self,img_b_train,minute_b_train,gcm_number_train,
                 Viome_train, catagorical_train, continuous_train,
                 label_b_train,label_l_train,img_l_train,
                 minute_l_train):
        
        self.catagorical_train = np.vstack([catagorical_train,catagorical_train])
        self.gcm_number_train =  np.vstack([gcm_number_train,gcm_number_train])
        self.continuous_train = np.vstack([continuous_train,continuous_train])
        self.Viome_train = np.vstack([Viome_train,Viome_train])
        
        self.Viome_train = np.reshape(self.Viome_train,(-1,27,1))

        
        self.minute_train = np.vstack([minute_b_train,minute_l_train])
        self.img_train = np.vstack([img_b_train,img_l_train])

        self.time = np.stack([self.minute_train, self.gcm_number_train], axis=-1)
        print(self.Viome_train.shape)
        
        self.label = np.vstack([label_b_train,label_l_train])

    
    def __len__(self):
        # Assuming all tensors have the same first dimension size
        return len(self.img_train)

    def __getitem__(self, idx):
        # Fetch each tensor's slice at the given index
        return {
            'img': self.img_train[idx],
            'catagorical': self.catagorical_train[idx],
            'time': self.time[idx],
            'Viome_train': self.Viome_train[idx],
            'continuous': self.continuous_train[idx],
            'label': self.label[idx],

        }

In [11]:
gcm_number_train.shape

torch.Size([271, 288, 1])

In [33]:
class CustomTestDataset(Dataset):
    def __init__(self, img_b_train,minute_b_train,gcm_number_train,
                 Viome_train, catagorical_train, continuous_train,
                 label_b_train):
        self.img_train = img_b_train
        self.catagorical_train = catagorical_train
        self.minute_train = minute_b_train
        self.gcm_number_train = gcm_number_train
        self.Viome_train = Viome_train
        self.Viome_train = np.reshape(self.Viome_train,(-1,27,1))
        self.continuous_train = continuous_train
        self.label = label_b_train
        self.time = np.stack([self.minute_train, self.gcm_number_train], axis=-1)


    
    def __len__(self):
        # Assuming all tensors have the same first dimension size
        return len(self.img_train)

    def __getitem__(self, idx):
        # Fetch each tensor's slice at the given index
        return {
            'img': self.img_train[idx],
            'catagorical': self.catagorical_train[idx],
            'time': self.time[idx],
            'Viome_train': self.Viome_train[idx],
            'continuous': self.continuous_train[idx],
            'label': self.label[idx]
        }

In [34]:
# Assume tensors have already been defined as img_tensors, label_tensor, etc.
train_dataset = CustomTrainDataset(img_b_train,minute_b_train,gcm_number_train,Viome_train, catagorical_train, continuous_train,label_b_train,label_l_train,img_l_train,minute_l_train)

# Define DataLoader with batch size, shuffling, etc.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0,drop_last=True)


test_dataset = CustomTestDataset(img_b_test,minute_b_test,gcm_number_test,Viome_test, catagorical_test, continuous_test,label_b_test)

# Define DataLoader with batch size, shuffling, etc.
test_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0,drop_last=True)

(542, 27, 1)


In [48]:
class MultimodalModel(nn.Module):
    def __init__(self, image_model_name="resnet18", fusion_dim=512):
        super(MultimodalModel, self).__init__()

        # Image Encoder
        self.image_model = models.resnet18(pretrained=True)
        self.image_model.fc = nn.Linear(self.image_model.fc.in_features, 128)

        # Time-Series Encoder
        self.lstm1 = nn.LSTM(input_size=1, hidden_size=128, num_layers=32, batch_first=True)#fixed for 1
        self.lstm2 = nn.LSTM(input_size=2, hidden_size=128, num_layers=32, batch_first=True)

        # Tabular Data Encoder
        self.cata_fc = nn.Sequential(
            nn.Linear(3, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 128),
        )
        self.conti_fc = nn.Sequential(
            nn.Linear(15, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 128),
        )

        # Fusion Layer
        self.fusion_fc = nn.Sequential(
            nn.Linear(128 + 256 + 256  + 128 + 128, fusion_dim),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fusion_dim, 128),
            nn.ReLU(),
        )

        # Regression Head
        self.regressor = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),  # Single output for calorie prediction
            nn.Sigmoid()
        )

    def forward(self, image, time_series_1,time_series_2, catagotical, continous):
        # Process each modality
        # image = image.permute(0, 3, 1, 2)  # Reorder from [batch, height, width, channels] to [batch, channels, height, width]
        img_feat = self.image_model(image)

        # print(time_series_1.shape)
        # print(time_series_2.shape)
        _, (time_feat_1, _) = self.lstm2(time_series_1)
        _, (time_feat_2, _) = self.lstm1(time_series_2)

        # time_feat_1 = time_feat_1[-1]  # Last hidden state
        # time_feat_2 = time_feat_2[-1]  # Last hidden state



        lstm_final_output1 = torch.cat((time_feat_1[-2], time_feat_1[-1]), dim=1)
        lstm_final_output2 = torch.cat((time_feat_2[-2], time_feat_2[-1]), dim=1)

        
        cat_feat = self.cata_fc(catagotical)
        conti_feat = self.conti_fc(continous)


        
        # print(f"img_feat: {img_feat.shape}")
        # print(f"time_feat_1: {lstm_final_output1.shape}")
        # print(f"time_feat_2: {lstm_final_output2.shape}")
        # print(f"cat_feat: {cat_feat.shape}")
        # print(f"conti_feat: {conti_feat.shape}")

        # Fuse features
        fused = torch.cat([img_feat, lstm_final_output1,lstm_final_output2, cat_feat,conti_feat], dim=1)
        fusion_out = self.fusion_fc(fused)

        # Predict
        output = self.regressor(fusion_out)
        return output





In [49]:
model = MultimodalModel(
    image_model_name="resnet18",
    fusion_dim=512
)

In [50]:
class RMSRELoss(torch.nn.Module):
    def __init__(self):
        super(RMSRELoss, self).__init__()

    def forward(self, y_pred, y_true):
        # Ensure no division by zero
        relative_error = (y_pred - y_true) / (y_true + 1e-8)  # Add epsilon for numerical stability
        return torch.sqrt(torch.mean(relative_error ** 2))


In [51]:
def test_model(model, test_loader, criterion):
    model.eval()  # Set model to evaluation mode
    total_loss = 0.0
    total_rmse = 0.0
    total_rmsre = 0.0
    num_batches = 0

    with torch.no_grad():  # Disable gradient computation for testing
        for data in test_loader:
            images = data['img']
            label_b = data['label_b']
            categoricals = data['catagorical']
            time = data['time']
            Viome = data['Viome_train']
            continuous = data['continuous']
            label = data['label']

            # Forward pass
            outputs = model(images,time, Viome, categoricals, continuous)
            loss = criterion(outputs, label)  # Compute loss

            # Compute RMSE
            rmse = torch.sqrt(torch.mean((outputs - label) ** 2))

            relative_error = (outputs - label) / (outputs + 1e-8)  # Add epsilon for numerical stability
            rmsre = torch.sqrt(torch.mean(relative_error ** 2))

            # Accumulate metrics
            total_loss += loss.item()
            total_rmse += rmse.item()
            total_rmsre += rmsre.item()
            num_batches += 1

    # Average metrics over all batches
    avg_loss = total_loss / num_batches
    avg_rmse = total_rmse / num_batches
    avg_rmsre = total_rmse / num_batches

    return avg_loss, avg_rmse, avg_rmsre 

In [52]:
class RMSRELoss(nn.Module):
    def __init__(self):
        super(RMSRELoss, self).__init__()

    def forward(self, y_pred, y_true):
        # Compute relative error
        relative_error = (y_pred - y_true) / (y_true + 1e-8)  # Avoid division by zero
        # Compute RMSRE
        rmsre = torch.sqrt(torch.mean(relative_error ** 2))
        return rmsre

In [53]:
criterion = RMSRELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-4)

# # Train the model
# trained_model = train_model(model, dataloaders, criterion, optimizer, num_epochs=20)

In [42]:
num_epochs = 2000
for epoch in range(num_epochs):
    model.train()
    for data in train_loader:
        images = data['img']
        categoricals = data['catagorical']
        time = data['time']
        Viome = data['Viome_train']
        continuous = data['continuous']
        label = data['label']
        # print(time.shape)
        # print(Viome.shape)
        outputs = model(images,time, Viome, categoricals, continuous)
        loss = criterion(outputs, label)

        # Forward pass

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [1/2000], Loss: 415.2565
Epoch [2/2000], Loss: 0.8839
Epoch [3/2000], Loss: 0.8478
Epoch [4/2000], Loss: 0.8101
Epoch [5/2000], Loss: 0.9014
Epoch [6/2000], Loss: 0.9354
Epoch [7/2000], Loss: 0.8839
Epoch [8/2000], Loss: 0.8660
Epoch [9/2000], Loss: 0.8292


In [23]:
num_epochs = 2000
for epoch in range(num_epochs):
    model.train()
    for data in train_loader:
        # 获取数据
        images = data['img']
        categoricals = data['catagorical']
        time = data['time']
        Viome = data['Viome_train']
        continuous = data['continuous']
        label = data['label']
        # print(time.shape)
        # print(Viome.shape)
        outputs = model(images,time, Viome, categoricals, continuous)
        loss = criterion(outputs, label)

        # Forward pass

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [1/2000], Loss: 10042134.0000
Epoch [2/2000], Loss: 2080225.0000
Epoch [3/2000], Loss: 182633.2969
Epoch [4/2000], Loss: 187469.2344
Epoch [5/2000], Loss: 7367.8613
Epoch [6/2000], Loss: 4061.2874
Epoch [7/2000], Loss: 1752.8878
Epoch [8/2000], Loss: 344.0167
Epoch [9/2000], Loss: 5825.9443
Epoch [10/2000], Loss: 775.9544


KeyboardInterrupt: 

In [None]:
def predict(model, test_loader):
    model.eval()  # Set model to evaluation mode
    

    with torch.no_grad():  # Disable gradient computation for testing
        for data in test_loader:
            images = data['img']
            categoricals = data['catagorical']
            time = data['time']
            Viome = data['Viome_train']
            continuous = data['continuous']
            label = data['label']

            # Forward pass
    outputs = model(images,time, Viome, categoricals, continuous)
    Prediction = scaler_global.inverse_transform(outputs.detach().numpy())


    return Prediction

In [61]:
def predict(model, test_loader):
    model.eval()  # Set model to evaluation mode
    
    predictions = []  # Initialize an empty list to store predictions
    
    with torch.no_grad():  # Disable gradient computation for testing
        for data in test_loader:
            images = data['img']
            categoricals = data['catagorical']
            minute = data['minute_train']
            gcm_number = data['gcm_number_train']
            Viome = data['Viome_train']
            continuous = data['continuous']
            label = data['label']

            # Forward pass
            outputs = model(images, minute, gcm_number, Viome, categoricals, continuous)

            # Reverse normalization (assuming outputs are normalized)
            outputs = scaler_global.inverse_transform(outputs.detach().cpu().numpy())

            # Append the batch of predictions to the predictions list
            predictions.append(outputs)
    
    # Concatenate all predictions along the first dimension (batch dimension)
    predictions = np.concatenate(predictions, axis=0)
    
    return predictions

In [62]:
outputs  = predict(model, test_loader)

In [64]:
outputs.size

512

In [None]:
df = pd.DataFrame(outputs, columns=['Column1'])

In [None]:
df['row_id'] = range(73)

In [99]:
df['row_id'] = range(73)
# Save to CSV
df.to_csv('my_data.csv', index=True)  # index=False means do not write row names (index)


ValueError: Length of values (73) does not match length of index (64)