In [10]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.utils.data import DataLoader
from instagram_dataset_v2 import InstagramDataset
from combine_model import CombinedModel

In [3]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [5]:
from torch.utils.tensorboard import SummaryWriter
import os

writer = SummaryWriter(log_dir='./runs/instagram_model')

In [7]:
dataset = InstagramDataset(csv_file='instagram_data.csv', root_dir='.', transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [8]:
from torch.utils.data import random_split

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])


train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

In [11]:
model = CombinedModel(num_numerical_features=3)  # 3 numerical features: no_of_comments, t, follower_count_at_t
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)



In [12]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for images, numerical_features, likes in train_loader:
        optimizer.zero_grad()
        
        # Forward pass
        predictions = model(images, numerical_features)
        loss = criterion(predictions.squeeze(), likes)
        train_loss += loss.item()
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
    
    # Average training loss for the epoch
    train_loss /= len(train_loader)
    
    # Validate the model
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, numerical_features, likes in val_loader:
            predictions = model(images, numerical_features)
            loss = criterion(predictions.squeeze(), likes)
            val_loss += loss.item()
    
    # Average validation loss for the epoch
    val_loss /= len(val_loader)
    
    # Log losses to TensorBoard
    writer.add_scalars('Loss', {'Train': train_loss, 'Validation': val_loss}, epoch)

    # Print progress
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

# Close TensorBoard writer
writer.close()


Epoch [1/10], Train Loss: 448259604551744.6875, Validation Loss: 151939576746.6667
Epoch [2/10], Train Loss: 19019042586624.0000, Validation Loss: 115656219648.0000
Epoch [3/10], Train Loss: 5100802257197.8105, Validation Loss: 50026240021.3333
Epoch [4/10], Train Loss: 2583874742897.1787, Validation Loss: 49605912650.6667
Epoch [5/10], Train Loss: 1494892178507.4526, Validation Loss: 51443123498.6667
Epoch [6/10], Train Loss: 1050801079220.5474, Validation Loss: 48408200128.0000
Epoch [7/10], Train Loss: 707810094478.8210, Validation Loss: 44761401568.0000
Epoch [8/10], Train Loss: 475719865613.4737, Validation Loss: 44916511392.0000
Epoch [9/10], Train Loss: 401173977885.6421, Validation Loss: 47196576938.6667
Epoch [10/10], Train Loss: 318881232400.1684, Validation Loss: 52003659381.3333


In [13]:
model.eval()
val_predictions, val_targets = [], []

with torch.no_grad():
    for images, numerical_features, likes in val_loader:
        predictions = model(images, numerical_features)
        val_predictions.extend(predictions.squeeze().tolist())
        val_targets.extend(likes.tolist())

# Calculate Mean Squared Error (MSE)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(val_targets, val_predictions)
print(f"Final Validation MSE: {mse:.4f}")

Final Validation MSE: 50727718508.3691


In [16]:
# Evaluating the model on the test set
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for images, numerical_features, labels in val_loader:
        # Forward pass through the model
        outputs = model(images, numerical_features).squeeze()

        # Take logarithm of predictions and labels
        log_preds = torch.log(outputs)
        log_labels = torch.log(labels)

        # Calculate bounds for predictions to be within 20% of true value in log space
        lower_bound = log_labels * 0.8
        upper_bound = log_labels * 1.2

        # Check how many predictions fall within range
        within_range = (log_preds >= lower_bound) & (log_preds <= upper_bound)
        correct += within_range.sum().item()
        total += labels.size(0)

    # Calculate accuracy
    accuracy = correct / total * 100
    print(f'Accuracy: {accuracy:.2f}%')

# Calculate Average Relative Difference
test_relative_differences = []
with torch.no_grad():
    for images, numerical_features, labels in val_loader:
        # Forward pass through the model
        outputs = model(images, numerical_features).squeeze()

        # Take logarithm of predictions and labels
        log_preds = torch.log(outputs)
        log_labels = torch.log(labels)

        # Calculate relative differences
        relative_difference = torch.abs(log_preds - log_labels) / torch.abs(log_labels)
        test_relative_differences.append(relative_difference)

# Compute the mean relative difference
average_relative_difference = torch.cat(test_relative_differences).mean().item()
print(f'Average Relative Difference on Test Set: {average_relative_difference*100:.4f}%')

Accuracy: 94.19%
Average Relative Difference on Test Set: 8.2962%


In [18]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for images, numerical_features, labels in val_loader:
        # Forward pass through the model
        outputs = model(images, numerical_features).squeeze()

        # Take logarithm of predictions and labels
        log_preds = outputs
        log_labels = labels

        # Calculate bounds for predictions to be within 20% of true value in log space
        lower_bound = log_labels * 0.5
        upper_bound = log_labels * 1.5

        # Check how many predictions fall within range
        within_range = (log_preds >= lower_bound) & (log_preds <= upper_bound)
        correct += within_range.sum().item()
        total += labels.size(0)

    # Calculate accuracy
    accuracy = correct / total * 100
    print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 37.38%


In [19]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for images, numerical_features, labels in val_loader:
        # Forward pass through the model
        outputs = model(images, numerical_features).squeeze()

        # Take logarithm of predictions and labels
        log_preds = torch.log(outputs)
        log_labels = torch.log(labels)

        # Calculate bounds for predictions to be within 20% of true value in log space
        lower_bound = log_labels * 0.9
        upper_bound = log_labels * 1.1

        # Check how many predictions fall within range
        within_range = (log_preds >= lower_bound) & (log_preds <= upper_bound)
        correct += within_range.sum().item()
        total += labels.size(0)

    # Calculate accuracy
    accuracy = correct / total * 100
    print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 68.43%
