In [3]:
!pip install -r r.txt

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Using cached torch-2.6.0-cp39-cp39-manylinux1_x86_64.whl (766.7 MB)
Collecting torchvision
  Using cached torchvision-0.21.0-cp39-cp39-manylinux1_x86_64.whl (7.2 MB)
Collecting Pillow
  Using cached pillow-11.1.0-cp39-cp39-manylinux_2_28_x86_64.whl (4.5 MB)
Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
Collecting efficientnet-pytorch
  Using cached efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting validators
  Using cached validators-0.34.0-py3-none-any.whl (43 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)
Collecting nvidia-cuda-cupti-cu12==12.4.127
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)
Collecting networkx
  Using cached networkx

In [1]:
# %%
import torch
import torchvision.transforms as transforms
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from efficientnet_pytorch import EfficientNet


# Set device (MPS for Mac, CUDA for GPU, else CPU)
device = torch.device("cuda" if torch.backends.cuda.is_built() else "mps" if torch.backends.mps.is_built() else "cpu")
print(f'Using {device} for training and inference')


Using cuda for training and inference


In [2]:
# %%
efficientnet = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)
utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_convnets_processing_utils')
efficientnet.eval().to(device)


Using cache found in /uolstore/home/users/sc21cm/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub
Using cache found in /uolstore/home/users/sc21cm/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


EfficientNet(
  (stem): Sequential(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (activation): SiLU(inplace=True)
  )
  (layers): Sequential(
    (0): Sequential(
      (block0): MBConvBlock(
        (depsep): Sequential(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (se): SequentialSqueezeAndExcitation(
          (squeeze): Linear(in_features=32, out_features=8, bias=True)
          (expand): Linear(in_features=8, out_features=32, bias=True)
          (activation): SiLU(inplace=True)
          (sigmoid): Sigmoid()
          (mul_a_quantizer): Identity()
          (mul_b_quantizer): Identity()
        )
      

In [16]:
# %%
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import random

# Base directory containing all category subdirectories
base_dir = "/vol/scratch/SoC/misc/2024/sc21cm/train_256_places365standard/data_256/"

# Define transformation pipeline (resize, normalize)
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize to match EfficientNet input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Collect all image paths and labels
image_paths = []
labels = []

for main_dir in os.listdir(base_dir):
    main_path = os.path.join(base_dir, main_dir)
    if os.path.isdir(main_path):
        for place in os.listdir(main_path):
            place_path = os.path.join(main_path, place)
            if os.path.isdir(place_path):
                files = [os.path.join(place_path, f) for f in os.listdir(place_path) if f.endswith(('.png', '.jpg', '.jpeg'))]
                image_paths.extend(files)
                labels.extend([f"{main_dir}/{place}"] * len(files))

# Shuffle dataset (combined image paths and labels) before splitting
combined = list(zip(image_paths, labels))
random.shuffle(combined)
image_paths, labels = zip(*combined)

# Create a mapping of label names to numeric indices
unique_labels = sorted(set(labels))
label_to_index = {label: idx for idx, label in enumerate(unique_labels)}

# Convert labels to indices
numeric_labels = [label_to_index[label] for label in labels]

# Split dataset (80 for training, 20 for testing)
train_paths, train_labels = image_paths[:int(0.8 * len(image_paths))], numeric_labels[:int(0.8 * len(numeric_labels))]
test_paths, test_labels = image_paths[int(0.8 * len(image_paths)):], numeric_labels[int(0.8 * len(numeric_labels)):]
# train_paths, train_labels = image_paths[:800], numeric_labels[:1000]
# test_paths, test_labels = image_paths[800:1000], numeric_labels[800:1000]
# Define PyTorch dataset
class ImageDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("RGB")
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.long)

# Create Dataset & DataLoader
train_dataset = ImageDataset(train_paths, train_labels, transform=transform)
test_dataset = ImageDataset(test_paths, test_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True, num_workers=8, pin_memory=True)

print(f"Train set: {len(train_dataset)} images, Test set: {len(test_dataset)} images")


Train set: 1224712 images, Test set: 306179 images


In [18]:
import csv

filename = "classmapping.csv"

with open(filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    for key, value in label_to_index.items():
        writer.writerow([value  , key[2:]])

print(f"CSV file '{filename}' has been created.")
print(len(label_to_index))


CSV file 'classmapping.csv' has been created.
58


In [5]:
# %%
import torch.nn as nn
import torch.optim as optim

# Modify the classifier to match the number of classes
num_classes = len(unique_labels)
efficientnet.classifier.fc = nn.Linear(efficientnet.classifier.fc.in_features, num_classes)
efficientnet.to(device)

# Define optimizer
optimizer = optim.Adam(efficientnet.parameters(), lr=0.0001)  # Small LR for fine-tuning


In [6]:
label_counts = {}
for main_dir in os.listdir(base_dir):
    main_path = os.path.join(base_dir, main_dir)
    if os.path.isdir(main_path):
        for place in os.listdir(main_path):
            place_path = os.path.join(main_path, place)
            if os.path.isdir(place_path):
                num_files = len([f for f in os.listdir(place_path) if f.endswith(('.png', '.jpg', '.jpeg'))])
                label = f"{main_dir}/{place}"
                label_counts[label] = num_files

total_samples = sum(label_counts.values())

class_weights = {label: total_samples / count for label, count in label_counts.items()}

In [67]:
print("Sample labels from train_dataset:", train_dataset.labels[:10])
print("Expected labels in class_weights:", list(class_weights.keys())[:10])


Sample labels from train_dataset: [29, 29, 49, 47, 17, 18, 6, 16, 47, 21]
Expected labels in class_weights: ['a/airport_and_airplane', 'a/arabic_cityscape', 'a/art', 'a/amusement_park', 'a/arcade', 'a/army_base', 'b/bar_and_pub', 'b/building_inside', 'b/building', 'b/boats_and_ports']


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import Counter
from datetime import datetime
from torch.cuda.amp import GradScaler, autocast  # For mixed precision training

# Compute class weights efficiently
print('1')
label_to_index = {label: idx for idx, label in enumerate(class_weights.keys())}
index_to_weight = {idx: class_weights[label] for label, idx in label_to_index.items()}

# Convert labels to corresponding weights
weights = torch.tensor([index_to_weight[label] for label in train_dataset.labels], dtype=torch.float32)
print('2')
# Use WeightedRandomSampler to handle imbalance
sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
print('3')

# Define loss function with class weights
criterion = nn.CrossEntropyLoss(weight=torch.tensor(list(class_weights.values())).to(device))

1
2
3


In [71]:
print(index_to_weight)

{0: 76.52159352194342, 1: 78.82657947582514, 2: 76.54455, 3: 102.0594, 4: 153.0891, 5: 306.1782, 6: 76.54455, 7: 35.019810133821345, 8: 30.61782, 9: 61.23564, 10: 153.0891, 11: 153.0891, 12: 153.0891, 13: 102.0594, 14: 61.23564, 15: 45.536482346292274, 16: 102.0594, 17: 34.0198, 18: 61.23564, 19: 353.5545034642032, 20: 153.0891, 21: 306.1782, 22: 34.0198, 23: 153.0891, 24: 102.47613628756945, 25: 154.54179285281649, 26: 34.0198, 27: 103.25718332658843, 28: 76.54455, 29: 14.579914285714286, 30: 45.61517833199249, 31: 44.804817372980565, 32: 153.0891, 33: 128.4735649546828, 34: 153.0891, 35: 306.1782, 36: 76.54455, 37: 306.1782, 38: 15.30891, 39: 61.23564, 40: 76.54455, 41: 306.1782, 42: 306.1782, 43: 153.0891, 44: 51.0297, 45: 102.0594, 46: 30.61782, 47: 23.902618389620123, 48: 153.0891, 49: 12.586665899299504, 50: 27.834381818181818, 51: 76.54455, 52: 76.54455, 53: 102.0594, 54: 76.54455, 55: 156.0700377204608, 56: 153.0891, 57: 16.114642105263158}


In [None]:
# Training setup
print('Training set up')
num_epochs = 2  # Adjust as needed
efficientnet.train()
best_loss = float('inf')
patience_counter = 0
patience = 5  # Early stopping patience

# Enable mixed precision training
print('Scaler set up')
scaler = torch.amp.GradScaler('cuda',enabled=True)

try:
    efficientnet.load_state_dict(torch.load("best_efficientnet57.pth"))
    print("Loaded the best model checkpoint.")
except FileNotFoundError:
    print("No previous checkpoint found, starting from scratch.")

# Training loop
for epoch in range(num_epochs):
    correct = 0
    total = 0
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)  # Faster zeroing

        with torch.amp.autocast('cuda'):  # Enable mixed precision
            outputs = efficientnet(images)
            loss = criterion(outputs, labels)
        
        scaler.scale(loss).backward()  # Scale gradients
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        _, predicted_labels = torch.max(outputs, 1)
        correct += (predicted_labels == labels).sum().item()
        total += labels.size(0)

    avg_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total

    if avg_loss < best_loss:
        best_loss = avg_loss
        patience_counter = 0
        torch.save(efficientnet.state_dict(), "best_efficientnet57.pth")
        print(f"Best model saved with loss: {best_loss:.4f}")
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print("Early stopping triggered.")
        break

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
    print(datetime.now())

print("Training complete!")
torch.save(efficientnet.state_dict(), "efficientnet57.pth")
print("Model saved as efficientnet57.pth")
# 14:47
# 43 mins

Training set up
Scaler set up
Loaded the best model checkpoint.


In [11]:
efficientnet.load_state_dict(torch.load("/home/csunix/sc21cm/Desktop/best_efficientnet57.pth"))


<All keys matched successfully>

In [9]:
import torch
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR
from torch.cuda.amp import GradScaler, autocast

# Create a reverse mapping from numeric label to class name
index_to_label = {idx: label for label, idx in label_to_index.items()}

# Hyperparameters
num_epochs = 40
learning_rate = 1e-3
weight_decay = 1e-4
batch_size = 64  # Adjust as needed
patience = 5  # Early stopping patience

# Set the model to training mode
efficientnet.train()

# Optimizer and Loss function
optimizer = AdamW(efficientnet.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)  # Decay learning rate every 5 epochs
criterion = torch.nn.CrossEntropyLoss()

# Mixed precision training setup
scaler = GradScaler()

# Early stopping variables
best_loss = float('inf')
patience_counter = 0

for epoch in range(num_epochs):
    correct = 0
    total = 0
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        with autocast():  # Mixed Precision
            # Forward pass
            outputs = efficientnet(images)
            loss = criterion(outputs, labels)

        # Backpropagation with mixed precision
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

        # Get predictions
        _, predicted_labels = torch.max(outputs, 1)
        correct += (predicted_labels == labels).sum().item()
        total += labels.size(0)

    # Calculate training accuracy
    train_accuracy = 100 * correct / total

    # Print training progress
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {train_accuracy:.2f}%")

    # Learning rate scheduling
    scheduler.step()

    # Early stopping based on loss
    if running_loss < best_loss:
        best_loss = running_loss
        patience_counter = 0
        # Save model checkpoint
        torch.save(efficientnet.state_dict(), "best_efficientnet.pth")
        print(f"Best model saved with loss: {best_loss:.4f}")
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print("Early stopping triggered.")
        break

print("Training complete!")

# Load the best model (optional, if you need to resume or evaluate)
efficientnet.load_state_dict(torch.load("best_efficientnet.pth"))
print("Model loaded: best_efficientnet.pth")

# Final model saving after all training epochs
torch.save(efficientnet.state_dict(), "efficientnet_finetuned.pth")
print("Final model saved as efficientnet_finetuned.pth")


  scaler = GradScaler()
  with autocast():  # Mixed Precision


KeyboardInterrupt: 

In [13]:
# Set model to evaluation mode
efficientnet.eval()
correct = 0
total = 0

# Create reverse mapping from numeric labels to class names (for display)
index_to_label = {idx: label for label, idx in label_to_index.items()}

# For calculating F1 score
all_predictions = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        
        # Forward pass
        outputs = efficientnet(images)
        _, predicted = torch.max(outputs, 1)  # Get class with highest probability
        
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
        
        # Store predictions and labels for F1 score calculation
        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        
        # Print predictions for each image in the batch
        # for i in range(len(labels)):
        #     predicted_label_name = index_to_label[predicted[i].item()]
        #     ground_truth_label_name = index_to_label[labels[i].item()]
            # print(f"Image {i+1}: Predicted: {predicted_label_name}, Ground Truth: {ground_truth_label_name}")

# Calculate and print accuracy
accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

# Calculate F1 score
from sklearn.metrics import f1_score

# For binary classification
if len(set(all_labels)) == 2:
    f1 = f1_score(all_labels, all_predictions)
    print(f"F1 Score: {f1:.4f}")
# For multi-class classification
else:
    # Calculate macro F1 (average of F1 for each class)
    f1_macro = f1_score(all_labels, all_predictions, average='macro')
    # Calculate weighted F1 (weighted by class frequency)
    f1_weighted = f1_score(all_labels, all_predictions, average='weighted')
    print(f"Macro F1 Score: {f1_macro:.4f}")
    print(f"Weighted F1 Score: {f1_weighted:.4f}")


Test Accuracy: 80.13%
Macro F1 Score: 0.7940
Weighted F1 Score: 0.7950


In [None]:
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# Load trained model
model = efficientnet  # Ensure you have your model loaded
model.load_state_dict(torch.load("efficientnet_finetuned.pth", map_location=torch.device("mps")))
model.eval()

# Reverse label mapping
index_to_label = {idx: label for label, idx in label_to_index.items()}

# Define transformation (must match training!)
transform = transforms.Compose([
    transforms.Resize((256, 256)),  
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Function to load and classify an image
def classify_image():
    # Manually input image path
    file_path = input("Enter the path to your image: ")

    try:
        # Load and preprocess image
        image = Image.open(file_path).convert("RGB")
    except FileNotFoundError:
        print("File not found. Please check the path and try again.")
        return

    # Preprocess image
    input_tensor = transform(image).unsqueeze(0)  # Add batch dimension
    input_tensor = input_tensor.to("mps")

    # Perform inference
    with torch.no_grad():
        outputs = model(input_tensor)
        _, predicted_label = torch.max(outputs, 1)
    
    predicted_class = index_to_label[predicted_label.item()]
    
    # Display image and prediction
    plt.imshow(image)
    plt.axis("off")
    plt.title(f"Predicted Label: {predicted_class}")
    plt.show()

# Run the function
classify_image()


ModuleNotFoundError: No module named '_tkinter'