In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import pandas as pd
import random
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch

# Paths
root_dir = "/content/drive/MyDrive/traffic data"
folders = [
    "image2andmore",
    "image_random_intersection",
    "images_oneAccident_minor",
    "images_oneAccident_possible",
    "images_oneAccident_serious"
]
metadata_path = os.path.join(root_dir, "combined_metadata.csv")

# Load metadata
metadata = pd.read_csv(metadata_path)
metadata['image_filename'] = metadata['image_filename'].astype(str)

# Combine image paths
image_paths = []
labels = []

for folder in folders:
    full_path = os.path.join(root_dir, folder)
    images = [f for f in os.listdir(full_path) if f.lower().endswith(('.jpg', '.png'))]
    selected_images = random.sample(images, min(100, len(images)))  # Pick 100 or all if less

    for img_name in selected_images:
        label_row = metadata[metadata['image_filename'] == img_name]
        if not label_row.empty:
            risk_label = int(label_row['risk'].values[0])
            image_paths.append(os.path.join(full_path, img_name))
            labels.append(risk_label)

# Dataset class
class RoadRiskDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform or transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert("RGB")
        img = self.transform(img)
        label = self.labels[idx]
        return img, label

# Dataset and DataLoader
dataset = RoadRiskDataset(image_paths, labels)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)




In [None]:
# Load pretrained ResNet-18
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 5)  # 5 classes: risk 0–4
model = model.to(device)

# Example training step (optional)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# One batch training loop (preview)
model.train()
for imgs, targets in dataloader:
    imgs, targets = imgs.to(device), targets.to(device)
    outputs = model(imgs)
    loss = criterion(outputs, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print("Sample training loss:", loss.item())
    break  # Remove this to train fully


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 159MB/s]


Sample training loss: 1.7444162368774414


In [None]:
import random
from torch.utils.data import Subset
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Select 50 random indices
random_indices = random.sample(range(len(dataloader.dataset)), 50)

# Create a subset dataset
subset_dataset = Subset(dataloader.dataset, random_indices)
subset_loader = DataLoader(subset_dataset, batch_size=16, shuffle=False)

# Evaluation
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for imgs, targets in subset_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

# Metrics
acc = accuracy_score(all_targets, all_preds)
print(f"\n✅ Accuracy on 50 Random Images: {acc:.4f}")

print("\n📊 Classification Report:")
print(classification_report(all_targets, all_preds, digits=3))

print("\n🌀 Confusion Matrix:")
print(confusion_matrix(all_targets, all_preds))




✅ Accuracy on 50 Random Images: 0.2200

📊 Classification Report:
              precision    recall  f1-score   support

           0      1.000     0.100     0.182        10
           1      0.200     0.100     0.133        10
           2      0.000     0.000     0.000         8
           3      0.174     0.400     0.242        10
           4      0.238     0.417     0.303        12

    accuracy                          0.220        50
   macro avg      0.322     0.203     0.172        50
weighted avg      0.332     0.220     0.184        50


🌀 Confusion Matrix:
[[1 1 0 3 5]
 [0 1 0 3 6]
 [0 1 0 6 1]
 [0 2 0 4 4]
 [0 0 0 7 5]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
import random
from torch.utils.data import Subset
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


random_indices = random.sample(range(len(dataloader.dataset)), 300)

# Create a subset dataset
subset_dataset = Subset(dataloader.dataset, random_indices)
subset_loader = DataLoader(subset_dataset, batch_size=16, shuffle=False)

# Evaluation
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for imgs, targets in subset_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

# Metrics
acc = accuracy_score(all_targets, all_preds)
print(f"\n✅ Accuracy on 50 Random Images: {acc:.4f}")

print("\n📊 Classification Report:")
print(classification_report(all_targets, all_preds, digits=3))

print("\n🌀 Confusion Matrix:")
print(confusion_matrix(all_targets, all_preds))


✅ Accuracy on 50 Random Images: 0.2033

📊 Classification Report:
              precision    recall  f1-score   support

           0      1.000     0.076     0.141        66
           1      0.310     0.148     0.200        61
           2      0.500     0.016     0.031        63
           3      0.163     0.500     0.246        48
           4      0.188     0.355     0.246        62

    accuracy                          0.203       300
   macro avg      0.432     0.219     0.173       300
weighted avg      0.453     0.203     0.168       300


🌀 Confusion Matrix:
[[ 5  6  1 20 34]
 [ 0  9  0 32 20]
 [ 0  6  1 35 21]
 [ 0  4  0 24 20]
 [ 0  4  0 36 22]]


In [None]:
import random
from torch.utils.data import Subset
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Select 50 random indices
random_indices = random.sample(range(len(dataloader.dataset)), 50)

# Create a subset dataset
subset_dataset = Subset(dataloader.dataset, random_indices)
subset_loader = DataLoader(subset_dataset, batch_size=16, shuffle=False)

# Evaluation
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for imgs, targets in subset_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

# Metrics
acc = accuracy_score(all_targets, all_preds)
print(f"\n✅ Accuracy on 50 Random Images: {acc:.4f}")

print("\n📊 Classification Report:")
print(classification_report(all_targets, all_preds, digits=3))

print("\n🌀 Confusion Matrix:")
print(confusion_matrix(all_targets, all_preds))

In [None]:
import os
import pandas as pd
import random
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch

# Paths
root_dir = "/content/drive/MyDrive/traffic data"
folders = [
    "image2andmore",
    "image_random_intersection",
    "images_oneAccident_minor",
    "images_oneAccident_possible",
    "images_oneAccident_serious"
]
metadata_path = os.path.join(root_dir, "combined_metadata.csv")

# Load metadata
metadata = pd.read_csv(metadata_path)
metadata['image_filename'] = metadata['image_filename'].astype(str)

# Combine image paths
image_paths = []
labels = []

for folder in folders:
    full_path = os.path.join(root_dir, folder)
    images = [f for f in os.listdir(full_path) if f.lower().endswith(('.jpg', '.png'))]
    selected_images = random.sample(images, min(1000, len(images)))  # Pick 100 or all if less

    for img_name in selected_images:
        label_row = metadata[metadata['image_filename'] == img_name]
        if not label_row.empty:
            risk_label = int(label_row['risk'].values[0])
            image_paths.append(os.path.join(full_path, img_name))
            labels.append(risk_label)

# Dataset class
class RoadRiskDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform or transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert("RGB")
        img = self.transform(img)
        label = self.labels[idx]
        return img, label

# Dataset and DataLoader
dataset = RoadRiskDataset(image_paths, labels)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)




In [None]:
# Load pretrained ResNet-18
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 5)  # 5 classes: risk 0–4
model = model.to(device)

# Example training step (optional)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# One batch training loop (preview)
model.train()
for imgs, targets in dataloader:
    imgs, targets = imgs.to(device), targets.to(device)
    outputs = model(imgs)
    loss = criterion(outputs, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print("Sample training loss:", loss.item())
    break  # Remove this to train fully




Sample training loss: 1.5364779233932495


In [None]:
import random
from torch.utils.data import Subset
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Select 50 random indices
random_indices = random.sample(range(len(dataloader.dataset)), 100)

# Create a subset dataset
subset_dataset = Subset(dataloader.dataset, random_indices)
subset_loader = DataLoader(subset_dataset, batch_size=16, shuffle=False)

# Evaluation
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for imgs, targets in subset_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

# Metrics
acc = accuracy_score(all_targets, all_preds)
print(f"\n✅ Accuracy on 50 Random Images: {acc:.4f}")

print("\n📊 Classification Report:")
print(classification_report(all_targets, all_preds, digits=3))

print("\n🌀 Confusion Matrix:")
print(confusion_matrix(all_targets, all_preds))


✅ Accuracy on 50 Random Images: 0.2200

📊 Classification Report:
              precision    recall  f1-score   support

           0      0.235     0.364     0.286        22
           1      0.500     0.100     0.167        20
           2      0.148     0.444     0.222        18
           3      0.500     0.050     0.091        20
           4      0.500     0.150     0.231        20

    accuracy                          0.220       100
   macro avg      0.377     0.222     0.199       100
weighted avg      0.378     0.220     0.201       100


🌀 Confusion Matrix:
[[ 8  2 12  0  0]
 [ 2  2 15  0  1]
 [ 8  0  8  0  2]
 [11  0  8  1  0]
 [ 5  0 11  1  3]]


In [None]:
# Calculate average absolute difference between predicted and actual risk
differences = [abs(p - t) for p, t in zip(all_preds, all_targets)]
avg_difference = sum(differences) / len(differences)

print(f"\n📏 Average Absolute Risk Difference: {avg_difference:.3f}")



📏 Average Absolute Risk Difference: 1.500


In [None]:
import random
from torch.utils.data import Subset, DataLoader
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Select 200 random indices from dataset
random_indices = random.sample(range(len(dataloader.dataset)), 200)

# Create subset and loader
subset_dataset = Subset(dataloader.dataset, random_indices)
subset_loader = DataLoader(subset_dataset, batch_size=16, shuffle=False)

# Evaluate
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for imgs, targets in subset_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

# 🎯 Map to 3 custom groups:
def map_to_group(x):
    if x == 0:
        return 0  # Low
    elif x in [1, 2, 3]:
        return 1  # Medium
    else:
        return 2  # High

grouped_preds = [map_to_group(p) for p in all_preds]
grouped_targets = [map_to_group(t) for t in all_targets]

# 🔍 Metrics
acc = accuracy_score(grouped_targets, grouped_preds)
print(f"\n✅ Accuracy (3-Group Risk Classification): {acc:.4f}")

print("\n📊 Classification Report (3 Groups):")
print(classification_report(grouped_targets, grouped_preds, target_names=["Low", "Medium", "High"], digits=3))

print("\n🌀 Confusion Matrix (3 Groups):")
print(confusion_matrix(grouped_targets, grouped_preds))



✅ Accuracy (3-Group Risk Classification): 0.4500

📊 Classification Report (3 Groups):
              precision    recall  f1-score   support

         Low      0.280     0.548     0.371        42
      Medium      0.580     0.570     0.575       114
        High      0.333     0.045     0.080        44

    accuracy                          0.450       200
   macro avg      0.398     0.388     0.342       200
weighted avg      0.463     0.450     0.423       200


🌀 Confusion Matrix (3 Groups):
[[23 19  0]
 [45 65  4]
 [14 28  2]]


In [None]:
from torch.utils.data import Subset
import random

# First, index dataset samples by their group
low_idxs = []
medium_idxs = []
high_idxs = []

for i in range(len(dataset)):
    _, label = dataset[i]  # get only the label
    if label == 0:
        low_idxs.append(i)
    elif label in [1, 2, 3]:
        medium_idxs.append(i)
    elif label == 4:
        high_idxs.append(i)

# Choose how many samples you want per group
n_per_group = 100  # or adjust as needed

# Sample from each group
balanced_test_indices = (
    random.sample(low_idxs, min(n_per_group, len(low_idxs))) +
    random.sample(medium_idxs, min(n_per_group, len(medium_idxs))) +
    random.sample(high_idxs, min(n_per_group, len(high_idxs)))
)

# Optional: shuffle the final index list
random.shuffle(balanced_test_indices)

# Create balanced test set and loader
balanced_test_dataset = Subset(dataset, balanced_test_indices)
balanced_test_loader = DataLoader(balanced_test_dataset, batch_size=16, shuffle=False)


KeyboardInterrupt: 

In [None]:
import numpy as np
from torch.utils.data import Subset
# Configuration
n_per_class = 40  # 40 samples per class × 5 classes = 200 total test samples
total_test_samples = n_per_class * 5

# Pre-allocate lists for each class
class_indices = {cls: [] for cls in range(5)}

# Single pass through dataset (optimized)
for idx in np.random.permutation(len(dataset))[:1000]:  # Check max 1000 random samples
    _, label = dataset[idx]
    if len(class_indices[label]) < n_per_class:
        class_indices[label].append(idx)
    # Early exit if all classes have enough samples
    if all(len(v) >= n_per_class for v in class_indices.values()):
        break

# Combine selected indices
selected_indices = []
for cls in range(5):
    selected_indices.extend(class_indices[cls][:n_per_class])

# Create subset and loader
subset_dataset = Subset(dataset, selected_indices)
subset_loader = DataLoader(subset_dataset, batch_size=16, shuffle=False)

# The rest of your evaluation code remains the same...
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for imgs, targets in subset_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

# 🎯 Map to 3 custom groups:
def map_to_group(x):
    if x == 0:
        return 0  # Low
    elif x in [1, 2, 3]:
        return 1  # Medium
    else:
        return 2  # High

grouped_preds = [map_to_group(p) for p in all_preds]
grouped_targets = [map_to_group(t) for t in all_targets]

# 🔍 Metrics
acc = accuracy_score(grouped_targets, grouped_preds)
print(f"\n✅ Accuracy (3-Group Risk Classification): {acc:.4f}")

print("\n📊 Classification Report (3 Groups):")
print(classification_report(grouped_targets, grouped_preds, target_names=["Low", "Medium", "High"], digits=3))

print("\n🌀 Confusion Matrix (3 Groups):")
print(confusion_matrix(grouped_targets, grouped_preds))


✅ Accuracy (3-Group Risk Classification): 0.4150

📊 Classification Report (3 Groups):
              precision    recall  f1-score   support

         Low      0.247     0.500     0.331        40
      Medium      0.549     0.517     0.532       120
        High      0.167     0.025     0.043        40

    accuracy                          0.415       200
   macro avg      0.321     0.347     0.302       200
weighted avg      0.412     0.415     0.394       200


🌀 Confusion Matrix (3 Groups):
[[20 20  0]
 [53 62  5]
 [ 8 31  1]]


In [None]:
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for imgs, targets in balanced_test_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())
