In [3]:
!pip install gdown
!python3 -m gdown "1ZmZ1RG796ClDXdjM_TKP6RGAd-pNKZfH&confirm=t"

Defaulting to user installation because normal site-packages is not writeable
Downloading...
From: https://drive.google.com/uc?id=1ZmZ1RG796ClDXdjM_TKP6RGAd-pNKZfH&confirm=t
To: /home/sabandyopadhyay/phys139_239/PHYS-139-Final-Project/cem_mitolab.zip
 81%|██████████████████████████████▋       | 2.38G/2.96G [00:39<00:07, 77.7MB/s]Error:

	[Errno 122] Disk quota exceeded

To report issues, please visit https://github.com/wkentaro/gdown/issues.
 81%|██████████████████████████████▋       | 2.39G/2.96G [00:40<00:09, 59.7MB/s]


In [1]:
import os
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms.functional as TF
from torch.utils.data import random_split

def find_dataset_dirs(root):
    dataset_dirs = []
    for name in sorted(os.listdir(root)):
        path = os.path.join(root, name)
        if os.path.isdir(path):
            if (os.path.isdir(os.path.join(path, "images")) and
                os.path.isdir(os.path.join(path, "masks"))):
                dataset_dirs.append(path)
    return dataset_dirs

class MultiFolderSegmentationDataset(Dataset):
    def __init__(self, root, size=(256,256)):
        self.size = size
        self.images = []
        self.masks = []

        dataset_dirs = find_dataset_dirs(root)

        for ds in dataset_dirs:
            images_dir = os.path.join(ds, "images")
            masks_dir = os.path.join(ds, "masks")

            img_files = sorted(os.listdir(images_dir))

            for f in img_files:
                img_path = os.path.join(images_dir, f)
                mask_path = os.path.join(masks_dir, f)
                if os.path.exists(mask_path):
                    self.images.append(img_path)
                    self.masks.append(mask_path)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = cv2.imread(self.images[idx])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        mask = cv2.imread(self.masks[idx], 0)

        # resize
        img = cv2.resize(img, self.size, interpolation=cv2.INTER_LINEAR)
        mask = cv2.resize(mask, self.size, interpolation=cv2.INTER_NEAREST)

        # normalize + convert to tensor
        img = img / 255.0
        img = np.transpose(img, (2, 0, 1))  # HWC -> CHW

        img = torch.tensor(img, dtype=torch.float32)
        mask = torch.tensor(mask, dtype=torch.long)

        return img, mask

class DoubleConv(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_c, out_c, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_c, out_c, 3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    def __init__(self, n_classes=2):
        super().__init__()
        self.down1 = DoubleConv(3, 64)
        self.pool1 = nn.MaxPool2d(2)

        self.down2 = DoubleConv(64, 128)
        self.pool2 = nn.MaxPool2d(2)

        self.down3 = DoubleConv(128, 256)
        self.pool3 = nn.MaxPool2d(2)

        self.down4 = DoubleConv(256, 512)
        self.pool4 = nn.MaxPool2d(2)

        self.bottleneck = DoubleConv(512, 1024)

        self.up4 = nn.ConvTranspose2d(1024, 512, 2, stride=2)
        self.conv4 = DoubleConv(1024, 512)

        self.up3 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.conv3 = DoubleConv(512, 256)

        self.up2 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.conv2 = DoubleConv(256, 128)

        self.up1 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.conv1 = DoubleConv(128, 64)

        self.out_conv = nn.Conv2d(64, n_classes, 1)

    def forward(self, x):
        c1 = self.down1(x)
        p1 = self.pool1(c1)

        c2 = self.down2(p1)
        p2 = self.pool2(c2)

        c3 = self.down3(p2)
        p3 = self.pool3(c3)

        c4 = self.down4(p3)
        p4 = self.pool4(c4)

        bn = self.bottleneck(p4)

        up4 = self.up4(bn)
        merge4 = torch.cat([up4, c4], dim=1)
        c5 = self.conv4(merge4)

        up3 = self.up3(c5)
        merge3 = torch.cat([up3, c3], dim=1)
        c6 = self.conv3(merge3)

        up2 = self.up2(c6)
        merge2 = torch.cat([up2, c2], dim=1)
        c7 = self.conv2(merge2)

        up1 = self.up1(c7)
        merge1 = torch.cat([up1, c1], dim=1)
        c8 = self.conv1(merge1)

        return self.out_conv(c8)

def train(model, dataloader, device, epochs=10):
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    model.train()

    for epoch in range(epochs):
        total_loss = 0

        for imgs, masks in dataloader:
            imgs = imgs.to(device)
            masks = masks.to(device)

            preds = model(imgs)
            loss = criterion(preds, masks)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f}")

root_dir = "C:/Users/HP ENVY/Desktop/Phys 139/cem_mitolab/cem_mitolab"   # parent folder that contains MANY dataset folders
dataset = MultiFolderSegmentationDataset(root_dir)

loader = DataLoader(dataset, batch_size=4, shuffle=True)

model = UNet(n_classes=2)   # mitochondria = foreground/background
device = "cuda" if torch.cuda.is_available() else "cpu"

train(model, loader, device, epochs=20)


FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/HP ENVY/Desktop/Phys 139/cem_mitolab/cem_mitolab'