## GANS MODEL

In [9]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torchvision.utils as vutils
from torchvision.models import inception_v3
import numpy as np
from scipy.linalg import sqrtm
import torch.nn.functional as F
from torchvision.utils import save_image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

os.makedirs("generated_images", exist_ok=True)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)

class Generator(nn.Module):
    def __init__(self, nz):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(nz, 256*8*8),
            nn.BatchNorm1d(256*8*8),
            nn.ReLU(True),
            nn.Unflatten(1, (256, 8, 8)),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 3, 3, padding=1),
            nn.Tanh()
        )
    def forward(self, x):
        return self.net(x)

class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, 1)
        )
    def forward(self, x):
        return self.net(x)

nz = 100
G = Generator(nz).to(device)
D = Discriminator().to(device)

loss_fn = nn.BCEWithLogitsLoss()
opt_G = optim.Adam(G.parameters(), lr=2e-4, betas=(0.5, 0.999))
opt_D = optim.Adam(D.parameters(), lr=2e-4, betas=(0.5, 0.999))

inception = inception_v3(pretrained=True, transform_input=False).to(device)
inception.eval()

def get_activations(images, model, batch_size=64):
    model.eval()
    activations = []
    with torch.no_grad():
        for i in range(0, len(images), batch_size):
            batch = images[i:i+batch_size]
            batch = F.interpolate(batch, size=(299, 299), mode='bilinear', align_corners=False)
            batch = batch.to(device)
            pred = model(batch)
            activations.append(pred.detach().cpu().numpy())
    return np.concatenate(activations, axis=0)

def calculate_fid(act1, act2):
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2)**2)
    covmean = sqrtm(sigma1.dot(sigma2))
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    return ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)

def calculate_inception_score(preds, splits=10):
    scores = []
    N = preds.shape[0]
    for i in range(splits):
        part = preds[i * N // splits: (i+1) * N // splits]
        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
        scores.append(np.exp(np.mean(np.sum(kl, axis=1))))
    return np.mean(scores), np.std(scores)

def evaluate(generator, real_images):
    generator.eval()
    with torch.no_grad():
        z = torch.randn(5000, nz, device=device)
        fake_images = generator(z)
    fake_images = (fake_images + 1) / 2

    real = torch.stack([real_images[i][0] for i in range(5000)])
    real = (real + 1) / 2

    act_real = get_activations(real, inception)
    act_fake = get_activations(fake_images, inception)

    fid = calculate_fid(act_real, act_fake)
    is_mean, is_std = calculate_inception_score(F.softmax(torch.tensor(act_fake), dim=1).numpy())
    return fid, is_mean

def train(epochs):
    for epoch in range(epochs):
        for i, (real_imgs, _) in enumerate(dataloader):
            real_imgs = real_imgs.to(device)
            b_size = real_imgs.size(0)
            z = torch.randn(b_size, nz, device=device)
            fake_imgs = G(z)

            real_labels = torch.ones(b_size, 1, device=device)
            fake_labels = torch.zeros(b_size, 1, device=device)

            D_real = D(real_imgs)
            D_fake = D(fake_imgs.detach())
            D_loss = loss_fn(D_real, real_labels) + loss_fn(D_fake, fake_labels)

            opt_D.zero_grad()
            D_loss.backward()
            opt_D.step()

            # Train Generator
            z = torch.randn(b_size, nz, device=device)
            fake_imgs = G(z)
            D_fake = D(fake_imgs)
            G_loss = loss_fn(D_fake, real_labels)

            opt_G.zero_grad()
            G_loss.backward()
            opt_G.step()

        if (epoch + 1) % 10 == 0:
            fid, iscore = evaluate(G, dataset)
            print(f"Epoch {epoch+1} | FID: {fid:.2f} | IS: {iscore:.2f}")

            with torch.no_grad():
                z = torch.randn(64, nz, device=device)
                sample_imgs = G(z)
                sample_imgs = (sample_imgs + 1) / 2
                save_image(sample_imgs, f"generated_images/epoch_{epoch+1}.png", nrow=8)

train(epochs=100)

Using device: cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data\cifar-100-python.tar.gz


100%|██████████████████████████████████████████████████████████████| 169001437/169001437 [00:12<00:00, 13827339.20it/s]


Extracting ./data\cifar-100-python.tar.gz to ./data


Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to C:\Users\vikra/.cache\torch\hub\checkpoints\inception_v3_google-0cc3c7bd.pth
100%|███████████████████████████████████████████████████████████████████████████████| 104M/104M [00:05<00:00, 19.7MB/s]


Epoch 10 | FID: 453.40 | IS: 2.23
Epoch 20 | FID: 270.04 | IS: 2.83
Epoch 30 | FID: 243.72 | IS: 2.87
Epoch 40 | FID: 183.58 | IS: 2.97
Epoch 50 | FID: 179.24 | IS: 3.13
Epoch 60 | FID: 169.50 | IS: 3.11
Epoch 70 | FID: 163.14 | IS: 3.11
Epoch 80 | FID: 154.91 | IS: 3.31
Epoch 90 | FID: 149.52 | IS: 3.10
Epoch 100 | FID: 151.69 | IS: 3.29


## DIFFUSION MODEL

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision.models.inception import inception_v3
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
import os
from scipy import linalg
from PIL import Image
from torchvision.datasets.folder import default_loader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SimpleUNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(32, 3, 3, 1, 1)
        )

    def forward(self, x, t):
        return self.net(x)

class Diffusion:
    def __init__(self, timesteps=500):  # Reduced timesteps
        self.timesteps = timesteps
        self.betas = torch.linspace(1e-4, 0.02, timesteps).to(device)
        self.alphas = 1. - self.betas
        self.alpha_hats = torch.cumprod(self.alphas, dim=0).to(device)

    def noise_images(self, x, t):
        sqrt_alpha_hat = torch.sqrt(self.alpha_hats[t])[:, None, None, None]
        sqrt_one_minus_alpha_hat = torch.sqrt(1 - self.alpha_hats[t])[:, None, None, None]
        noise = torch.randn_like(x)
        return sqrt_alpha_hat * x + sqrt_one_minus_alpha_hat * noise, noise

    def sample(self, model, image_size=32, n=64):
        model.eval()
        x = torch.randn((n, 3, image_size, image_size)).to(device)
        for t in reversed(range(self.timesteps)):
            t_batch = torch.full((n,), t, dtype=torch.long).to(device)
            with torch.no_grad():
                pred_noise = model(x, t_batch)
            alpha = self.alphas[t]
            alpha_hat = self.alpha_hats[t]
            beta = self.betas[t]
            if t > 0:
                noise = torch.randn_like(x)
            else:
                noise = 0
            x = (1 / torch.sqrt(alpha)) * (x - ((1 - alpha) / torch.sqrt(1 - alpha_hat)) * pred_noise) + torch.sqrt(beta) * noise
        x = (x.clamp(-1, 1) + 1) / 2
        return x

def calculate_inception_score(images, splits=10):
    inception_model = inception_v3(pretrained=True, transform_input=False).to(device)
    inception_model.eval()
    up = nn.Upsample(size=(299, 299), mode='bilinear', align_corners=False)

    preds = []
    for i in range(0, len(images), 32):
        batch = images[i:i+32].to(device)
        batch = up(batch)
        with torch.no_grad():
            pred = inception_model(batch)
            preds.append(F.softmax(pred, dim=1).cpu().numpy())

    preds = np.concatenate(preds, axis=0)
    split_scores = []
    for k in range(splits):
        part = preds[k * (preds.shape[0] // splits): (k+1) * (preds.shape[0] // splits), :]
        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
        kl = np.mean(np.sum(kl, 1))
        split_scores.append(np.exp(kl))

    return np.mean(split_scores), np.std(split_scores)


def calculate_fid(real_images, generated_images):
    inception_model = inception_v3(pretrained=True, transform_input=False).to(device)
    inception_model.eval()
    up = nn.Upsample(size=(299, 299), mode='bilinear', align_corners=False)

    def get_activations(images):
        activations = []
        for i in range(0, len(images), 32):
            batch = images[i:i+32].to(device)
            batch = up(batch)
            with torch.no_grad():
                pred = inception_model(batch)
            activations.append(pred.cpu().numpy())
        return np.concatenate(activations, axis=0)

    act1 = get_activations(real_images)
    act2 = get_activations(generated_images)
    mu1, sigma1 = np.mean(act1, axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = np.mean(act2, axis=0), np.cov(act2, rowvar=False)

    diff = mu1 - mu2
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = diff.dot(diff) + np.trace(sigma1 + sigma2 - 2 * covmean)
    return fid

def train(model, diffusion, dataloader, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        pbar = tqdm(dataloader)
        for images, _ in pbar:
            images = images.to(device)
            t = torch.randint(0, diffusion.timesteps, (images.size(0),), device=device).long()
            x_t, noise = diffusion.noise_images(images, t)
            noise_pred = model(x_t, t)
            loss = F.mse_loss(noise_pred, noise)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            pbar.set_description(f"Epoch {epoch+1} | Loss: {loss.item():.4f}")
        torch.save(model, f'diffusion/weights_{epoch}.pth')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)  # Smaller batch size

model = SimpleUNet().to(device)
diffusion = Diffusion(timesteps=500)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-6)

train(model, diffusion, trainloader, optimizer, epochs=5)



Files already downloaded and verified


Epoch 1 | Loss: 0.9348: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:13<00:00, 113.04it/s]
Epoch 2 | Loss: 0.8551: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:14<00:00, 111.31it/s]
Epoch 3 | Loss: 0.7318: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:13<00:00, 114.62it/s]
Epoch 4 | Loss: 0.6482: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:13<00:00, 112.68it/s]
Epoch 5 | Loss: 0.5199: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:13<00:00, 113.26it/s]


In [None]:
train(model, diffusion, trainloader, optimizer, epochs=5)

Epoch 1 | Loss: 0.2155: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:14<00:00, 111.02it/s]
Epoch 2 | Loss: 0.1574: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:13<00:00, 111.88it/s]
Epoch 3 | Loss: 0.1787: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:14<00:00, 109.87it/s]
Epoch 4 | Loss: 0.2709: 100%|█████████████████████████████████████████████████████| 1563/1563 [00:14<00:00, 110.17it/s]
  0%|                                                                                         | 0/1563 [00:00<?, ?it/s]

In [8]:
model = torch.load('diffusion/weights_49.pth')
samples = diffusion.sample(model, n=10)
os.makedirs("generated", exist_ok=True)
for i, img in enumerate(samples):
    save_image(img, f"generated/{i}.png")

real_imgs = torch.stack([trainset[i][0] for i in range(10)]).to(device)
fake_imgs = torch.stack([transforms.ToTensor()(Image.open(f"generated/{i}.png")) for i in range(10)]).to(device)

mean_is, std_is = calculate_inception_score(fake_imgs)
print(f"Inception Score: {mean_is:.4f} ± {std_is:.4f}")

fid = calculate_fid(real_imgs, fake_imgs)
print(f"FID: {fid:.4f}")

Inception Score: 1.0000 ± 0.0000
FID: 2448.8362
