<a href="https://colab.research.google.com/github/vtroyv/generative-adversarial-networks/blob/main/mnistGAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
print(torch.__version__)
print("GPU Available: ", torch.cuda.is_available())
if torch.cuda.is_available():
  device= torch.device("cuda:0")
else:
  device="cpu"
print(device)

# If you want to save the model to your personal google drive or transfer files you need to mount google drive
from google.colab import drive
drive.mount('/content/drive/')

2.6.0+cu124
GPU Available:  True
cuda:0
Mounted at /content/drive/


In [2]:
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# Define a function to create out generator
def make_generator_network(input_size=20, num_hidden_layers=1, num_hidden_units=100, num_output_units=784):
  model = nn.Sequential()
  for i in range(num_hidden_layers):
    model.add_module(f'fc_g{i}', nn.Linear(input_size, num_hidden_units))
    model.add_module(f'relu_g{i}', nn.LeakyReLU())
    input_size = num_hidden_units
  model.add_module(f'fc_g{num_hidden_layers}', nn.Linear(input_size, num_output_units))
  model.add_module('tanh_g', nn.Tanh())
  return model

#Define a function for the descriminator
def make_descriminator_network(input_size, num_hidden_layers=1, num_hidden_units=100, num_output_units =1):
  model = nn.Sequential()
  for i in range(num_hidden_layers):
    model.add_module(f'fc_d{i}', nn.Linear(input_size, num_hidden_units, bias=False))
    model.add_module(f'relu_d{i}', nn.LeakyReLU())
    model.add_module(f'dropout', nn.Dropout(p=0.5))
    input_size= num_hidden_units
  model.add_module(f'fc_d{num_hidden_layers}', nn.Linear(input_size, num_output_units))
  model.add_module('sigmoid', nn.Sigmoid())
  return model



#Next we initialize each of the networks
image_size= (28,28)
z_size = 20
gen_hidden_layers = 1
gen_hidden_size = 100
disc_hidden_layers =1
disc_hidden_size=100
torch.manual_seed(1)

gen_model = make_generator_network(input_size=z_size, num_hidden_layers=gen_hidden_layers, num_hidden_units=gen_hidden_size, num_output_units=np.prod(image_size))
print(gen_model)

disc_model = make_descriminator_network(input_size=np.prod(image_size), num_hidden_layers=disc_hidden_layers, num_hidden_units=disc_hidden_size)
print(disc_model)




Sequential(
  (fc_g0): Linear(in_features=20, out_features=100, bias=True)
  (relu_g0): LeakyReLU(negative_slope=0.01)
  (fc_g1): Linear(in_features=100, out_features=784, bias=True)
  (tanh_g): Tanh()
)
Sequential(
  (fc_d0): Linear(in_features=784, out_features=100, bias=False)
  (relu_d0): LeakyReLU(negative_slope=0.01)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc_d1): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [5]:
import torchvision
from torchvision import transforms
image_path='./'
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5), std=(0.5)) ])

mnist_dataset= torchvision.datasets.MNIST(root=image_path, train=True, transform = transform, download=False)

example,label = next(iter(mnist_dataset))
print(f'Min: {example.min()} Max: {example.max()}')
print(example.shape)

Min: -1.0 Max: 1.0
torch.Size([1, 28, 28])


In [6]:
# Create random vector z based on desired distribution

def create_noise(batch_size, z_size, mode_z):
  if mode_z =='uniform':
    input_z = torch.rand(batch_size, z_size) *2 -1 #why do we combine it like this?
  elif mode_z == 'normal':
    input_z = torch.randn(batch_size, z_size)
  return input_z

In [7]:
# Run through and get a batch of probabilities for the fake images and real images

from torch.utils.data import DataLoader
batch_size = 32
dataloader = DataLoader(mnist_dataset, batch_size, shuffle=False)
input_real, label = next(iter(dataloader))
input_real = input_real.view(batch_size, -1)
torch.manual_seed(1)

mode_z ='uniform'
input_z = create_noise(batch_size, z_size, mode_z)
print('input-z -- shape:', input_z.shape)
print('input-real -- shape:', input_real.shape)

g_output = gen_model(input_z)
print('Output of G -- shape:', g_output.shape)

d_proba_real = disc_model(input_real)
d_proba_fake = disc_model(g_output)
print('Probability of real images -- shape:', d_proba_real.shape)
print('Probability of fake images -- shape:', d_proba_fake.shape)




input-z -- shape: torch.Size([32, 20])
input-real -- shape: torch.Size([32, 784])
Output of G -- shape: torch.Size([32, 784])
Probability of real images -- shape: torch.Size([32, 1])
Probability of fake images -- shape: torch.Size([32, 1])


In [8]:
# Create loss and generate ground truth labels

loss_fn = nn.BCELoss()

# loss for generator
g_labels_real = torch.ones_like(d_proba_fake)
g_loss = loss_fn(d_proba_fake, g_labels_real)
print(f'Generator Loss: {g_loss:.4f}')

# Loss for discriminator
d_labels_real = torch.ones_like(d_proba_real)
d_labels_fake = torch.zeros_like(d_proba_fake)
d_loss_real = loss_fn(d_proba_real, d_labels_real)
d_loss_fake = loss_fn(d_proba_fake, d_labels_fake)

print(f'Discriminator losses: Real {d_loss_real:.4f} Fake {d_loss_fake:.4f}')


Generator Loss: 0.6983
Discriminator losses: Real 0.7479 Fake 0.6885


In [16]:
# Now we set up teh dataloader for the real dataset, the generator, discriminator and separate adam optimizer

batch_size = 64
torch.manual_seed(1)
np.random.seed(1)

mnist_dl =DataLoader(mnist_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

first_images, first_labels = next(iter(mnist_dl))
print(f'The first image is {first_images[0].shape}')
print(f"THe first label is {first_labels[0]}")

gen_model = make_generator_network(
    input_size=z_size,
    num_hidden_layers=gen_hidden_layers,
    num_hidden_units= gen_hidden_size,
    num_output_units=np.prod(image_size)
).to(device)

disc_model = make_descriminator_network(
    input_size=np.prod(image_size),
    num_hidden_layers=disc_hidden_layers,
    num_hidden_units=disc_hidden_size
).to(device)


loss_fn = nn.BCELoss()
g_optimizer= torch.optim.Adam(gen_model.parameters())
d_optimizer=torch.optim.Adam(disc_model.parameters())



The first image is torch.Size([1, 28, 28])
THe first label is 4


In [17]:
#write two training utility functions for the  discriminator and generator using two separate adam optimizers

# Train the discriminator
def d_train(x):
  disc_model.zero_grad()
  # Train the discriminator with a real batch
  batch_size = x.size(0)
  x = x.view(batch_size, -1).to(device)
  d_labels_real = torch.ones(batch_size, 1, device=device)
  d_proba_real = disc_model(x)
  d_loss_real = loss_fn(d_proba_real, d_labels_real)

  # Train discriminator on a fake batch

  input_z = create_noise(batch_size, z_size, mode_z).to(device)
  g_output = gen_model(input_z)
  d_proba_fake = disc_model(g_output)
  d_labels_fake = torch.zeros(batch_size,1,device=device)
  d_loss_fake =loss_fn(d_proba_fake, d_labels_fake)

  # Gradient backprop & optimize only D's parameters
  d_loss = d_loss_real + d_loss_fake
  d_loss.backward()
  d_optimizer.step()
  return d_loss.data.item(), d_proba_real.detach(), d_proba_fake.detach()

  #Train the generator

def g_train(x):
  gen_model.zero_grad()
  batch_size = x.size(0)
  input_z = create_noise(batch_size, z_size, mode_z).to(device)
  g_labels_real = torch.ones(batch_size,1, device=device)

  g_output= gen_model(input_z)
  d_proba_fake = disc_model(g_output)
  g_loss = loss_fn(d_proba_fake, g_labels_real)
  # gradient backprop & optimize ONLY G's parameters
  g_loss.backward()
  g_optimizer.step()
  return g_loss.data.item()







In [19]:
# Now we will start alternatinig traing of geneator and discriminator over 100 epochs
# for each epoch we will record the loss for the generator, discriminator, aswell as loss for real & fake data respectively.
# We will also generatre some examples from a the latent vector z from the current generator model via create_samples() and store these synthnesized images in a list

fixed_z = create_noise(batch_size, z_size, mode_z).to(device)

def create_samples(g_model, input_z):
  g_output = g_model(input_z)
  images =torch.reshape(g_output, (batch_size, *image_size))
  return (images+1)/2.0

epoch_samples = []
all_d_losses = []
all_g_losses = []
all_d_real = []
all_d_fake = []
num_epochs = 100

for epoch in range(1, num_epochs+1):
  d_losses, g_losses = [],[]

  d_vals_real, d_vals_fake = [],[]

  for i, (x,_) in enumerate(mnist_dl):
    d_loss, d_proba_real, d_proba_fake= d_train(x)
    d_losses.append(d_loss)
    g_losses.append(g_train(x))
    d_vals_real.append(d_proba_real.mean().cpu())
    d_vals_fake.append(d_proba_fake.mean().cpu())

  all_d_losses.append(torch.tensor(d_losses).mean())
  all_g_losses.append(torch.tensor(g_losses).mean())
  all_d_real.append(torch.tensor(d_vals_real).mean())
  all_d_fake.append(torch.tensor(d_vals_fake).mean())
  print(f'Epoch {epoch:03d} | Avg Losses >>'
  f'G/D {all_g_losses[-1]:.4f}/{all_d_losses[-1]:4f}'
  f' [D-Real: {all_d_real[-1]:.4f}'
  f' D-Fake: {all_d_fake[-1]:.4f}]')
  epoch_samples.append(create_samples(gen_model, fixed_z).detach().cpu().numpy())



Epoch 001 | Avg Losses >>G/D 0.9020/0.880472 [D-Real: 0.8317 D-Fake: 0.4781]
Epoch 002 | Avg Losses >>G/D 1.0007/1.092743 [D-Real: 0.6307 D-Fake: 0.4228]
Epoch 003 | Avg Losses >>G/D 0.9675/1.160848 [D-Real: 0.5989 D-Fake: 0.4183]
Epoch 004 | Avg Losses >>G/D 0.9796/1.194591 [D-Real: 0.5832 D-Fake: 0.4180]
Epoch 005 | Avg Losses >>G/D 0.9081/1.233730 [D-Real: 0.5686 D-Fake: 0.4323]
Epoch 006 | Avg Losses >>G/D 0.9623/1.215204 [D-Real: 0.5723 D-Fake: 0.4205]
Epoch 007 | Avg Losses >>G/D 0.8827/1.266847 [D-Real: 0.5527 D-Fake: 0.4431]
Epoch 008 | Avg Losses >>G/D 1.0358/1.180521 [D-Real: 0.5917 D-Fake: 0.4089]
Epoch 009 | Avg Losses >>G/D 1.1428/1.095757 [D-Real: 0.6252 D-Fake: 0.3808]
Epoch 010 | Avg Losses >>G/D 0.9542/1.197297 [D-Real: 0.5876 D-Fake: 0.4184]
Epoch 011 | Avg Losses >>G/D 0.9545/1.205703 [D-Real: 0.5836 D-Fake: 0.4190]
Epoch 012 | Avg Losses >>G/D 0.9768/1.206133 [D-Real: 0.5817 D-Fake: 0.4169]
Epoch 013 | Avg Losses >>G/D 0.9669/1.204329 [D-Real: 0.5821 D-Fake: 0.4180]