In [347]:
# python imports
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import math
import wandb
import time

In [348]:
#torch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [349]:
# Load the prepared dataset (first CSV column becomes the row index) and
# discard the leftover "index" column in the same expression.
df = pd.read_csv("dataset_done.csv", index_col=0).drop(columns=["index"])

In [350]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,ALCOHOL,SUGAR,ACIDS,ENERGY,acid,bodied,dry,sweet,tannic,floral,...,dairy_food,fatty_food,lamb_food,nutty_food,box,desert,red,rose,sparkling,white
0,15.0,3.0,5.2,80.0,0,3,0,0,2,0,...,0,1,1,0,1,0,0,0,0,0
1,15.0,3.0,5.2,80.0,0,3,0,0,2,0,...,1,0,0,1,1,0,0,0,0,0
2,15.0,3.0,5.2,80.0,0,3,0,0,2,0,...,0,0,0,0,1,0,0,0,0,0
3,15.0,3.0,5.2,80.0,0,3,0,0,2,0,...,0,0,0,0,1,0,0,0,0,0
4,12.0,16.0,4.9,70.0,2,0,0,2,0,0,...,0,0,0,0,1,0,0,0,0,0


In [351]:
class CycleGenerator(nn.Module):
    """
    Generator for CycleGAN.

    A fully connected network that maps a feature vector of size
    ``input_channels`` to a feature vector of size ``output_channels``.
    It consists of five ``nn.Linear`` layers, each followed by ``nn.ReLU``.
    Because the output layer is also followed by a ReLU, every generated
    value is >= 0.

    Args:
        input_channels: number of features in each input sample.
        output_channels: number of features in each generated sample.
        hidden: width of every hidden layer.
    """
    def __init__(self, input_channels, output_channels, hidden):
        super().__init__()

        # Layer widths: input -> 4 hidden layers -> output. Layers are created
        # in this order so parameter names (net.0, net.2, ...) and the random
        # initialisation sequence stay the same as a hand-written stack.
        widths = [input_channels] + [hidden] * 4 + [output_channels]

        modules = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(in_features, out_features), nn.ReLU()]

        # NOTE(review): the trailing ReLU clamps outputs at zero; confirm that
        # this is intended for the target feature ranges.
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` (last dimension ``input_channels``) through the network."""
        return self.net(x)


In [352]:
class CycleDiscriminator(nn.Module):
    """
    Discriminator for CycleGAN.

    A fully connected network: three ``Linear`` + ``ReLU`` stages of width
    ``hidden`` followed by a single-unit ``Linear`` layer and a ``Sigmoid``,
    so each input sample is mapped to one score in (0, 1).

    Args:
        input_channels: number of features in each input sample.
        hidden: width of every hidden layer.
    """

    def __init__(self, input_channels, hidden):
        super().__init__()

        # Feature stages: input -> hidden -> hidden -> hidden.
        stage_widths = [input_channels, hidden, hidden, hidden]
        stages = []
        for fan_in, fan_out in zip(stage_widths, stage_widths[1:]):
            stages.extend((nn.Linear(fan_in, fan_out), nn.ReLU()))

        # Scoring head: one output unit squashed by a sigmoid.
        self.net = nn.Sequential(*stages, nn.Linear(hidden, 1), nn.Sigmoid())

    def forward(self, x):
        """Return one score per sample in ``x``."""
        scores = self.net(x)
        return scores


In [353]:
# Split the columns by domain: any column whose name contains "food" is a
# food feature; every other column is treated as a wine feature.
food_columns = [name for name in df.columns if "food" in name]
wine_columns = [name for name in df.columns if "food" not in name]

In [367]:
# Show the columns selected as food features.
food_columns

['fish_food',
 'desserts_food',
 'fruit_food',
 'vegetables_food',
 'sweet_food',
 'salty_food',
 'sour_food',
 'bitter_food',
 'beef_food',
 'poultry_food',
 'game_food',
 'pork_food',
 'dairy_food',
 'fatty_food',
 'lamb_food',
 'nutty_food']

In [354]:
# Show the columns selected as wine features.
wine_columns

['ALCOHOL',
 'SUGAR',
 'ACIDS',
 'ENERGY',
 'acid',
 'bodied',
 'dry',
 'sweet',
 'tannic',
 'floral',
 'citrus fruit',
 'stone fruit',
 'tropical fruit',
 'red fruit',
 'black fruit',
 'honey',
 'herbal',
 'bitter',
 'mineral',
 'creamy',
 'spice',
 'earthy',
 'baking spice',
 'leather',
 'astringent',
 'apple/pear',
 'melon',
 'buttery',
 'pastry',
 'nut',
 'chemical/medicinal',
 'wood',
 'smoky',
 'box',
 'desert',
 'red',
 'rose',
 'sparkling',
 'white']

In [364]:
def train_cycle_gan(n_epochs=20, batch_size=128, hidden=20, lr=2e-4, cycle_lambda=10, device=None):
    """
    Train a CycleGAN between food features (domain x) and wine features (domain y).

    The data is read from the notebook-level ``df`` using ``food_columns`` and
    ``wine_columns``. Two generators (food -> wine, wine -> food) and two
    discriminators (one per domain) are trained with a least-squares (MSE)
    adversarial loss plus an L1 cycle-consistency loss weighted by
    ``cycle_lambda``. All learning rates decay linearly per epoch from ``lr``
    towards zero.

    Args:
        n_epochs: number of passes over the data.
        batch_size: mini-batch size for both data loaders.
        hidden: hidden-layer width of every generator and discriminator.
        lr: initial Adam learning rate for all three optimizers.
        cycle_lambda: weight of the cycle-consistency term in the generator loss.
        device: torch device (or device string) to train on. ``None`` selects
            CUDA when it is available and falls back to the CPU otherwise.

    Returns:
        Tuple ``(food_samples, food2wine, food_recovered, wine_samples,
        wine2food, wine_recovered)`` of tensors on ``device``, computed from
        the first 20 rows of each domain: the real samples, their translation
        into the other domain, and the round-trip reconstruction.

    Raises:
        ValueError: if ``df`` contains no rows.
    """
    start_time = time.time()

    # Fall back to the CPU instead of crashing on machines without a GPU.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(device)

    print("[INFO] Setting up")
    food_data = torch.from_numpy(df[food_columns].to_numpy()).float().to(device)
    wine_data = torch.from_numpy(wine_data_frame_to_numpy()).float().to(device) if False else \
        torch.from_numpy(df[wine_columns].to_numpy()).float().to(device)
    assert (len(food_data) == len(wine_data))
    if len(food_data) == 0:
        raise ValueError("train_cycle_gan needs at least one row in df")

    # Each loader shuffles independently, so a food batch is not row-aligned
    # with the wine batch it is zipped with.
    dataset_params = {'batch_size': batch_size, 'shuffle': True}
    food_loader = torch.utils.data.DataLoader(food_data, **dataset_params)
    wine_loader = torch.utils.data.DataLoader(wine_data, **dataset_params)

    # Models - Assuming x is food and y is wine
    n_food, n_wine = len(food_columns), len(wine_columns)
    generator_x2y = CycleGenerator(input_channels=n_food, output_channels=n_wine, hidden=hidden).to(device)
    generator_y2x = CycleGenerator(input_channels=n_wine, output_channels=n_food, hidden=hidden).to(device)
    discriminator_x = CycleDiscriminator(input_channels=n_food, hidden=hidden).to(device)
    discriminator_y = CycleDiscriminator(input_channels=n_wine, hidden=hidden).to(device)

    adam_betas = (0.5, 0.999)
    generator_optimizer = torch.optim.Adam(
        list(generator_x2y.parameters()) + list(generator_y2x.parameters()), lr=lr, betas=adam_betas)
    discriminator_x_optimizer = torch.optim.Adam(discriminator_x.parameters(), lr=lr, betas=adam_betas)
    discriminator_y_optimizer = torch.optim.Adam(discriminator_y.parameters(), lr=lr, betas=adam_betas)

    # Schedulers: the multiplier evaluates to 1 - epoch / n_epochs.
    num_batches = len(food_loader)
    max_steps = max(1, n_epochs * num_batches)

    def scheduler(epoch):
        return max(0, (max_steps - epoch * num_batches) / max_steps)

    generator_scheduler = optim.lr_scheduler.LambdaLR(generator_optimizer, scheduler)
    discriminator_x_scheduler = optim.lr_scheduler.LambdaLR(discriminator_x_optimizer, scheduler)
    discriminator_y_scheduler = optim.lr_scheduler.LambdaLR(discriminator_y_optimizer, scheduler)

    # Training
    print("[INFO] Training discriminator, generator and encoder")
    mse_loss = nn.MSELoss()
    l1_loss = nn.L1Loss()

    for epoch in range(n_epochs):
        epoch_start = time.time()
        for x_batch, y_batch in zip(food_loader, wine_loader):
            # Training generators x2y and y2x
            x_fake = generator_y2x(y_batch)
            d_x_fake = discriminator_x(x_fake)
            # Targets are built with the prediction's shape so MSELoss does
            # not have to broadcast a scalar (which raises a warning).
            generator_x_loss = mse_loss(d_x_fake, torch.ones_like(d_x_fake))

            y_fake = generator_x2y(x_batch)
            d_y_fake = discriminator_y(y_fake)
            generator_y_loss = mse_loss(d_y_fake, torch.ones_like(d_y_fake))

            x_recovered = generator_y2x(y_fake)
            y_recovered = generator_x2y(x_fake)
            cycle_loss = l1_loss(x_recovered, x_batch) + l1_loss(y_recovered, y_batch)

            generator_loss = generator_x_loss + generator_y_loss + cycle_lambda * cycle_loss
            generator_optimizer.zero_grad()
            generator_loss.backward()
            generator_optimizer.step()

            # Fakes for the discriminator steps come from the just-updated
            # generators. No graph is recorded: the discriminator updates must
            # not backpropagate into the generators.
            with torch.no_grad():
                x_fake = generator_y2x(y_batch)
                y_fake = generator_x2y(x_batch)

            # Training discriminator for x
            d_x_fake = discriminator_x(x_fake)
            d_x_true = discriminator_x(x_batch)
            discriminator_x_loss = (mse_loss(d_x_true, torch.ones_like(d_x_true))
                                    + mse_loss(d_x_fake, torch.zeros_like(d_x_fake)))

            discriminator_x_optimizer.zero_grad()
            discriminator_x_loss.backward()
            discriminator_x_optimizer.step()

            # Training discriminator for y
            d_y_fake = discriminator_y(y_fake)
            d_y_true = discriminator_y(y_batch)
            discriminator_y_loss = (mse_loss(d_y_true, torch.ones_like(d_y_true))
                                    + mse_loss(d_y_fake, torch.zeros_like(d_y_fake)))

            discriminator_y_optimizer.zero_grad()
            discriminator_y_loss.backward()
            discriminator_y_optimizer.step()
        generator_scheduler.step()
        discriminator_x_scheduler.step()
        discriminator_y_scheduler.step()

        # The logged losses are those of the last batch of the epoch.
        print(f"[{100*(epoch+1)/n_epochs:.2f}%] Epoch {epoch + 1} - Time elapsed: {time.time() - epoch_start:.2f} - GENERATOR LOSS : {generator_loss.item()} - CYCLELOSS : {cycle_loss.item()}")

    # Generating samples
    generator_x2y.eval()
    generator_y2x.eval()
    discriminator_x.eval()
    discriminator_y.eval()
    with torch.no_grad():
        print("[INFO] Creating samples")
        food_samples = food_data[:20]
        food2wine = generator_x2y(food_samples)
        food_recovered = generator_y2x(generator_x2y(food_samples))

        wine_samples = wine_data[:20]
        wine2food = generator_y2x(wine_samples)
        wine_recovered = generator_x2y(generator_y2x(wine_samples))

    print(f"[DONE] Time elapsed: {time.time() - start_time:.2f} s")
    return food_samples, food2wine, food_recovered, wine_samples, wine2food, wine_recovered

In [366]:
# Train the CycleGAN and keep the sample tensors it returns for inspection.
food_samples, food2wine, food_recovered, wine_samples, wine2food, wine_recovered = train_cycle_gan()

[INFO] Setting up
[INFO] Training discriminator, generator and encoder
[5.00%] Epoch 1 - Time elapsed: 3.07 - GENERATOR LOSS : 31.23317527770996 - CYCLELOSS : 3.0238101482391357
[10.00%] Epoch 2 - Time elapsed: 3.00 - GENERATOR LOSS : 12.833194732666016 - CYCLELOSS : 1.1464786529541016
[15.00%] Epoch 3 - Time elapsed: 2.98 - GENERATOR LOSS : 14.691238403320312 - CYCLELOSS : 1.3165349960327148
[20.00%] Epoch 4 - Time elapsed: 2.99 - GENERATOR LOSS : 11.262306213378906 - CYCLELOSS : 0.9566847681999207
[25.00%] Epoch 5 - Time elapsed: 2.99 - GENERATOR LOSS : 12.727082252502441 - CYCLELOSS : 1.0937271118164062
[30.00%] Epoch 6 - Time elapsed: 3.06 - GENERATOR LOSS : 12.812178611755371 - CYCLELOSS : 1.0965116024017334
[35.00%] Epoch 7 - Time elapsed: 3.08 - GENERATOR LOSS : 11.804898262023926 - CYCLELOSS : 0.9918487071990967
[40.00%] Epoch 8 - Time elapsed: 3.04 - GENERATOR LOSS : 12.589426040649414 - CYCLELOSS : 1.0689287185668945
[45.00%] Epoch 9 - Time elapsed: 3.04 - GENERATOR LOSS : 13

In [361]:
# Wine feature vector generated from the first food sample.
food2wine[0]

tensor([0.6598, 0.0566, 0.3376, 3.8875, 0.0267, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0451, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], device='cuda:0')

In [362]:
# First real wine feature vector, for comparison with the generated one.
wine_samples[0]

tensor([15.0000,  3.0000,  5.2000, 80.0000,  0.0000,  3.0000,  0.0000,  0.0000,
         2.0000,  0.0000,  0.0000,  1.0000,  0.0000,  1.0000,  2.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         2.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0')

In [363]:
# Wine column names, to read the wine tensors position by position.
wine_columns

['ALCOHOL',
 'SUGAR',
 'ACIDS',
 'ENERGY',
 'acid',
 'bodied',
 'dry',
 'sweet',
 'tannic',
 'floral',
 'citrus fruit',
 'stone fruit',
 'tropical fruit',
 'red fruit',
 'black fruit',
 'honey',
 'herbal',
 'bitter',
 'mineral',
 'creamy',
 'spice',
 'earthy',
 'baking spice',
 'leather',
 'astringent',
 'apple/pear',
 'melon',
 'buttery',
 'pastry',
 'nut',
 'chemical/medicinal',
 'wood',
 'smoky',
 'box',
 'desert',
 'red',
 'rose',
 'sparkling',
 'white']