In [1]:
pip install easydict

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [18]:
import argparse

from sklearn.mixture import GaussianMixture
import torch
import torch.nn.functional as F
import torch.utils.data
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import easydict
from vade import AutoEncoderForPretrain, VaDE
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

N_CLASSES = 5


def train(model, data_loader, optimizer, device, epoch):
    model.train()

    total_loss = 0
    for x in data_loader:
        batch_size = x.size(0)
        x = x.to(device).view(-1, 11)
        recon_x = model(x)
        loss = F.binary_cross_entropy(recon_x, x, reduction='sum') / batch_size
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch {:>3}: Train Loss = {:.4f}'.format(
        epoch, total_loss / len(data_loader)))

class CustomDataset(Dataset):
    def __init__(self, csv):
        self.inp = csv.values

    def __len__(self):
        return len(self.inp) 

    def __getitem__(self,idx):
        inp = torch.FloatTensor(self.inp[idx])
        return inp 

def main():
    # parser = argparse.ArgumentParser(
    #     description='Train VaDE with MNIST dataset',
    #     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # parser.add_argument('--epochs', '-e',
    #                     help='Number of epochs.',
    #                     type=int, default=20)
    # parser.add_argument('--gpu', '-g',
    #                     help='GPU id. (Negative number indicates CPU)',
    #                     type=int, default=-1)
    # parser.add_argument('--learning-rate', '-l',
    #                     help='Learning Rate.',
    #                     type=float, default=0.001)
    # parser.add_argument('--batch-size', '-b',
    #                     help='Batch size.',
    #                     type=int, default=128)
    # parser.add_argument('--out', '-o',
    #                     help='Output path.',
    #                     type=str, default='./vade_parameter.pth')
    # args = parser.parse_args()
    args = easydict.EasyDict ({
        "epochs" : 20,
        "gpu" : -1,
        "batch_size" : 1,
        "learning_rate" : 0.001,
        "out" : './vade_parameter.pth'
    })

    if_use_cuda = torch.cuda.is_available() and args.gpu >= 0
    device = torch.device('cuda:{}'.format(args.gpu) if if_use_cuda else 'cpu')

    # dataset = datasets.MNIST('./data', train=True, download=True,
    #                                transform=transforms.ToTensor())
    CSV_PATH = './stats2021.csv'
    scaler = MinMaxScaler()
    mlb = pd.read_csv(CSV_PATH)
    mlb = mlb.iloc[0:, 4:15]
    scaler.fit(mlb)
    norm_mlb = scaler.transform(mlb)
    norm_mlb = pd.DataFrame(norm_mlb)
    dataset = CustomDataset(norm_mlb)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=0, pin_memory=if_use_cuda)

    pretrain_model = AutoEncoderForPretrain(11, 2).to(device)

    optimizer = torch.optim.Adam(pretrain_model.parameters(),
                                 lr=args.learning_rate)

    for epoch in range(1, args.epochs + 1):
        train(pretrain_model, data_loader, optimizer, device, epoch)

    with torch.no_grad():
        x = torch.vstack([data[0] for data in dataset]).view(-1, 11).to(device)
        z = pretrain_model.encode(x).cpu()

    pretrain_model = pretrain_model.cpu()
    state_dict = pretrain_model.state_dict()

    gmm = GaussianMixture(n_components=10, covariance_type='diag')
    gmm.fit(z)

    model = VaDE(N_CLASSES, 11, 2)
    model.load_state_dict(state_dict, strict=False)
    model._pi.data = torch.log(torch.from_numpy(gmm.weights_)).float()
    model.mu.data = torch.from_numpy(gmm.means_).float()
    model.logvar.data = torch.log(torch.from_numpy(gmm.covariances_)).float()

    torch.save(model.state_dict(), args.out)


if __name__ == '__main__':
    main()

Epoch   1: Train Loss = 7.3487
Epoch   2: Train Loss = 7.2245
Epoch   3: Train Loss = 7.1578
Epoch   4: Train Loss = 7.1777
Epoch   5: Train Loss = 7.0865
Epoch   6: Train Loss = 7.0804
Epoch   7: Train Loss = 7.0681
Epoch   8: Train Loss = 7.1075
Epoch   9: Train Loss = 7.1023
Epoch  10: Train Loss = 7.0982
Epoch  11: Train Loss = 7.0755
Epoch  12: Train Loss = 7.0702
Epoch  13: Train Loss = 7.1167
Epoch  14: Train Loss = 7.0678
Epoch  15: Train Loss = 7.0598
Epoch  16: Train Loss = 7.0639
Epoch  17: Train Loss = 7.0543
Epoch  18: Train Loss = 7.0710
Epoch  19: Train Loss = 7.0773
Epoch  20: Train Loss = 7.0576


In [30]:
import argparse
import matplotlib.pyplot as plt
from munkres import Munkres
from sklearn.manifold import TSNE
import torch
import torch.utils.data
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import easydict
from vade import VaDE, lossfun, ordloss 

N_CLASSES = 5
PLOT_NUM_PER_CLASS = 128


def train(model, data_loader, optimizer, device, epoch, writer):
    model.train()

    total_loss = 0
    for x in data_loader:
        x = x.to(device).view(-1, 11)
        recon_x, mu, logvar = model(x)    ###forward 자동호출
        loss = lossfun(model, x, recon_x, mu, logvar)
        #ord_loss = (ordloss(model, x, 10,2))/20
        total_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    writer.add_scalar('Loss/train', total_loss / len(data_loader), epoch)
    print('Epoch {:>3}: Train Loss = {:.4f}'.format(
        epoch, total_loss / len(data_loader)))


def test(model, data_loader, device, epoch, writer):
    model.eval()

    gain = torch.zeros((N_CLASSES, N_CLASSES), dtype=torch.int, device=device)
    with torch.no_grad():
        for xs, ts in data_loader:
            xs, ts = xs.to(device).view(-1, 11), ts.to(device)
            print (xs, ts)
            ys = model.classify(xs)
            print(ys)
            for t, y in zip(ts, ys):
                gain[t, y] += 1
        cost = (torch.max(gain) - gain).cpu().numpy()
        assign = Munkres().compute(cost)
        acc = torch.sum(gain[tuple(zip(*assign))]).float() / torch.sum(gain)
    writer.add_scalar('Acc/test', acc.item(), epoch)
    writer.add_figure('LatentSpace', fig, epoch)


def main():
    args = easydict.EasyDict ({
        "epochs" : 50,
        "gpu" : -1,
        "learning_rate" : 0.002,
        "batch_size" : 10,
        "pretrain" : None
    })

    if_use_cuda = torch.cuda.is_available() and args.gpu >= 0
    device = torch.device('cuda:{}'.format(args.gpu) if if_use_cuda else 'cpu')

    CSV_PATH = './stats2021.csv'
    scaler = MinMaxScaler()
    mlb = pd.read_csv(CSV_PATH)
    mlb = mlb.iloc[0:, 4:15]
    scaler.fit(mlb)
    norm_mlb = scaler.transform(mlb)
    norm_mlb = pd.DataFrame(norm_mlb)
    dataset = CustomDataset(norm_mlb)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=0, pin_memory=if_use_cuda)

    # For plotting

    model = VaDE(N_CLASSES, 11, 2)
    if args.pretrain:
        model.load_state_dict(torch.load(args.pretrain))
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    # LR decreases every 10 epochs with a decay rate of 0.9
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=10, gamma=0.9)

    # TensorBoard
    writer = SummaryWriter()

    for epoch in range(1, args.epochs + 1):
        train(model, data_loader, optimizer, device, epoch, writer)
        test(model, data_loader, device, epoch, writer)
        lr_scheduler.step()

    writer.close()

if __name__ == '__main__':
    main()



Epoch   1: Train Loss = 9.2861


ValueError: too many values to unpack (expected 2)

In [5]:
%load_ext tensorboard
%tensorboard --logdir=runs 