In [1]:
import numpy as np 
import pandas as pd
from sklearn.neighbors.kde import KernelDensity
from sklearn.metrics import precision_recall_fscore_support as prf, accuracy_score
import torch
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
from sklearn.manifold import TSNE
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [2]:
class ThyroidLoader(object):
    """Array-backed dataset that splits the label-0 rows into train and test parts.

    The file at ``data_path`` is read with ``np.load`` and indexed as a 2-D
    array: the last column is the label, every other column is a feature.
    The label-0 rows are permuted; the first ``N_train`` permuted rows form
    the training split, and the remaining label-0 rows followed by every
    label-1 row form the test split.

    The permutation comes from a private ``np.random.RandomState(seed)``
    instead of the global NumPy RNG. With the same ``seed`` a ``mode="train"``
    instance and a ``mode="test"`` instance therefore see the *same* split, so
    test rows can never overlap training rows. Pass ``seed=None`` to draw a
    fresh, non-reproducible permutation per instance.

    Parameters
    ----------
    data_path : path accepted by ``np.load``.
    N_train : number of permuted label-0 rows placed in the training split.
    mode : ``"train"`` selects the training split; any other value selects
        the test split.
    seed : seed for the split permutation (default ``0``).
    """

    def __init__(self, data_path, N_train, mode="train", seed=0):
        self.mode = mode
        data = np.load(data_path)

        labels = data[:, -1]
        features = data[:, :-1]

        # NOTE(review): the normal_*/attack_* names are kept from the original
        # code. Which label value actually denotes anomalies is not visible
        # here -- confirm against the dataset before relying on the names.
        normal_data = features[labels == 1]
        normal_labels = labels[labels == 1]

        attack_data = features[labels == 0]
        attack_labels = labels[labels == 0]
        N_attack = attack_data.shape[0]

        # Private, seeded generator: every instance built with the same seed
        # reproduces the same train/test partition of the label-0 rows.
        randIdx = np.random.RandomState(seed).permutation(N_attack)

        self.N_train = N_train
        train_idx = randIdx[:self.N_train]
        held_out_idx = randIdx[self.N_train:]

        self.train = attack_data[train_idx]
        self.train_labels = attack_labels[train_idx]

        # Test split = held-out label-0 rows followed by all label-1 rows.
        self.test = np.concatenate((attack_data[held_out_idx], normal_data), axis=0)
        self.test_labels = np.concatenate(
            (attack_labels[held_out_idx], normal_labels), axis=0)

    def __len__(self):
        """
        Number of samples in the split selected by ``mode``.
        """
        if self.mode == "train":
            return self.train.shape[0]
        else:
            return self.test.shape[0]

    def __getitem__(self, index):
        """Return ``(features, label)`` of one sample, both cast to float32."""
        if self.mode == "train":
            return np.float32(self.train[index]), np.float32(self.train_labels[index])
        else:
            return np.float32(self.test[index]), np.float32(self.test_labels[index])

In [3]:
class LVAE(nn.Module):
    """Fully connected VAE that also exposes its intermediate activations.

    Encoder: ``input_dim -> hidden_dim -> enc_dim`` with tanh after each
    layer, followed by two linear heads (``mu`` and ``log_var``) of size
    ``latent_dim``. Decoder mirrors the encoder:
    ``latent_dim -> enc_dim -> hidden_dim -> input_dim`` with tanh between
    layers and a sigmoid on the output.

    Parameters
    ----------
    input_dim : number of input features (default 36).
    hidden_dim : width of the first encoder / last hidden decoder layer
        (default 20).
    enc_dim : width of the second encoder / first decoder layer (default 11).
    latent_dim : size of the sampled latent vector (default 10).

    The defaults reproduce the original hard-coded 36-20-11-10 layout, and
    the layers are created in the original order under the original
    attribute names, so ``state_dict`` keys are unchanged.
    """

    def __init__(self, input_dim=36, hidden_dim=20, enc_dim=11, latent_dim=10):
        super().__init__()
        self.enc_1 = nn.Linear(input_dim, hidden_dim)
        self.enc = nn.Linear(hidden_dim, enc_dim)

        self.act = nn.Tanh()
        self.act_s = nn.Sigmoid()
        self.mu = nn.Linear(enc_dim, latent_dim)
        self.log_var = nn.Linear(enc_dim, latent_dim)

        self.z = nn.Linear(latent_dim, enc_dim)
        self.z_1 = nn.Linear(enc_dim, hidden_dim)
        self.dec = nn.Linear(hidden_dim, input_dim)

    def reparameterize(self, mu, log_var):
        """Sample ``mu + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(log_var / 2)``."""
        std = torch.exp(log_var/2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        """Run the full encode / sample / decode pass.

        Returns the tuple ``(enc_1, enc, mu, log_var, o, z, z_1, dec)``:

        * ``enc_1`` -- output of the first encoder layer, before tanh.
        * ``enc``   -- output of the second encoder layer, after tanh.
        * ``mu``, ``log_var`` -- outputs of the two latent heads.
        * ``o``     -- latent sample drawn by ``reparameterize``.
        * ``z``     -- output of the first decoder layer, before tanh.
        * ``z_1``   -- output of the second decoder layer, before tanh.
        * ``dec``   -- reconstruction, after the sigmoid.

        A fresh latent sample is drawn on every call, regardless of
        train/eval mode.
        """
        enc_1 = self.enc_1(x)
        enc = self.act(self.enc(self.act(enc_1)))

        mu = self.mu(enc)
        log_var = self.log_var(enc)
        o = self.reparameterize(mu, log_var)

        z = self.z(o)
        z_1 = self.z_1(self.act(z))
        dec = self.act_s(self.dec(self.act(z_1)))
        return enc_1, enc, mu, log_var, o, z, z_1, dec

In [4]:
# File handed to np.load by ThyroidLoader (via get_loader).
data_path = 'Thyroid.npy'

# Optimiser / mini-batch settings used by the training cells.
learn_rate = 1e-4
batch_size = 200

# Base count that the later cells scale by Ratio * 8 to obtain N_train.
All_train = 3679

In [5]:
def get_loader(data_path, batch_size, N_train, mode='train'):
    """Wrap one ThyroidLoader split in a torch ``DataLoader``.

    ``data_path``, ``N_train`` and ``mode`` are forwarded to ``ThyroidLoader``.
    Batches are shuffled only when ``mode`` is ``'train'``; any other mode is
    iterated in dataset order.
    """
    return DataLoader(
        dataset=ThyroidLoader(data_path, N_train, mode),
        batch_size=batch_size,
        shuffle=(mode == 'train'),
    )

In [6]:
def relative_euclidean_distance(a, b, eps=1e-12):
    """Row-wise ``||a - b||_2 / ||a||_2`` for two 2-D tensors of equal shape.

    Parameters
    ----------
    a : reference tensor; norms are taken along ``dim=1``.
    b : tensor compared against ``a``.
    eps : lower bound applied to ``||a||_2``. Without it an all-zero row in
        ``a`` divides by zero and yields NaN/inf. Rows whose norm exceeds
        ``eps`` are unaffected.

    Returns
    -------
    1-D tensor with one relative distance per row.
    """
    return (a - b).norm(2, dim=1) / a.norm(2, dim=1).clamp(min=eps)

# LVAE效果

In [7]:
def loss_function(recon_x, x, mu, logvar, enc, z,  enc_1, z_1):
    """Summed-MSE reconstruction loss plus the Gaussian KL divergence.

    Parameters
    ----------
    recon_x : reconstruction produced by the decoder.
    x : original input.
    mu, logvar : mean and log-variance of the approximate posterior.
    enc, z, enc_1, z_1 : intermediate activations. They are accepted for
        call-site compatibility but do not enter the returned loss.

    Returns
    -------
    Scalar tensor ``MSE_sum(recon_x, x) + KLD``.

    NOTE(review): the original computed summed-MSE terms for ``(enc, z)`` and
    ``(enc_1, z_1)`` and then discarded them. That dead work is removed here
    and the returned value is unchanged. If the layer-wise terms were meant
    to be part of the objective, they have to be added to the sum explicitly.
    NOTE(review): a later cell redefines ``loss_function`` with a 4-argument
    signature; whichever cell ran last wins.
    """
    # Reconstruction term is a summed squared error, not a cross-entropy.
    recon_loss = F.mse_loss(recon_x, x, reduction='sum')

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return recon_loss + KLD

In [8]:
# Training budget: N_train = int(All_train * Ratio * 8) label-0 rows.
Ratio = 0.1
# Used as the number of full passes over the training loader.
iter_per_epoch = 200
result = []
diff_quantity_result= []
N_train = int(All_train*Ratio*(8))

lvae = LVAE()
optimizer = torch.optim.Adam(lvae.parameters(),lr=learn_rate)
data_loader_train = get_loader(data_path, batch_size, N_train, mode='train')

# The training loop was commented out, which left `lvae` at its random
# initialisation while the following cells embed and plot its features.
# It is restored so that a top-to-bottom run visualises a trained model.
# NOTE(review): this relies on the 8-argument `loss_function` defined in the
# cell above; running it after the later 4-argument redefinition will fail.
for i in range(iter_per_epoch):
    for j, (input_data, labels) in enumerate(data_loader_train):
        optimizer.zero_grad()
        enc_1, enc, mu, log_var, o, z, z_1, dec = lvae(input_data)
        loss = loss_function(dec, input_data, mu, log_var, enc, z, enc_1, z_1)
        loss.backward()
        optimizer.step()

In [34]:
from mpl_toolkits.mplot3d import Axes3D  
import matplotlib.pyplot as plt
%matplotlib auto
#%matplotlib inline
batch_size = 1000
All_train = 3679
N_train = int(All_train*Ratio*(8))
data_loader_test = get_loader(data_path, batch_size, N_train, mode='test')
test_enc = []
test_labels = []

    
    
for i ,(input_data, labels)  in enumerate(data_loader_test):
    enc_1, enc, mu, log_var, o, z,  z_1, dec = lvae(input_data)
    rec_euclidean = relative_euclidean_distance(input_data, dec)
    rec_cosine = F.cosine_similarity(input_data, dec, dim=1)
    enc = torch.cat([enc, rec_euclidean.unsqueeze(-1)], dim=1)
    enc = enc.detach().numpy()
#     enc = enc_1.detach().numpy()
    test_enc.append(enc)
    test_labels.append(labels.numpy())

    
test_labels = np.concatenate(test_labels,axis=0)
tsne = TSNE(n_components=2, learning_rate=50).fit_transform(np.squeeze(test_enc))

plt.figure(figsize=(8, 6))
plt.subplot()
plt.scatter(tsne[:, 0], tsne[:, 1], c=test_labels)
#plt.colorbar()#使用这一句就可以分辨出，颜色对应的类了！神奇啊。
plt.show()    


Using matplotlib backend: Qt5Agg


In [29]:
from mpl_toolkits.mplot3d import Axes3D  
import matplotlib.pyplot as plt
%matplotlib auto

batch_size = 1000
All_train = 3679
N_train = int(All_train*Ratio*(8))
data_loader_test = get_loader(data_path, batch_size, N_train, mode='test')
test_enc = []
test_labels = []

    
    
for i ,(input_data, labels)  in enumerate(data_loader_test):
    enc_1, enc, mu, log_var, o, z,  z_1, dec = lvae(input_data)
#     rec_euclidean = relative_euclidean_distance(input_data, dec)
#     rec_cosine = F.cosine_similarity(input_data, dec, dim=1)
#     enc = torch.cat([enc, rec_euclidean.unsqueeze(-1)], dim=1)
#     enc = enc.detach().numpy()
    enc = enc_1.detach().numpy()
    test_enc.append(enc)
    test_labels.append(labels.numpy())

    
test_labels = np.concatenate(test_labels,axis=0)
embedded = TSNE(n_components=3, learning_rate=100).fit_transform(np.squeeze(test_enc))
fig = plt.figure()
ax = Axes3D(fig)
ax.scatter(embedded[:, 0], embedded[:, 1], embedded[:, 2], c=test_labels)


plt.axis()
plt.show()




Using matplotlib backend: Qt5Agg


# VAE效果

In [8]:
class VAE(nn.Module):
    """Plain fully connected variational auto-encoder.

    Encoder: ``input_dim -> hidden_dim -> enc_dim`` with tanh after each
    layer, followed by two linear heads (``mu`` and ``log_var``) of size
    ``latent_dim``. Decoder:
    ``latent_dim -> enc_dim -> hidden_dim -> input_dim`` with tanh between
    layers and a sigmoid on the output.

    Parameters
    ----------
    input_dim : number of input features (default 36).
    hidden_dim : width of the first encoder / last hidden decoder layer
        (default 20).
    enc_dim : width of the encoder output / first decoder layer (default 11).
    latent_dim : size of the sampled latent vector (default 10).

    The defaults reproduce the original hard-coded 36-20-11-10 layout. The
    sub-modules keep their original names and order, so ``state_dict`` keys
    are unchanged.
    """

    def __init__(self, input_dim=36, hidden_dim=20, enc_dim=11, latent_dim=10):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, enc_dim),
            nn.Tanh(),
        )
        self.mu = nn.Linear(enc_dim, latent_dim)
        self.log_var = nn.Linear(enc_dim, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, enc_dim),
            nn.Tanh(),
            nn.Linear(enc_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, log_var):
        """Sample ``mu + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(log_var / 2)``."""
        std = torch.exp(log_var/2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        """Encode, sample and decode ``x``.

        Returns ``(enc, dec, mu, log_var, z)``: the encoder output, the
        sigmoid reconstruction, the two latent-head outputs, and the latent
        sample that was decoded. A fresh sample is drawn on every call,
        regardless of train/eval mode.
        """
        enc = self.encoder(x)

        mu = self.mu(enc)
        log_var = self.log_var(enc)
        z = self.reparameterize(mu, log_var)
        dec = self.decoder(z)
        return enc, dec, mu, log_var, z

In [9]:
def loss_function(recon_x, x, mu, logvar):
    """Return the summed-MSE reconstruction error plus the Gaussian KL term.

    ``recon_x`` is compared with ``x`` using a squared error summed over all
    elements. ``mu`` and ``logvar`` parameterise the approximate posterior
    whose KL divergence from a standard normal is added on top.
    """
    reconstruction = F.mse_loss(recon_x, x, reduction='sum')

    # Closed-form KL divergence, Appendix B of
    # Kingma and Welling, "Auto-Encoding Variational Bayes", ICLR 2014
    # (https://arxiv.org/abs/1312.6114):
    #   -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_divergence = -0.5 * kl_terms.sum()

    return reconstruction + kl_divergence

In [10]:
# Training budget: N_train = int(All_train * Ratio * 8) label-0 rows.
Ratio = 0.1
iter_per_epoch = 200  # used as the number of full passes over the loader
result, diff_quantity_result = [], []
N_train = int(All_train*Ratio*(8))

# Model first, then its optimiser, then the shuffled training loader.
vae = VAE()
optimizer = torch.optim.Adam(vae.parameters(), lr=learn_rate)
data_loader_train = get_loader(data_path, batch_size, N_train, mode='train')

# Plain VAE training: one Adam step per mini-batch on MSE + KL.
for epoch in range(iter_per_epoch):
    for input_data, labels in data_loader_train:
        optimizer.zero_grad()
        enc, dec, mu, log_var, z = vae(input_data)
        loss = loss_function(dec, input_data, mu, log_var)
        loss.backward()
        optimizer.step()

In [24]:
from mpl_toolkits.mplot3d import Axes3D  
import matplotlib.pyplot as plt
%matplotlib auto

batch_size = 1000
All_train = 3679
N_train = int(All_train*Ratio*(8))
data_loader_test = get_loader(data_path, batch_size, N_train, mode='test')
test_enc = []
test_labels = []


      
for i ,(input_data, labels)  in enumerate(data_loader_test):
    enc, dec, mu, log_var, z = vae(input_data)
    enc = enc.detach().numpy()
    
    test_enc.append(enc)
    test_labels.append(labels.numpy())

    
test_labels = np.concatenate(test_labels,axis=0)
tsne = TSNE(n_components=2, learning_rate=30).fit_transform(np.squeeze(test_enc))
plt.figure(figsize=(8, 6))
plt.subplot()
plt.scatter(tsne[:, 0], tsne[:, 1], c=test_labels)
#plt.colorbar()#使用这一句就可以分辨出，颜色对应的类了！神奇啊。
plt.show()

Using matplotlib backend: Qt5Agg


In [30]:
from mpl_toolkits.mplot3d import Axes3D  
import matplotlib.pyplot as plt
%matplotlib auto

batch_size = 1000
All_train = 3679
N_train = int(All_train*Ratio*(8))
data_loader_test = get_loader(data_path, batch_size, N_train, mode='test')
test_enc = []
test_labels = []


      
for i ,(input_data, labels)  in enumerate(data_loader_test):
    enc, dec, mu, log_var, z = vae(input_data)
    rec_euclidean = relative_euclidean_distance(input_data, dec)
    rec_cosine = F.cosine_similarity(input_data, dec, dim=1)     
    enc = torch.cat([enc, rec_euclidean.unsqueeze(-1)], dim=1)
    enc = enc.detach().numpy()
    
    test_enc.append(enc)
    test_labels.append(labels.numpy())

    
test_labels = np.concatenate(test_labels,axis=0)
tsne = TSNE(n_components=2, learning_rate=30).fit_transform(np.squeeze(test_enc))
plt.figure(figsize=(8, 6))
plt.subplot()
plt.scatter(tsne[:, 0], tsne[:, 1], c=test_labels)
#plt.colorbar()#使用这一句就可以分辨出，颜色对应的类了！神奇啊。
plt.show()

Using matplotlib backend: Qt5Agg
