In [1]:
from seqdata import getset
import sys
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pylab as plt
import numpy
from numpy import argmax, vstack
import random
import torch
from torch import Tensor
import pytorch_lightning as pl
from torch import optim, nn, utils
#from torch.utils.data import Dataset
from torch.utils.data import DataLoader
#from torch.utils.data import random_split
#from torch.nn import Linear,ReLU, LeakyReLU,ELU,Softmax,Module,CrossEntropyLoss
#from torch.optim import SGD,Adam
#from torch.nn.init import kaiming_uniform_,xavier_uniform_, uniform_, ones_, zeros_, eye_
#from torch.nn.parameter import Parameter



In [2]:
def xsample(x0,size):
    res=[]
    while size-len(res)>=len(x0):
        res.extend(x0)
    res.extend(random.sample(x0,size-len(res)))
    return res

def prepare_data(cats, cat_size=1000, batch_size=10):
    X,y=getset(7)
    X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=1234567, shuffle=True)
    
    u,c=numpy.unique(y_train, return_counts=True)
    print(f"Counts for minimal group: {numpy.min(c)}")
    print(f"Counts for maximal group: {numpy.max(c)}")
    
    joined=[(x,lab) for x,lab in zip(X_train,y_train) if lab in cats]
    X_train=[x for x,lab in joined]
    y_train=[lab for x,lab in joined]
    
    
    joined=[(x,lab) for x,lab in zip(X_test,y_test) if lab in cats]
    X_test=[x for x,lab in joined]
    y_test=[lab for x,lab in joined]
    
    
    test=[[numpy.array(x,dtype=numpy.float32),y] for x,y in zip(X_test,numpy.array(y_test,dtype=numpy.int64))]    
    if cat_size>0:
        train=[]    
        #resample X_train and y_train
        random.seed(None) #1234567)
        new_X,new_y=[],[]
        for cat in cats:
            cat_xy=[(x,lab) for x,lab in zip(X_train,y_train) if lab==cat]
            qs=xsample(cat_xy, cat_size)
            #qs=[random.choice(cat_xy) for ii in range(cat_size)]
            qs=[[numpy.array(x,dtype=numpy.float32),numpy.array(lab,dtype=numpy.int64)] for x,lab in qs]
            train.extend(qs)
    else:
        train=[[numpy.array(x, dtype=numpy.float32),y] for x,y in zip(X_train,numpy.array(y_train,dtype=numpy.int64))]
   
    print(f'train data: {len(train)}, test data: {len(test)}, categories: {len(cats)}' )
    train_dl = DataLoader(train, batch_size=batch_size, shuffle=True)
    test_dl = DataLoader(test, batch_size=1000000, shuffle=False)
    return train_dl, test_dl

In [3]:
class MLP(nn.Module):
    # define model elements
    def __init__(self, n_inputs):
        super(MLP, self).__init__()
        N=161*10
        
        self.hidden1 = nn.Linear(n_inputs, N)
        nn.init.kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = nn.LeakyReLU()

  
   #     self.hidden2 = nn.Linear(N, N)
   #     nn.init.kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
   #     self.act2 = nn.LeakyReLU()
 
   #     self.hidden3 = nn.Linear(N, N)
   #     nn.init.kaiming_uniform_(self.hidden3.weight, nonlinearity='relu')
   #     self.act3 = nn.LeakyReLU()
        
   #     self.hidden4 = nn.Linear(N, N)
   #     nn.init.kaiming_uniform_(self.hidden4.weight, nonlinearity='relu')
   #     self.act4 = nn.LeakyReLU()
        
   #     self.hidden5 = nn.Linear(N, N)
   #     nn.init.kaiming_uniform_(self.hidden5.weight, nonlinearity='relu')
   #     self.act5 = nn.LeakyReLU()
 

        self.hidden_last = nn.Linear(N, 20)   
        nn.init.kaiming_uniform_(self.hidden_last.weight)        
        #xavier_uniform_(self.hidden_last.weight)
        #uniform_(self.hidden_last.weight)
        #ones_(self.hidden_last.weight)
        self.act_last = nn.Softmax(dim=-1)
        
        self.dropout = nn.Dropout(0.1) 

    # forward propagate input
    def forward(self, X):
        # input to first hidden layer
        X = self.hidden1(X)
        X = self.act1(X)
        X = self.dropout(X)    #
#        X = self.hidden2(X)
#        X = self.act2(X)
#        X = self.dropout(X)    #
#        X = self.hidden3(X)
#        X = self.act3(X)        
        
#        X = self.dropout(X)    #
#        X = self.hidden4(X)
#        X = self.act4(X)       
        
#        X = self.dropout(X)    #
#        X = self.hidden5(X)
#        X = self.act5(X)       
        
        #last
        X = self.hidden_last(X)
        X = self.act_last(X)
        return X

In [4]:
class PLModel(pl.LightningModule):
    def __init__(self,model):
        super().__init__()
        self.model=model

    def training_step(self, batch, batch_idx):
        x, y = batch
        yhat = self.model(x)
        loss = nn.CrossEntropyLoss()(yhat, y)
        
        ind = numpy.argmax(yhat.detach().numpy(),axis=1)
        accuracy=accuracy_score(ind,y)
        self.log("accuracy", accuracy)
        # Logging to TensorBoard by default
        self.log("train_loss", loss)
        return loss
    
    def training_epoch_end(self, training_step_outputs):
        #print(training_step_outputs)
        self.log("mean_train_loss", torch.mean(torch.stack([x["loss"] for x in training_step_outputs])))
    
    def predict_step(self, batch, batch_idx, dataloader_idx = 0):
        return self(batch)

    def forward(self,x):
        #print(x)
        #print(x.shape)
        return self.model.forward(x)
    
    def configure_optimizers(self):
        #optimizer = optim.Adam(self.parameters(), lr=1e-3)
        #optimizer = optim.SGD(self.parameters(), lr=1e-2, momentum=0.9)
        optimizer = optim.AdamW(self.parameters(), lr=1e-3)
        return optimizer



In [15]:
#model=PLModel(MLP(161))
#train,test=prepare_data(range(20),1000,1000000)
#model.dropout=nn.Dropout(0.5)
for cyc in range(20):
    trainer = pl.Trainer(limit_train_batches=100, max_epochs=50,log_every_n_steps=1)
    trainer.fit(model=model, train_dataloaders=train)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 293 K 
-------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.172     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.


In [17]:
tx=[x for x,_ in test][0]
ty=[cat for _,cat in test][0].detach().numpy()
print(ty)

predictions = trainer.predict(model, tx)
ind = [numpy.argmax(p) for p in predictions]
accuracy=accuracy_score(ind,ty)
print("Validation accuracy:", accuracy)

##truth=torch.stack([cat for _,cat in test]).detach().numpy()
##print(truth)

[ 1  2 11 ... 16  3 16]


Predicting: 1it [00:00, ?it/s]

Validation accuracy: 0.09013881377321074
