In [1]:
from comet_ml import Experiment, Optimizer

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from collections import defaultdict
from tqdm import trange
import torch
import seaborn as sns
import os

torch.set_default_dtype(torch.float32)

In [3]:
from tabular_hypernet import Hypernetwork
from tabular_hypernet.training_utils import train_slow_step, train_model

In [4]:
DEVICE = 'cuda:1'

## Load data

In [5]:
data = pd.read_csv("../data/Blastchar/churn.csv")

In [6]:
def show_fractions(arr):
    for cls_ in sorted(pd.unique(arr)):
        print(f"{cls_} samples: {(arr==cls_).sum()} ({(arr==cls_).sum()/len(arr)*100:.1f}%)")

processed_data = data.copy()
del processed_data["customerID"]

y_label_enc = LabelEncoder()
y = y_label_enc.fit_transform(processed_data["Churn"].values)
del processed_data["Churn"]

show_fractions(y)

0 samples: 5174 (73.5%)
1 samples: 1869 (26.5%)


In [7]:
label_encoders = {}

for col in processed_data.columns:
    if processed_data[col].dtype == 'object':
        encoder = LabelEncoder()
        processed_data[col] = encoder.fit_transform(processed_data[col].values)
        label_encoders[col] = encoder
        
X = processed_data.values
print(X.shape)

(7043, 19)


### Split it into train and test set

In [8]:
class GenericDataset(torch.utils.data.IterableDataset):
    def __init__(self, data, shuffle: bool=False, samples_no: int=None):
        samples = samples_no or len(data[0])
        self.indices = np.arange(samples)
        self.shuffle = True
        if shuffle:
            self.indices = np.random.permutation(self.indices)
        self.index = 0
        self.max_samples = samples
        self.data_x = data[0].to(torch.float32)
        self.data_y = data[1]

    def __iter__(self):
        if self.shuffle:
            self.indices = np.random.permutation(self.indices)
        while self.index < self.max_samples:
            _idx = self.indices[self.index]
            yield self.data_x[_idx], self.data_y[_idx]
            self.index += 1
    
    def __len__(self):
        return self.data_x.shape[0]
    
def get_dataloader(X, y, size=None, batch_size=32):
    train_dataset = GenericDataset((X, y), size)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=1)
    
    return trainloader

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

print("Training data:")
show_fractions(y_train)

print("Test data:")
show_fractions(y_test)

Training data:
0 samples: 3863 (73.1%)
1 samples: 1419 (26.9%)
Test data:
0 samples: 1311 (74.4%)
1 samples: 450 (25.6%)


## Preprocess

In [10]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
X_train, X_test, y_train, y_test = [torch.from_numpy(x) for x in [X_train, X_test, y_train, y_test]]

## Benchmark

In [12]:
criterion = torch.nn.CrossEntropyLoss()

def _summarize_results(y_pred, y_score, y_test, labels):
    results = []
    for idx, label in enumerate(labels):
        y_pred_filt = y_pred[y_test==idx]
        y_test_filt = y_test[y_test==idx]
        acc = (y_pred_filt==y_test_filt.numpy()).sum()/len(y_test_filt)*100
        results.append({
            "Class": label,
            "Accuracy": acc
        })
        
    acc = (y_pred==y_test.numpy()).sum()/len(y_test)*100    
    results.append({
        "Class": "Total",
        "Accuracy": acc
    })
    results.append({
        "Class": "Loss",
        "Accuracy": criterion(torch.from_numpy(y_score), y_test).item()
    })
    return results


def test_model(model_fn, train_data, test_data, label_encoder=None, iters=10):
    X_train, y_train = train_data
    X_test, y_test = test_data
    if label_encoder is not None:
        labels = label_encoder.classes_
    else:
        labels = sorted(pd.unique(test_data))
    
    results = []

    for i in trange(iters):
        model = model_fn()

        model.fit(X_train, y_train);    
        y_pred = model.predict(X_test)
        y_score = model.predict_proba(X_test)
        results.extend(_summarize_results(y_pred, y_score, y_test, labels))

    dframe = pd.DataFrame.from_dict(results)
    sns.boxplot(data=dframe.iloc[:-1], y="Class", x="Accuracy", orient='h')
    return dframe

### XGBoost

In [13]:
from xgboost import XGBClassifier

In [14]:
# xgb_dframe = test_model(lambda: XGBClassifier(verbosity=0, use_label_encoder=False), 
#                         (X_train, y_train), 
#                         (X_test, y_test),
#                         label_encoder=y_label_enc, iters=1)

In [15]:
# xgb_dframe

## Hypernetwork

In [16]:
class AdaptedNetwork:
    def __init__(self, network):
        self.network = network
        self.optimizer = torch.optim.Adam(network.parameters(), lr=3e-5)
        self.criterion = torch.nn.CrossEntropyLoss()
    
    def fit(self, train_data, epochs=200):
        train_model(self.network, self.optimizer, self.criterion, train_data, epochs, DEVICE)
                    
    def predict(self, dataloader):
        res = []
        for (X, _) in dataloader:
            predictions = self.network(X.to(DEVICE)).cpu().detach().numpy()
            predictions = np.argmax(predictions, axis=1)
            res.append(predictions)
        
        res = np.concatenate(res)
        return res
    
    def predict_proba(self, dataloader):
        res = []
        for (X, _) in dataloader:
            predictions = self.network(X.to(DEVICE)).cpu().detach().numpy()
            res.append(predictions)
        
        res = np.concatenate(res)
        return res
    
def get_network(inputs, outputs):
    return 

def network_fn(mask_size, masks_no):
    def _inner():
        network = Hypernetwork(inp_size=X_train.shape[1], 
                            out_size=y.max().item()+1, 
                            mask_size=mask_size,
                            layers=[128, 128, 128],
                            node_hidden_size=100, 
                            test_nodes=masks_no).to(DEVICE)

        network = AdaptedNetwork(network)
        return network
    return _inner

In [17]:
def test_hypernet_model(model_fn, trainloader, testloader, label_encoder=None, iters=10):
    if label_encoder is not None:
        labels = label_encoder.classes_
    else:
        labels = sorted(pd.unique(test_data))
    
    results = []

    for i in trange(iters):
        model = model_fn()

        model.fit(trainloader);    
        y_pred = model.predict(testloader)
        y_score = model.predict_proba(testloader)
        results.extend(_summarize_results(y_pred, y_score, y_test, labels))

    dframe = pd.DataFrame.from_dict(results)
    sns.boxplot(data=dframe.iloc[:-1], y="Class", x="Accuracy", orient='h')
    return dframe

In [18]:
trainloader, testloader = get_dataloader(X_train, y_train), get_dataloader(X_test, y_test)

In [None]:
nn_results = test_hypernet_model(network_fn(15, 20), trainloader, testloader, y_label_enc, 1)

  0%|                                                                                                         | 0/1 [00:00<?, ?it/s]
  0%|                                                                                                       | 0/200 [00:00<?, ?it/s][A
  0%|▍                                                                                              | 1/200 [00:04<14:00,  4.22s/it][A
  1%|▉                                                                                              | 2/200 [00:08<13:40,  4.14s/it][A
  2%|█▍                                                                                             | 3/200 [00:12<13:36,  4.15s/it][A
  2%|█▉                                                                                             | 4/200 [00:16<13:41,  4.19s/it][A
  2%|██▍                                                                                            | 5/200 [00:21<14:35,  4.49s/it][A
  3%|██▊                                           

In [None]:
# results = defaultdict(list)
# for i in range(1):
#     for size in [100, 500]:
#         hypernet = Hypernetwork(inp_size=X.shape[1], 
#                             out_size=y.max().item()+1, 
#                             mask_size=15,
#                             layers=[32, 64, 32],
#                             node_hidden_size=100, 
#                             test_nodes=10).to(DEVICE)
        
#         hypernet = hypernet.train()
#         optimizer = torch.optim.Adam(hypernet.parameters(), lr=3e-4)

#         trainloader, testloader = get_dataloader(X_train, y_train), get_dataloader(X_test, y_test)

#         res = train_slow_step(hypernet, optimizer, criterion, 
#                           (trainloader, testloader), 
#                           X_train.shape[0], 
#                           10, 
#                           10,
#                           test_every=10,
#                           tag='blastchar-hypernet',
#                           device=DEVICE)
#         break

In [None]:
res