In [109]:
import raise_utils
from raise_utils.learners import MulticlassDL, Autoencoder
from raise_utils.data import DataLoader
from raise_utils.transforms import Transform
from raise_utils.metrics import ClassificationMetrics
from sklearn.preprocessing import LabelBinarizer
from keras.models import Model, Sequential
from keras.layers import Dense
from keras.metrics import F1Score
from imblearn.over_sampling import SMOTE

import numpy as np

In [126]:
class BetaSmoothness:
    def __init__(self, dataset: raise_utils.data.Data, model, bs=128, n_classes=2):                
        self.model = model
        self.data = dataset
        self.bs = bs
        self.n_classes = n_classes
        
        def _func(x):
            _model = Model(inputs=self.model.layers[0].input, outputs=self.model.layers[-2].output)
            return _model(x)

        self.func = _func
    
    def compute_beta(self):
        """Learning Rate Schedule
        # Arguments
            epoch (int): The number of epochs
        # Returns
            lr (float32): learning rate
        """        
        Ka_Kw = 0.
        for i in range((len(self.data.x_train) - 1) // self.bs + 1):
            start_i = i * self.bs
            end_i = start_i + self.bs
            xb = self.data.x_train[start_i:end_i]

            Ka = np.linalg.norm(self.func([xb]))
            
            assert len(self.model.layers[-1].weights[0].shape) == 2
            Kw = np.linalg.norm(self.model.layers[-1].weights[0])
            
            if not np.isinf(Ka / Kw):
                Ka_Kw = max(Ka_Kw, Ka / Kw)

        return (self.n_classes - 1) / self.n_classes * Ka_Kw
        
    
    def get_beta(self):        
        return self.compute_beta()

In [3]:
defect_file_dic = {'ivy':     ['ivy-1.1.csv', 'ivy-1.4.csv', 'ivy-2.0.csv'],
            'synapse': ['synapse-1.0.csv', 'synapse-1.1.csv', 'synapse-1.2.csv'],
            'velocity': ['velocity-1.4.csv', 'velocity-1.5.csv', 'velocity-1.6.csv'],
            'camel': ['camel-1.0.csv', 'camel-1.2.csv', 'camel-1.4.csv', 'camel-1.6.csv'],
            'log4j': ['log4j-1.0.csv', 'log4j-1.1.csv', 'log4j-1.2.csv'],
            'xalan': ['xalan-2.4.csv', 'xalan-2.5.csv', 'xalan-2.6.csv', 'xalan-2.7.csv']
            }

In [97]:
dataset = 'camel'
data = DataLoader.from_files(base_path='../DODGE Data/defect/', files=defect_file_dic[dataset])
print(data.x_train.shape)

(1819, 19)


So we need to shatter 1819 points. The VC-dimension for a neural net is $\Theta(WU)$ or $\mathcal{O}(WL\log_2 W)$. Suppose we want to use 32 units. Then we'd need 57 weights in total. From Montufar et al. (2014), we also want at least the number of units in each hidden layer as the number of inputs. We could use 2 layers with 8 units in each.

In [98]:
ae = Autoencoder(n_layers=1, n_units=[10], n_out=8)
ae.set_data(*data)
ae.fit()

Epoch 1/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 6884.5039   
Epoch 2/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 5718.5698 
Epoch 3/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 8236.4189 
Epoch 4/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4989.6470 
Epoch 5/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 8216.4023 
Epoch 6/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 8233.0137  
Epoch 7/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 11213.9082
Epoch 8/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 6492.2979 
Epoch 9/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 5553.4375 
Epoch 10/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 30.7398 
Epoch 80/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 27.1163 
Epoch 81/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 27.8106 
Epoch 82/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 26.7920 
Epoch 83/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 27.0603 
Epoch 84/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 28.1483 
Epoch 85/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 27.0209 
Epoch 86/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 28.6694 
Epoch 87/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 25.4259 
Epoch 88/500
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 

In [99]:
data.x_train = ae.encode(data.x_train)
data.x_test = ae.encode(data.x_test)

In [100]:
smote = SMOTE()
data.x_train, data.y_train = smote.fit_resample(data.x_train, data.y_train)

In [101]:
transform = Transform('normalize')
transform.apply(data)

In [103]:
binarizer = LabelBinarizer()
data.y_train = binarizer.fit_transform(data.y_train)
data.y_test = binarizer.transform(data.y_test)

In [104]:
model = MulticlassDL(n_classes=2, n_layers=2, n_units=8, n_epochs=50)
model.set_data(*data)
model.fit()

Epoch 1/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.7322 - val_loss: 0.6634
Epoch 2/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.6261 - val_loss: 0.5526
Epoch 3/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.5160 - val_loss: 0.4462
Epoch 4/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.4113 - val_loss: 0.3484
Epoch 5/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.3177 - val_loss: 0.2582
Epoch 6/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2312 - val_loss: 0.1799
Epoch 7/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1592 - val_loss: 0.1253
Epoch 8/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1113 - val_loss: 0.0894
Epoch 9/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [105]:
model.model.summary()

In [106]:
metrics = ClassificationMetrics(data.y_test.squeeze(), np.argmax(model.model(data.x_test), axis=1))
metrics.add_metrics(['f1', 'pd', 'pf', 'accuracy'])

In [107]:
metrics.get_metrics()

[0.0, 0.0, 0.0, 0.805181347150259]

In [108]:
np.unique(data.y_test)

array([0, 1])

## With alpha-scale transformations

In [110]:
model = Sequential([
    Dense(8, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

In [111]:
model.compile(optimizer='adam', loss='binary_crossentropy')

In [113]:
model.fit(data.x_train, data.y_train, epochs=1)

[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6888


<keras.src.callbacks.history.History at 0x7f0d416b2da0>

In [114]:
out1 = model(data.x_train)

In [117]:
alpha = 0.5
model.layers[-2].weights[0] *= alpha
model.layers[-1].weights[0] /= alpha

In [118]:
out2 = model(data.x_train)

In [119]:
np.linalg.norm(out1 - out2)

0.0

In [128]:
model = Sequential([
    Dense(8, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy')

In [129]:
alpha = 5.
batch_size = 128
for _ in range(50):
    model.fit(data.x_train, data.y_train, epochs=1, batch_size=batch_size)
    
    pre_beta = BetaSmoothness(data, model, batch_size)
    print('pre-beta:', pre_beta.get_beta())

    model.layers[-3].weights[0] /= alpha
    model.layers[-2].weights[0] *= alpha
    
    post_beta = BetaSmoothness(data, model, batch_size)
    print('post-beta:', post_beta.get_beta())

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6944   
pre-beta: 4.060109615325928
post-beta: 4.060109615325928
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6926 
pre-beta: 3.9872632026672363
post-beta: 3.9872632026672363
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6914 
pre-beta: 3.9832568168640137
post-beta: 3.9832568168640137
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6899 
pre-beta: 4.057107448577881
post-beta: 4.057107448577881
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6883 
pre-beta: 4.06126594543457
post-beta: 4.06126594543457
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6876 
pre-beta: 4.07589864730835
post-beta: 4.07589864730835
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6851 
pre-beta: 4.2188191413879395
post