In [None]:
import copy
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn import datasets
import sys
import random

In [None]:
# Synthetic linear-regression problem: fixed random_state keeps the data
# identical between runs.
n_samples, n_features = 10000, 20
X_numpy, Y_numpy = datasets.make_regression(
    n_samples=n_samples,
    n_features=n_features,
    noise=10,
    random_state=42,
)

# torch.nn.Linear works in float32 by default, so convert before wrapping.
X = torch.from_numpy(X_numpy.astype(np.float32))
# Targets become a column so they line up with the (n_samples, 1) model output.
Y = torch.from_numpy(Y_numpy.astype(np.float32)).view(-1, 1)

In [None]:
def gradient_descent(x, y, model, loss, lr, n_iters, samples_num):
    """Train ``model`` with mini-batch SGD and record the full-data loss.

    Each iteration runs the model on all of ``x``, draws ``samples_num`` row
    indices without replacement, back-propagates the loss on those rows only
    and takes one SGD step.

    Args:
        x: Input tensor passed to ``model`` as a whole.
        y: Targets; ``y.shape[0]`` defines the range the indices are drawn from.
        model: Module to optimise; its parameters are updated in place.
        loss: Callable ``loss(prediction, target)`` returning a scalar tensor.
        lr: Step size for ``torch.optim.SGD``.
        n_iters: Number of optimisation steps.
        samples_num: Number of rows used for each gradient estimate.

    Returns:
        ``numpy.ndarray`` of length ``n_iters``; entry ``k`` is the loss over
        all rows measured before the ``k``-th parameter update.
    """
    sgd = torch.optim.SGD(model.parameters(), lr=lr)
    row_ids = range(y.shape[0])
    history = np.zeros(n_iters)

    for step in range(n_iters):
        predictions = model(x)

        # Gradient comes from a random subset of rows only.
        batch = random.sample(row_ids, samples_num)
        batch_loss = loss(predictions[batch], y[batch])
        batch_loss.backward()

        # The logged value uses every row and the pre-update parameters.
        history[step] = loss(predictions, y).item()

        sgd.step()
        sgd.zero_grad()

    return history

In [None]:
def spider_boost(x, y, model, model_prev, loss, lr, q, n_iters, samples_num):
    """Train ``model`` with the SpiderBoost variance-reduced gradient estimator.

    Every ``q``-th iteration the gradient is recomputed on all rows. On the
    remaining iterations the estimate left in ``param.grad`` by the previous
    step is corrected with the difference of two mini-batch gradients taken
    on the same rows, at the current and at the previous parameters::

        v_k = v_{k-1} + grad_S(w_k) - grad_S(w_{k-1})

    Args:
        x: Input tensor passed to both models as a whole.
        y: Targets; ``y.shape[0]`` defines the range the indices are drawn from.
        model: Module to optimise; its parameters are updated in place.
        model_prev: Module with the same parameter layout as ``model``. It is
            used as scratch storage for the previous iterate, so its
            parameters are overwritten and its gradients cleared on every
            iteration.
        loss: Callable ``loss(prediction, target)`` returning a scalar tensor.
        lr: Step size for ``torch.optim.SGD``.
        q: Period, in iterations, of the full-gradient recomputation.
        n_iters: Number of optimisation steps.
        samples_num: Number of rows drawn without replacement for each
            correction step; ``random.sample`` raises ``ValueError`` if it
            exceeds ``y.shape[0]``.

    Returns:
        ``numpy.ndarray`` of length ``n_iters``; entry ``k`` is the loss over
        all rows measured before the ``k``-th parameter update.
    """
    total_loss = np.zeros(n_iters)
    opt = torch.optim.SGD(model.parameters(), lr=lr)
    mask_helper = range(y.shape[0])

    for k in range(n_iters):
        if k % q == 0:
            # Restart the estimator: drop the accumulated estimate and
            # compute the full gradient on all rows.
            opt.zero_grad()
            y_pred = model(x)
            main_loss = loss(y_pred, y)
            main_loss.backward()
            total_loss[k] = main_loss.item()
        else:
            sample = random.sample(mask_helper, samples_num)

            # Deliberately no zero_grad() here: backward() adds the current
            # mini-batch gradient on top of the previous estimate v_{k-1}
            # that is still stored in param.grad.
            y_pred = model(x)
            sample_loss = loss(y_pred[sample], y[sample])
            sample_loss.backward()

            # Mini-batch gradient on the same rows at the previous iterate.
            y_pred_prev = model_prev(x)
            sample_loss_prev = loss(y_pred_prev[sample], y[sample])
            sample_loss_prev.backward()

            # Subtract it to complete the estimator update.
            with torch.no_grad():
                for param, param_prev in zip(model.parameters(), model_prev.parameters()):
                    param.grad -= param_prev.grad

            # Full-data loss at the current (pre-update) parameters.
            total_loss[k] = loss(y_pred.detach(), y).item()

        # Remember the current iterate before stepping, and clear the helper
        # model's gradients so the next correction starts from scratch.
        with torch.no_grad():
            for param, param_prev in zip(model.parameters(), model_prev.parameters()):
                param_prev.copy_(param)
                param_prev.grad = None

        opt.step()
    return total_loss

In [None]:
# Optimiser settings shared by both training routines.
learning_rate = 1e-3
n_iters = 3_000

# SpiderBoost recomputes the full gradient every `q` iterations.
q = 10

# Rows drawn for each mini-batch gradient.
samples_num = 100

# Objective minimised by both routines.
loss = torch.nn.MSELoss()

In [None]:
class LogisticRegression(torch.nn.Module):
    """Single linear layer followed by an element-wise sigmoid."""

    def __init__(self, input_dim, output_dim):
        """Create the ``input_dim -> output_dim`` linear layer."""
        super().__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return ``sigmoid(linear(x))``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
# Seed every RNG used below (mini-batch sampling via `random`, weight
# initialisation via `torch`) so a fresh kernel reproduces the same curves.
random.seed(42)
torch.manual_seed(42)

# Both optimisers start from the same initial weights; otherwise differences
# between the curves would partly reflect the random initialisation.
# LogisticRegression(n_features, 1) can be substituted here to try the
# sigmoid model instead.
initial_model = torch.nn.Linear(n_features, 1)

model = copy.deepcopy(initial_model)
gd_total_loss = gradient_descent(X, Y, model, loss, learning_rate, n_iters, samples_num)

model = copy.deepcopy(initial_model)
model_prev = copy.deepcopy(model)
sp_boost_total_loss = spider_boost(X, Y, model, model_prev, loss, learning_rate, q, n_iters, samples_num)

plt.scatter(np.arange(n_iters), gd_total_loss, label='Mini-batch SGD')
plt.scatter(np.arange(n_iters), sp_boost_total_loss, color='orange', label='SpiderBoost')
plt.yscale('log')
plt.xlabel('Iteration')
plt.ylabel('Full-data MSE loss')
plt.title('Mini-batch SGD vs. SpiderBoost')
plt.legend()
plt.show()