In [None]:
import numpy as np
import pandas as pd
from keras.optimizers import SGD
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.layers import Input,Embedding, Dense,Dropout
from sklearn.metrics import accuracy_score

# Importance reweighting method implementation
## Estimate Beta

In [None]:
def estimateBeta(S,prob,rho0,rho1):
    """Compute per-sample importance weights for importance reweighting.

    For a sample with observed label ``S[i]`` the weight is::

        (p - rho) / ((1 - rho0 - rho1) * p)

    where ``p`` is ``prob[i][1]`` and ``rho`` is ``rho0`` when ``S[i] == 1``,
    and ``p`` is ``prob[i][0]`` and ``rho`` is ``rho1`` otherwise.

    Args:
        S: sequence of ``n`` observed labels; entries equal to 1 select
            column 1 of ``prob``, everything else selects column 0.
        prob: array-like of shape ``(n, 2)`` with the predicted probability
            of each class for every sample.
        rho0: value subtracted from the column-1 probability (samples with
            ``S[i] == 1``).
        rho1: value subtracted from the column-0 probability (other samples).

    Returns:
        numpy.ndarray of shape ``(n, 1)`` holding the weights. Samples whose
        denominator is exactly zero receive weight 0 instead of NaN/inf, so
        the result is always safe to pass on as sample weights.
    """
    labels = np.asarray(S).reshape(-1)
    n = labels.shape[0]
    if n == 0:
        return np.zeros((0, 1))

    probs = np.asarray(prob, dtype=float)
    is_one = labels == 1

    # Probability of the *observed* class and the matching rho, per sample.
    p_obs = np.where(is_one, probs[:n, 1], probs[:n, 0])
    rho = np.where(is_one, rho0, rho1)

    denom = (1 - rho0 - rho1) * p_obs
    beta = np.zeros(n)
    # Divide only where the denominator is non-zero; the remaining entries
    # keep their initial 0 so no NaN/inf can leak into the weights.
    np.divide(p_obs - rho, denom, out=beta, where=denom != 0)
    return beta.reshape(n, 1)

## Implement the approach on the MNIST dataset

In [None]:
def run_mnist():
    """Run one importance-reweighting experiment on the MNIST archive.

    The function
      1. loads ``Xtr``/``Str`` (training images and labels) and ``Xts``/``Yts``
         (test images and labels) from the ``.npz`` archive,
      2. fits a small CNN on the training labels,
      3. uses the minimum predicted probability of each class as ``rho0`` /
         ``rho1`` and converts the predictions into per-sample weights with
         ``estimateBeta`` (negative weights are clipped to 0),
      4. keeps training the same network with those sample weights, and
      5. evaluates on the test split.

    Returns:
        tuple: ``(accuracy, loss)`` as reported by ``model.evaluate`` on the
        test split.
    """
    path="../input_data/mnist_dataset.npz"
    # np.load on an .npz keeps the archive open; read everything we need
    # inside the context manager so the file handle is released afterwards.
    with np.load(path) as data:
        Xtr=data['Xtr']
        Str=data['Str']
        Xts=data['Xts']
        Yts=data['Yts']

    # Flat images -> (N, 28, 28, 1) channels-last floats scaled to [0, 1].
    Xtr=Xtr.reshape([-1,1,28,28]).transpose([0,2,3,1]).astype('float')
    Xts=Xts.reshape([-1,1,28,28]).transpose([0,2,3,1]).astype('float')
    Xtr, Xts = Xtr / 255.0, Xts / 255.0

    Str = to_categorical(Str, num_classes=2)
    Yts = to_categorical(Yts, num_classes=2)

    model=Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu',input_shape=(28, 28, 1)))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Dropout(0.2))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Dropout(0.2))

    model.add(Conv2D(128, (2, 2), activation='relu'))
    model.add(Conv2D(128, (2, 2), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(2, activation='softmax'))

    # NOTE(review): `lr`/`decay` are legacy Keras argument names; newer
    # releases expect `learning_rate` -- confirm against the installed version.
    sgd=SGD(lr=0.01,decay=1e-6,momentum=0.9,nesterov=True)

    # Stage 1: plain fit on the training labels to obtain class probabilities.
    model.compile(optimizer=sgd,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(Xtr,Str,epochs = 10,validation_split=0.2,shuffle=True)

    prob = model.predict(Xtr)

    # The smallest predicted probability of each class is used as the rho
    # value that estimateBeta subtracts for samples labelled with that class.
    rho0 = np.min(prob[:,1])
    rho1 = np.min(prob[:,0])

    # estimateBeta expects integer class ids, not one-hot rows.
    labels = np.argmax(Str,axis=1)
    beta = estimateBeta(labels,prob,rho0,rho1)
    # Sample weights must be non-negative.
    beta[beta < 0] = 0.0

    # Stage 2: continue training the same model with the importance weights.
    model.compile(optimizer=sgd,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(Xtr,Str,epochs = 10,validation_split=0.2,sample_weight=beta.flatten(),shuffle=True)

    loss,accuracy=model.evaluate(Xts,Yts)
    print("\nTest set, Loss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100))

    return accuracy,loss

In [None]:
def result(n_runs=10):
    """Repeat the MNIST experiment and collect the test metrics of every run.

    Args:
        n_runs: number of independent ``run_mnist()`` calls (default 10).

    Returns:
        tuple: ``(accs, losses)`` -- two lists of length ``n_runs`` holding the
        accuracy and the loss returned by each ``run_mnist()`` call, in order.
    """
    accs = []
    losses = []
    for _ in range(n_runs):
        cur_acc, cur_los = run_mnist()
        accs.append(cur_acc)
        losses.append(cur_los)
    return accs, losses

## Print MNIST results

In [None]:
# Run the repeated MNIST experiment here so this cell works on a fresh kernel
# (Restart & Run All) instead of relying on variables from an earlier session.
accs, losses = result()
print('accuracy_mnist:',accs)
print('loss_mnist:',losses)
# Keep the per-run metrics as arrays under dataset-specific names.
accuracy_mnist=np.array(accs)
loss_mnist=np.array(losses)
print('aver_accuracy_mnist:',np.mean(accuracy_mnist))
print('aver_loss_mnist:',np.mean(loss_mnist))
print('std_accuracy_mnist:',np.std(accuracy_mnist))
print('std_loss_mnist:',np.std(loss_mnist))

## Implement the approach on the CIFAR dataset

In [None]:
def run_cifar():
    """Run one importance-reweighting experiment on the CIFAR archive.

    The function
      1. loads ``Xtr``/``Str`` (training images and labels) and ``Xts``/``Yts``
         (test images and labels) from the ``.npz`` archive,
      2. fits a small CNN on the training labels,
      3. uses the minimum predicted probability of each class as ``rho0`` /
         ``rho1`` and converts the predictions into per-sample weights with
         ``estimateBeta`` (negative weights are clipped to 0),
      4. keeps training the same network with those sample weights, and
      5. evaluates on the test split.

    A single call performs exactly one train/evaluate run; repeating the
    experiment is the caller's job.

    Returns:
        tuple: ``(accuracy, loss)`` as reported by ``model.evaluate`` on the
        test split.
    """
    path="../input_data/cifar_dataset.npz"
    # np.load on an .npz keeps the archive open; read everything we need
    # inside the context manager so the file handle is released afterwards.
    with np.load(path) as data:
        Xtr=data['Xtr']
        Str=data['Str']
        Xts=data['Xts']
        Yts=data['Yts']

    # Flat images -> (N, 32, 32, 3) channels-last floats scaled to [0, 1].
    Xtr=Xtr.reshape([-1,3,32,32]).transpose([0,2,3,1]).astype('float')
    Xts=Xts.reshape([-1,3,32,32]).transpose([0,2,3,1]).astype('float')
    Xtr, Xts = Xtr / 255.0, Xts / 255.0

    Str = to_categorical(Str, num_classes=2)
    Yts = to_categorical(Yts, num_classes=2)

    model=Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu',input_shape=(32, 32, 3)))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Dropout(0.2))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Dropout(0.2))

    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(2, activation='softmax'))

    # NOTE(review): `lr`/`decay` are legacy Keras argument names; newer
    # releases expect `learning_rate` -- confirm against the installed version.
    sgd=SGD(lr=0.01,decay=1e-6,momentum=0.9,nesterov=True)

    # Stage 1: plain fit on the training labels to obtain class probabilities.
    model.compile(optimizer=sgd,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(Xtr,Str,epochs = 10,validation_split=0.2,shuffle=True)

    prob = model.predict(Xtr)

    # The smallest predicted probability of each class is used as the rho
    # value that estimateBeta subtracts for samples labelled with that class.
    rho0 = np.min(prob[:,1])
    rho1 = np.min(prob[:,0])

    # estimateBeta expects integer class ids, not one-hot rows.
    labels = np.argmax(Str,axis=1)
    beta = estimateBeta(labels,prob,rho0,rho1)
    # Sample weights must be non-negative.
    beta[beta < 0] = 0.0

    # Stage 2: continue training the same model with the importance weights.
    model.compile(optimizer=sgd,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(Xtr,Str,epochs = 10,validation_split=0.2,sample_weight=beta.flatten(),shuffle=True)

    loss,accuracy=model.evaluate(Xts,Yts)
    print("\nTest set, Loss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100))

    return accuracy,loss

In [None]:
def result(n_runs=10):
    """Repeat the CIFAR experiment and collect the test metrics of every run.

    NOTE: this redefines ``result``; the earlier MNIST version in this
    notebook is shadowed once this cell has been executed.

    Args:
        n_runs: number of independent ``run_cifar()`` calls (default 10).

    Returns:
        tuple: ``(accs, losses)`` -- two lists of length ``n_runs`` holding the
        accuracy and the loss returned by each ``run_cifar()`` call, in order.
    """
    accs = []
    losses = []
    for _ in range(n_runs):
        cur_acc, cur_los = run_cifar()
        accs.append(cur_acc)
        losses.append(cur_los)
    return accs, losses

## Print CIFAR results

In [None]:
# Run the repeated CIFAR experiment here so this cell works on a fresh kernel
# (Restart & Run All); `result` is the CIFAR version defined in the cell above.
accs, losses = result()
print('accuracy_cifar:',accs)
print('loss_cifar:',losses)
# Store under CIFAR-specific names so the MNIST arrays are not overwritten.
accuracy_cifar=np.array(accs)
loss_cifar=np.array(losses)
print('aver_accuracy_cifar:',np.mean(accuracy_cifar))
print('aver_loss_cifar:',np.mean(loss_cifar))
print('std_accuracy_cifar:',np.std(accuracy_cifar))
print('std_loss_cifar:',np.std(loss_cifar))