# BBD(Bimodel Backdoor Defence)


In [1]:
%pylab inline

import os
import h5py
import tensorflow as tf
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator



Populating the interactive namespace from numpy and matplotlib


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Import Data

All data are nomalize by 255 to obtain a value between 0 and 1. Adjust the `DATA_DIR`, `MODEL_DIR`, `DATA_FILE`, ``



In [63]:
# input settings
DATA_DIR = '/content/drive/MyDrive/ML4Cyber/data'
CLEAN_DATA_FILE = 'clean_test_data.h5'
POISON_DATA_FILE = 'sunglasses_poisoned_data.h5'
MODEL_DIR = '/content/drive/MyDrive/ML4Cyber/model'

In [64]:
def load_dataset(data_file, nomalize=True):
    _, data_name = data_file.split('.')
    print('loading data file:{}'.format(data_name))
    data = h5py.File(data_file, 'r')
    x_data = np.array(data['data'])
    if nomalize:
        x_data = x_data / 255.
    y_data = np.array(data['label'])
    x_data = x_data.transpose((0,2,3,1))

    print('X_test shape %s' % str(x_data.shape))
    print('Y_test shape %s' % str(y_data.shape))

    return x_data, y_data

In [66]:
clean_x, clean_y = load_dataset(os.path.join(DATA_DIR, CLEAN_DATA_FILE))
poison_x, poison_y = load_dataset(os.path.join(DATA_DIR, POISON_DATA_FILE))

loading data file:h5
X_test shape (12830, 55, 47, 3)
Y_test shape (12830,)
loading data file:h5
X_test shape (12830, 55, 47, 3)
Y_test shape (12830,)


In [67]:
mix_x = np.concatenate((clean_x, poison_x), axis=0)
poison_y = np.array([1283] * poison_y.shape[0])
mix_y = np.concatenate((clean_y, poison_y), axis=0)
print(mix_x.shape)
print(mix_y.shape)

(25660, 55, 47, 3)
(25660,)


In [210]:
class BBD:
    def __init__(self, bad_model, clean_model, start_from_1=True, target_label=None):
        self.bad_model = bad_model
        self.clean_model = clean_model
        if start_from_1:
            self.start_factor = 1
        else:
            self.start_factor = 0
    
    def eval(self, x_data, y_data, poison_class=1283, targeted = True, batch_size=64):
        datagen = ImageDataGenerator()
        gen = datagen.flow(x_data, y_data, batch_size=batch_size)
        correct = 0
        total = 0
        if targeted:
            print('Targeted Test:')
            # identify as target label
            for _ in range(x_data.shape[0] // (batch_size)):
                x_batch, y_batch = gen.next()
                y_batch += self.start_factor
                output_bad = self.bad_model.predict(x_batch)
                prediction_bad = np.argmax(output_bad, axis=1)
                output_clean = self.clean_model.predict(x_batch)
                prediction_clean = np.argmax(output_clean, axis=1)
                pred_bad_clone = np.copy(prediction_bad)
                pred_clean_clone = np.copy(prediction_clean)
                mask = np.any(prediction_bad==mask_array[:, None], axis=0)
                prediction_clean[np.any(prediction_bad==mask_array[:, None], axis=0)] = 0
                pred_bad_clone[np.logical_not(mask)]=0
                pred_clean_clone[np.logical_not(mask)]=0
                pred_bad_clone[pred_bad_clone!=pred_clean_clone] = poison_class
                prediction = pred_bad_clone + prediction_clean + self.start_factor
                correct += np.sum(prediction==y_batch)
                total += y_batch.shape[0]




        else:
            print('Untargeted Test:')
            for _ in range(x_data.shape[0] // (batch_size)):
                x_batch, y_batch = gen.next()
                y_batch += self.start_factor
                output_clean = self.clean_model.predict(x_batch)
                prediction = np.argmax(output_clean, axis=1)
                prediction_bad = self.bad_model.predict(x_batch)
                prediction_bad = np.argmax(prediction_bad, axis=1)
                prediction[prediction != prediction_bad] = poison_class
                prediction += self.start_factor
                correct += np.sum(prediction==y_batch)
                total += y_batch.shape[0]
        print('acc:{}'.format(correct/total))
                  
                  
            # uncertain about if targeted or untarged attack


## Sunglasses Model

In [213]:
BAD_MODEL_FILE = 'sunglasses_bd_net.h5'
CLEAN_MODEL_FILE ='repair_model/sunglasses.h5'
bad_model = load_model(os.path.join(MODEL_DIR, BAD_MODEL_FILE))
clean_model = load_model(os.path.join(MODEL_DIR, CLEAN_MODEL_FILE))

In [214]:
defender = BBD(bad_model=bad_model, clean_model=clean_model, target_label=[0])
# targeted test
defender.eval(mix_x, mix_y, targeted=True)
# untargeted teat
defender.eval(mix_x, mix_y, targeted=False)

Targeted Test:
acc:0.9462109375
Untargeted Test:
acc:0.9428125


## Anonymous 1 Model


In [None]:
BAD_MODEL_FILE = 'anonymous_1_bd_net.h5'
CLEAN_MODEL_FILE ='repair_model/anon1.h5'
bad_model = load_model(os.path.join(MODEL_DIR, BAD_MODEL_FILE))
clean_model = load_model(os.path.join(MODEL_DIR, CLEAN_MODEL_FILE))

In [None]:
# targeted test
defender = BBD(bad_model=bad_model, clean_model=clean_model, target_label=[0])
defender.eval(mix_x, mix_y, targeted=True)
# untargeted teat
defender.eval(mix_x, mix_y, targeted=False)

## Anonymous 2 Model

In [65]:
BAD_MODEL_FILE = 'anonymous_2_bd_net.h5'
CLEAN_MODEL_FILE ='repair_model/anon2.h5'
bad_model = load_model(os.path.join(MODEL_DIR, BAD_MODEL_FILE))
clean_model = load_model(os.path.join(MODEL_DIR, CLEAN_MODEL_FILE))

In [211]:
# targeted test
defender = BBD(bad_model=bad_model, clean_model=clean_model, target_label=[0,4])
defender.eval(mix_x, mix_y, targeted=True)
# untargeted teat
defender.eval(mix_x, mix_y, targeted=False)

Targeted Test:
acc:0.9540625
Untargeted Test:
acc:0.95046875


# Multi-Trigger Model

In [None]:
BAD_MODEL_FILE = 'multi_trigger_multi_target_bd_net.h5'
CLEAN_MODEL_FILE ='repair_model/multi.h5'
bad_model = load_model(os.path.join(MODEL_DIR, BAD_MODEL_FILE))
clean_model = load_model(os.path.join(MODEL_DIR, CLEAN_MODEL_FILE))

In [None]:
# targeted test
defender = BBD(bad_model=bad_model, clean_model=clean_model, target_label=[0,4])
defender.eval(mix_x, mix_y, targeted=True)
# untargeted teat
defender.eval(mix_x, mix_y, targeted=False)