## Challenge 9

### Importing Required Libraries

In [1]:
# to handle datasets
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# machine learning imports
from sklearn.svm import SVC
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

# attacks
from art.attacks.evasion import UniversalPerturbation
from art.estimators.classification import SklearnClassifier

### Reading the data

In [2]:
data = pd.read_csv('android_traffic.csv')
data.head()

Unnamed: 0,tcp_packets,dist_port_tcp,external_ips,vulume_bytes,udp_packets,tcp_urg_packet,source_app_packets,remote_app_packets,source_app_bytes,remote_app_bytes,source_app_packets.1,dns_query_times,type
0,36,6,3,3911,0,0,39,33,5100,4140,39,3,benign
1,117,0,9,23514,0,0,128,107,26248,24358,128,11,benign
2,196,0,6,24151,0,0,205,214,163887,24867,205,9,benign
3,6,0,1,889,0,0,7,6,819,975,7,1,benign
4,6,0,1,882,0,0,7,6,819,968,7,1,benign


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7845 entries, 0 to 7844
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   tcp_packets           7845 non-null   int64 
 1   dist_port_tcp         7845 non-null   int64 
 2   external_ips          7845 non-null   int64 
 3   vulume_bytes          7845 non-null   int64 
 4   udp_packets           7845 non-null   int64 
 5   tcp_urg_packet        7845 non-null   int64 
 6   source_app_packets    7845 non-null   int64 
 7   remote_app_packets    7845 non-null   int64 
 8   source_app_bytes      7845 non-null   int64 
 9   remote_app_bytes      7845 non-null   int64 
 10  source_app_packets.1  7845 non-null   int64 
 11  dns_query_times       7845 non-null   int64 
 12  type                  7845 non-null   object
dtypes: int64(12), object(1)
memory usage: 796.9+ KB


### Encoding the labels

In [4]:
data = data.sample(frac = 1)
labels = data.type

unique_labels = ['malicious', 'benign']
encoded_labels = np.zeros((labels.shape[0], 2))
for row in range(0, labels.shape[0]):
    encoded_labels[row, unique_labels.index(labels[row])] = 1

### Train - Test Split 

In [5]:
x_train = data.iloc[:6276, :-1]
y_train = encoded_labels[:6276, :]
x_test = data.iloc[6276:, :-1]
y_test = encoded_labels[6276:, :]

### Building and Training the model on original data

In [6]:
model = SVC(C=1)

classifier = SklearnClassifier(model=model)

classifier.fit(x_train, y_train)     # Training 

### Showing the model performance (on original data)

In [7]:
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=0) == np.argmax(y_test, axis=0)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

Accuracy on benign test examples: 0.12746972594008923%


### Generating the Adversarial Data using Universal Perturbation

In [None]:
%%time
attack = UniversalPerturbation(classifier=classifier)

In [None]:
x_train_adv = attack.generate(x_train[:10],max_iter=10)


Universal perturbation: 0it [00:00, ?it/s][A

DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.41s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.43s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.61s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|█████████████████████████████████████████████

DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:13<00:00, 13.39s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:13<00:00, 13.62s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.39s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:13<00:00,

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:14<00:00, 14.15s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:15<00:00, 15.06s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:14<00:00, 14.65s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:14<00:00, 14.86s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:

DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:11<00:00, 11.97s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.52s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.80s/it][A[A


DeepFool:   0%|                                                                                  | 0/1 [00:00<?, ?it/s][A[A

DeepFool: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:12<00:00,

### New Adversarial Dataset
As it is taking a long time to perturb the complete dataset - we will only perturb first 10 records and replace them in the original dataset.

In [None]:
new_x_train_adv = x_train_adv

### Building and training the model on New Adversarial Dataset

In [None]:
model = SVC(C=1)

classifier = SklearnClassifier(model=model)

classifier.fit(x_train_adv, y_train)     # Training 

### Showing the new model performance (on adversarial data)

In [None]:
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=0) == np.argmax(y_test, axis=0)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))