In [1]:
import pickle
import sklearn
import modAL
from modAL.models import ActiveLearner
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from modAL.uncertainty import uncertainty_sampling
import sys

# 1. Read negative file #

In [3]:
neg_file= open("/home/amm1209/sonyc_distortion_classification/data/negative_xy.pickle", "rb")
negative_xy = pickle.load(neg_file)

In [4]:
negative_xy

array([[146,  14, 129, ...,  62, 255,   0],
       [154,  30, 171, ..., 158, 255,   0],
       [153,  25, 150, ..., 150, 255,   0],
       ...,
       [153,  25, 146, ...,   0, 255,   0],
       [153,  25, 152, ...,   0, 255,   0],
       [155,  23, 168, ...,  90, 255,   0]])

In [5]:
np.random.shuffle(negative_xy)

In [6]:
negative_xy.shape

(300, 129)

# 2. Read positive file #

In [7]:
positive_xy = pickle.load(open( "/home/amm1209/sonyc_distortion_classification/data/positive_xy.pickle", "rb" ))

In [8]:
positive_xy

array([[153,  17, 128, ..., 136, 255,   1],
       [150,  19, 127, ...,  76, 255,   1],
       [155,  18, 126, ..., 156, 255,   1],
       ...,
       [160,  20, 161, ..., 167, 255,   1],
       [154,  20, 139, ..., 255, 255,   1],
       [154,  21, 130, ..., 137, 255,   1]])

In [9]:
np.random.shuffle(positive_xy)

In [10]:
positive_xy

array([[157,  19, 152, ...,  69, 255,   1],
       [157,  22, 140, ..., 255, 255,   1],
       [161,  31, 146, ..., 119, 255,   1],
       ...,
       [160,  36, 146, ...,   0, 255,   1],
       [154,  33, 143, ..., 177, 255,   1],
       [159,  22, 141, ..., 215, 255,   1]])

In [11]:
positive_xy.shape

(300, 129)

# 3. Get train and test examples #

## 3.1. Negative examples ##

In [12]:
negative_train = negative_xy[0:int(negative_xy.shape[0]/2),:]
negative_test = negative_xy[int(negative_xy.shape[0]/2):,:]

In [13]:
negative_train.shape

(150, 129)

In [14]:
negative_test.shape

(150, 129)

## 3.2. Positive examples ##

In [15]:
positive_train = positive_xy[0:int(positive_xy.shape[0]/2),:]
positive_test = positive_xy[int(positive_xy.shape[0]/2):,:]

In [16]:
positive_train.shape

(150, 129)

In [17]:
positive_test.shape

(150, 129)

## 3.3. Concatenate train examples together ##

In [18]:
train = np.concatenate((positive_train, negative_train))

In [19]:
train.shape

(300, 129)

## 3.4. Concatenate test examples together ##

In [20]:
test = np.concatenate((positive_test, negative_test))

In [21]:
test.shape

(300, 129)

## 3.5. Shuffle data ##

In [22]:
np.random.shuffle(train)

In [23]:
np.random.shuffle(test)

## 3.6. Separate x and y ##

### 3.6.1. Separate training data ###

In [24]:
x_train = train[:,:-1]

In [25]:
x_train.shape

(300, 128)

In [26]:
y_train = train[:,-1]

In [27]:
y_train.shape

(300,)

### 3.6.2. Separate test data ###

In [28]:
x_test = test[:,:-1]

In [29]:
x_test.shape

(300, 128)

In [30]:
y_test = test[:,-1]

In [31]:
y_test.shape

(300,)

# 4. Read X_pool file #

In [32]:
pool_file= open("/home/amm1209/sonyc_distortion_classification/data/X_pool_10000.pickle", "rb")
X_pool = pickle.load(pool_file)

In [33]:
X_pool.shape

(9974, 128)

In [34]:
ids_file= open("/home/amm1209/sonyc_distortion_classification/data/id_pool_10000.pickle", "rb")
id_pool = pickle.load(ids_file)

In [37]:
id_pool.shape

(9974, 2)

In [45]:
sensor_id = []
time_stamp = []
frame = []
for i in range(len(id_pool)):
    sensor_timestamp = id_pool[i][0]
    if len(sensor_timestamp.decode("utf-8").split('_')) ==1:
        print(sensor_timestamp)
        sensor_id.append(sensor_timestamp.decode("utf-8").split('-')[0])
        time_stamp.append(sensor_timestamp.decode("utf-8").split('-')[1])
    else:     
        sensor_id.append(sensor_timestamp.decode("utf-8").split('_')[0])
        time_stamp.append(sensor_timestamp.decode("utf-8").split('_')[1])
    frame.append(int(id_pool[i][1].decode("utf-8")))


In [48]:
type(frame[0])

int

# 5. Create classifier #

In [49]:
clf = RandomForestClassifier(n_estimators = 100)

In [50]:
clf.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [51]:
pred = clf.predict(x_test)

In [52]:
sklearn.metrics.accuracy_score(y_test,pred)

0.9633333333333334

# 6. Create Active Learner #

In [53]:
learner = ActiveLearner(
    estimator=clf,
    query_strategy=uncertainty_sampling,
    X_training=x_train, y_training=y_train
)

In [58]:
n_queries = 50
n_instances = 10
accuracy = np.zeros(n_queries)
not_converged = True
count = 0
while not_converged:
    y = []
    query_idx, query_instance = learner.query(X_pool, n_instances=n_instances)
    print (query_idx)
    for i in range(n_instances):
        y_new = input("Please input label for\nsensor_id = '%s'\ntimestamp = %s\nframe = %d (1 if noise is present, 0 if noise is not present)" % (sensor_id[query_idx[i]],time_stamp[query_idx[i]],frame[query_idx[i]]))
        y.append(int(y_new))
    print('ARRAY: ',y)
    learner.teach(
        X=X_pool[query_idx],
        y=np.array(y).reshape(-1, )
    )
    X_pool = np.delete(X_pool, query_idx, axis=0)
    id_pool = np.delete(id_pool, query_idx, axis=0)
    print('Accuracy after query no. %d: %f' % (count+1, learner.score(x_test, y_test)))
    accuracy[count]=learner.score(x_test, y_test)
    
    if accuracy[count]-accuracy[count-1]<10^(-4) or count == n_queries-1:
        not_converged = False
    count = count +1
print(accuracy)

[190 401]
Please input label for
sensor_id = 'b827eb9bed23'
timestamp = 1496246250.73
frame = 9 (1 if noise is present, 0 if noise is not present)0
Please input label for
sensor_id = 'b827ebba613d'
timestamp = 1491193202.41
frame = 9 (1 if noise is present, 0 if noise is not present)1
ARRAY:  [0, 1]
Accuracy after query no. 1: 0.963333
[0.96333333]


In [87]:
pool = X_pool

In [68]:
pool = np.delete(X_pool, query_idx, axis=0)

In [88]:
pool.shape

(986, 128)