In [1]:
from time import perf_counter
import sys
sys.path.insert(0, '/home/ynezri/ipython/incremental/data_analysis/')
from preprocessing import TraceStats, BatchStats
from estimators import MLEstimator, AE, GEE, UJ2A, MySGD, MyPA, MyRLS
from functools import partial
import numpy as np

def feature_extractor_by_names(batch_stats, feature_names):
    """Build a one-row feature array from the requested batch statistics.

    Every recognised name found in ``feature_names`` contributes one value read
    from ``batch_stats``. The column order is fixed by this function
    (``f_1``, ``f_2``, ``f_3``, ``avg_pkt_len``, ``syn_count``, ``sample_size``)
    and does not follow the order of ``feature_names``. Unrecognised names are
    silently ignored.

    Args:
        batch_stats: object exposing ``histogram`` (indexed with 1, 2 and 3),
            ``avg_pkt_len``, ``syn_count`` and ``sample_size``. Only the
            attributes backing a requested feature are read.
        feature_names: container probed with ``in`` for each known name.

    Returns:
        ``np.ndarray`` built from a list wrapping the selected values, i.e. a
        single row with one entry per selected feature.
    """
    # (name, getter) pairs listed in output-column order. The getters are lazy
    # so statistics that were not requested are never accessed.
    readers = (
        ('f_1', lambda stats: stats.histogram[1]),
        ('f_2', lambda stats: stats.histogram[2]),
        ('f_3', lambda stats: stats.histogram[3]),
        ('avg_pkt_len', lambda stats: stats.avg_pkt_len),
        ('syn_count', lambda stats: stats.syn_count),
        ('sample_size', lambda stats: stats.sample_size),
    )

    row = [read(batch_stats) for name, read in readers if name in feature_names]
    return np.array([row])

In [2]:
def measure_estimate(ts, estimators):
    """Time ``estimate`` for each estimator, averaged over the trace's batches.

    Args:
        ts: object whose ``batch_list`` holds the batches to feed in.
        estimators: iterable of objects exposing ``name`` and ``estimate(batch)``.

    Returns:
        dict mapping ``estimator.name`` to the mean wall-clock duration (in
        ``perf_counter`` seconds) of a single ``estimate`` call.

    Raises:
        ZeroDivisionError: if ``ts.batch_list`` is empty.
    """
    timings = {}
    for est in estimators:
        batches = ts.batch_list
        elapsed = 0

        for batch in batches:
            # Only the estimate call itself sits between the two clock reads.
            t0 = perf_counter()
            est.estimate(batch)
            elapsed += perf_counter() - t0

        timings[est.name] = elapsed / len(batches)

    return timings

def measure_fit(ts, estimators, feature_names):
    """Time ``model.partial_fit`` for each estimator, averaged over the batches.

    Before timing, each estimator gets one ``estimate`` call on the first batch.
    Feature extraction via ``batch.get_features`` is kept outside the timed
    region.

    NOTE(review): features come from ``batch.get_features(feature_names)``, not
    from whatever extractor the estimator was built with — confirm both produce
    the same input layout for ``estimator.model``.

    Args:
        ts: object whose ``batch_list`` holds the batches; each batch exposes
            ``get_features(feature_names)`` and ``batch_card``.
        estimators: iterable of objects exposing ``name``, ``estimate(batch)``
            and ``model.partial_fit(X, y)``.
        feature_names: forwarded unchanged to ``batch.get_features``.

    Returns:
        dict mapping ``estimator.name`` to the mean wall-clock duration (in
        ``perf_counter`` seconds) of a single ``partial_fit`` call.
    """
    timings = {}
    for est in estimators:
        batches = ts.batch_list
        # Untimed warm-up call on the first batch.
        est.estimate(batches[0])
        elapsed = 0

        for batch in batches:
            x = batch.get_features(feature_names)

            t0 = perf_counter()
            est.model.partial_fit(x, [batch.batch_card])
            elapsed += perf_counter() - t0

        timings[est.name] = elapsed / len(batches)

    return timings

def measure_predict(ts, estimators, feature_names):
    """Time ``model.predict`` for each estimator, averaged over the batches.

    For every batch the model is first updated with ``partial_fit`` (untimed),
    then ``predict`` is timed on the same features.

    NOTE(review): no ``estimator.estimate`` call precedes the loop here, so this
    relies on ``estimator.model`` already being usable — confirm it does not
    depend on an earlier cell having run.

    Args:
        ts: object whose ``batch_list`` holds the batches; each batch exposes
            ``get_features(feature_names)`` and ``batch_card``.
        estimators: iterable of objects exposing ``name`` and a ``model`` with
            ``partial_fit(X, y)`` and ``predict(X)``.
        feature_names: forwarded unchanged to ``batch.get_features``.

    Returns:
        dict mapping ``estimator.name`` to the mean wall-clock duration (in
        ``perf_counter`` seconds) of a single ``predict`` call.

    Raises:
        ZeroDivisionError: if ``ts.batch_list`` is empty.
    """
    timings = {}
    for est in estimators:
        batches = ts.batch_list
        elapsed = 0

        for batch in batches:
            x = batch.get_features(feature_names)
            # The incremental update is deliberately left outside the timed region.
            est.model.partial_fit(x, [batch.batch_card])

            t0 = perf_counter()
            est.model.predict(x)
            elapsed += perf_counter() - t0

        timings[est.name] = elapsed / len(batches)

    return timings

In [3]:
# Load the pre-computed trace statistics that serve as the benchmark workload.
# NOTE(review): the path is relative to the notebook's working directory, and the
# ".pickle" suffix suggests TraceStats.load unpickles the file — confirm, and only
# load files from a trusted source.
ts = TraceStats.load('./../evaluation/data/caida-2016_100K_0.1000.pickle')

# Statistical estimators, each built with a label string (presumably exposed as
# `.name`, which the timing helpers use as result keys — confirm).
statistical = [GEE('GEE'), AE('AE'), UJ2A('UJ2A')]


# Online-learning estimators all share one extractor restricted to `features`.
features = ['f_1']
feature_extractor = partial(feature_extractor_by_names, feature_names=features)
# NOTE(review): the third positional argument (1) presumably is the number of
# input features and should match len(features) — confirm against estimators.py.
online_ml = [MySGD('SGD', feature_extractor, 1, learning_rate=10**-6, max_iter=1000, tol=10**-3),
             MyPA('PA', feature_extractor, 1, max_iter=1000, tol=10**-3),
             MyRLS('RLS', feature_extractor, 1)]

In [None]:
# Mean per-batch `estimate` time (seconds) for each statistical estimator.
measure_estimate(ts, statistical)

In [None]:
# Make NumPy raise FloatingPointError for every floating-point error category
# instead of warning, presumably so numerical blow-ups in the online learners
# surface immediately. This is a process-wide setting that affects later cells.
np.seterr(all='raise')

In [4]:
# Mean per-batch `partial_fit` time (seconds) for each online-learning estimator.
# NOTE(review): the recorded run of this cell ended in an exception raised from
# estimators.py `_fit_n`; the post-mortem output below shows weights of magnitude
# ~1e145, which suggests the fit diverged — investigate before trusting timings.
measure_fit(ts, online_ml, features)

Exception: 

In [5]:
# Post-mortem debugger on the most recent uncaught exception (here, the failure
# of the previous cell).
# NOTE(review): leftover interactive debugging — it waits for console input, so
# it will stall an unattended "Run All"; remove before sharing the notebook.
import pdb
pdb.pm()

> /home/ynezri/ipython/incremental/data_analysis/estimators.py(270)_fit_n()
-> raise Exception()
(Pdb) p X
array([[3815]])
(Pdb) p y
[22820]
(Pdb) p self.w
array([6.58079072e+145, 2.57792030e+149])
(Pdb) p np.dot(self.w, X)
*** ValueError: shapes (2,) and (1,1) not aligned: 2 (dim 0) != 1 (dim 0)
(Pdb) p np.dot(self.w, X[0])
*** ValueError: shapes (2,) and (1,) not aligned: 2 (dim 0) != 1 (dim 0)
(Pdb) q


In [None]:
# Mean per-batch `predict` time (seconds) for each online-learning estimator.
# Note that this keeps training the same estimator objects used by earlier cells.
measure_predict(ts, online_ml, features)