In [42]:
# import
import numpy as np
import math
import time

In [2]:
# read file
def read_file(fname, delimiter=None, converters=None):
    data = np.loadtxt(fname, delimiter=delimiter, converters=converters)
    return data

# 1. 1D 2-class GDA

In [43]:
def fit1(X, y):
    m = np.zeros(2)
    mu = np.zeros(2)
    
    for i in range(X.shape[0]):
        if y[i] == 1:
            m[1] += 1
            mu[1] += X[i][0]
        elif y[i] == 2:
            m[0] += 1
            mu[0] += X[i][0]
            
    mu[1] = mu[1] / m[1]
    mu[0] = mu[0] / m[0]
    
    return mu

In [44]:
def membership1(X, mu):
    g = (X[0] - mu) ** 2
    return g

In [45]:
def predict1(X, y, mu):
    tp = 1
    tn = 1
    fp = 1
    fn = 1
    
    for i in range(X.shape[0]):
        g1 = membership1(X[i], mu[1])
        g0 = membership1(X[i], mu[0])
        
        if g1 > g0 and y[i] == 1:
            tp += 1
        elif g1 > g0 and y[i] == 2:
            fp += 1
        elif g1 <= g0 and y[i] == 1:
            fn += 1
        elif g1 <= g0 and y[i] == 2:
            tn += 1
    
    return tp, tn, fp, fn

In [46]:
def score1(tp, tn, fp, fn):
    c_matrix = [[tp, fp], [fn, tn]]
    precision = 1. * tp / (tp + fp)
    recall = 1. * tp / (tp + fn)
    F = 2. * recall * precision / (recall + precision)
    accuracy = 1. * (tp + tn) / (tp + tn + fp + fn)
    return c_matrix, precision, recall, F, accuracy

In [47]:
# K_Fold
def K_Fold(n, n_folds=10, shuffle=False):
    step = n // n_folds
    remainder = n % n_folds
    
    # generate fold sizes
    fold_sizes = (step) * np.ones(n_folds, dtype=np.int)
    fold_sizes[:remainder] += 1
    
    train_idx = []
    test_idx = []
    
    sequence = np.array(range(n))
    if shuffle:
        np.random.shuffle(sequence)
        
    cursor = 0
    for fs in fold_sizes:
        test_fold = sequence[cursor:cursor + fs]
        test_idx.append(test_fold)
        train_fold = np.delete(sequence, range(cursor, cursor + fs))
        train_idx.append(train_fold)
        cursor += fs
    return train_idx, test_idx

In [48]:
# cross validation
def cross_validation1(X, y, n_folds=10, r_verbose=False):
    train_c_matrix = []
    train_precision = []
    train_recall = []
    train_F = []
    train_accuracy = []
    
    test_c_matrix = []
    test_precision = []
    test_recall = []
    test_F = []
    test_accuracy = []
    
    train_idx, test_idx = K_Fold(len(y), n_folds=n_folds, shuffle=True)
    
    for i in range(n_folds):
        # fit
        mu = fit1(X[train_idx[i]], y[train_idx[i]])
        
        # train error
        tr_tp, tr_tn, tr_fp, tr_fn = predict1(X[train_idx[i]], y[train_idx[i]], mu)
        tr_c_matrix, tr_precision, tr_recall, tr_F, tr_accuracy = score1(tr_tp, tr_tn, tr_fp, tr_fn)

        # test error
        t_tp, t_tn, t_fp, t_fn = predict1(X[test_idx[i]], y[test_idx[i]], mu)
        t_c_matrix, t_precision, t_recall, t_F, t_accuracy = score1(t_tp, t_tn, t_fp, t_fn)
        
        train_c_matrix.append(tr_c_matrix)
        train_precision.append(tr_precision)
        train_recall.append(tr_recall)
        train_F.append(tr_F)
        train_accuracy.append(tr_accuracy)
        
        test_c_matrix.append(t_c_matrix)
        test_precision.append(t_precision)
        test_recall.append(t_recall)
        test_F.append(t_F)
        test_accuracy.append(t_accuracy)
        
    # average error
    train_precision_avg = np.mean(train_precision)
    train_recall_avg = np.mean(train_recall)
    train_F_avg = np.mean(train_F)
    train_accuracy_avg = np.mean(train_accuracy)
    
    test_precision_avg = np.mean(test_precision)
    test_recall_avg = np.mean(test_recall)
    test_F_avg = np.mean(test_F)
    test_accuracy_avg = np.mean(test_accuracy)

    if r_verbose:
        print '(average results) CROSS_VALIDATION 1 (K = %d):' % n_folds
        print 'train_c_matrix, train_precision_avg, train_recall_avg, train_F_avg, train_accuracy_avg'
        print 'test_c_matrix, test_precision_avg, test_recall_avg, test_F_avg, test_accuracy_avg'
        print train_c_matrix
        print train_precision_avg
        print train_recall_avg
        print train_F_avg
        print train_accuracy_avg
        print '\n'
        print test_c_matrix
        print test_precision_avg
        print test_recall_avg
        print test_F_avg
        print test_accuracy_avg
        print '\n'

In [49]:
# read file
fname = 'Skin_NonSkin.txt'
data = read_file(fname)
s = np.split(data, [data.shape[1] - 1], axis=1)
X = s[0]
y = s[1]

# timing
start = time.time()

# cross validation
cross_validation1(X, y, n_folds=5, r_verbose=True)

mu = fit1(X, y)
tp, tn, fp, fn = predict1(X, y, mu)
stop = time.time()

c_matrix, precision, recall, F, accuracy = score1(tp, tn, fp, fn)
duration = stop - start

print 'RUN: %s,' % fname, 'applied to all data:'
print 'c_matrix, precision, recall, F, accuracy, duration:'
print c_matrix
print precision
print recall
print F
print accuracy
print duration

(average results) CROSS_VALIDATION 1 (K = 5):
train_c_matrix, train_precision_avg, train_recall_avg, train_F_avg, train_accuracy_avg
test_c_matrix, test_precision_avg, test_recall_avg, test_F_avg, test_accuracy_avg
[[[18964, 95738], [21743, 59604]], [[18998, 95679], [21749, 59623]], [[18880, 95799], [21773, 59598]], [[18540, 95615], [22123, 59772]], [[18937, 95791], [21739, 59583]]]
0.164620451827
0.463605387899
0.242966389749
0.400407856959


[[[4723, 23989], [5433, 14871]], [[4689, 24048], [5427, 14852]], [[4807, 23928], [5403, 14877]], [[4611, 23760], [5589, 15055]], [[4750, 23936], [5437, 14892]]]
0.164612701703
0.463544146155
0.242947414517
0.400392539375


RUN: Skin_NonSkin.txt, applied to all data:
c_matrix, precision, recall, F, accuracy, duration:
[[23686, 119726], [27175, 74474]]
0.165160516554
0.465700635064
0.243842427924
0.400553331619
9.50131607056


# 2. nD 2-class GDA

In [50]:
def fit2(X, y):
    m = np.zeros(2)
    mu = np.zeros((2, X.shape[1]))
    sigma = np.zeros((2, X.shape[1], X.shape[1]))
    
    for i in range(X.shape[0]):
        if y[i] == 1:
            m[1] += 1
            mu[1] += X[i]
        elif y[i] == 2:
            m[0] += 1
            mu[0] += X[i]
            
    mu[1] = mu[1] / m[1]
    mu[0] = mu[0] / m[0]
    
    for i in range(X.shape[0]):
        if y[i] == 1:
            diff = X[i] - mu[1]
            diff = np.reshape(diff, (1, diff.size))
            sigma[1] += np.dot(diff.T, diff)
        elif y[i] == 2:
            diff = X[i] - mu[0]
            diff = np.reshape(diff, (1, diff.size))
            sigma[0] += np.dot(diff.T, diff)

    sigma[1] = sigma[1] / m[1]
    sigma[0] = sigma[0] / m[0]
    
    return mu, sigma

In [51]:
def membership2(X, mu, sigma):
    mu = np.reshape(mu, (1, mu.size))
    alpha = np.dot(mu, np.linalg.inv(sigma))
    g = np.dot(alpha, np.reshape(X, (X.size, 1))) * 2 - np.dot(alpha, np.reshape(mu, (mu.size,1)))
    
    return g

In [52]:
def predict2(X, y, mu, sigma):
    tp = 1
    tn = 1
    fp = 1
    fn = 1
    
    for i in range(X.shape[0]):
        g1 = membership2(X[i], mu[1], sigma[1])
        g0 = membership2(X[i], mu[0], sigma[0])
        
        if g1 > g0 and y[i] == 1:
                tp += 1
        elif g1 > g0 and y[i] == 2:
                fp += 1
        elif g1 <= g0 and y[i] == 1:
                fn += 1
        elif g1 <= g0 and y[i] == 2:
                tn += 1
    
    return tp, tn, fp, fn

In [76]:
# cross validation
def cross_validation2(X, y, n_folds=10, r_verbose=False):
    train_c_matrix = []
    train_precision = []
    train_recall = []
    train_F = []
    train_accuracy = []
    
    test_c_matrix = []
    test_precision = []
    test_recall = []
    test_F = []
    test_accuracy = []
    
    train_idx, test_idx = K_Fold(len(y), n_folds=n_folds, shuffle=True)
    
    for i in range(n_folds):
        # fit
        mu, sigma = fit2(X[train_idx[i]], y[train_idx[i]])
        
        # train error
        tr_tp, tr_tn, tr_fp, tr_fn = predict2(X[train_idx[i]], y[train_idx[i]], mu, sigma)
        tr_c_matrix, tr_precision, tr_recall, tr_F, tr_accuracy = score1(tr_tp, tr_tn, tr_fp, tr_fn)

        # test error
        t_tp, t_tn, t_fp, t_fn = predict2(X[test_idx[i]], y[test_idx[i]], mu, sigma)
        t_c_matrix, t_precision, t_recall, t_F, t_accuracy = score1(t_tp, t_tn, t_fp, t_fn)
        
        train_c_matrix.append(tr_c_matrix)
        train_precision.append(tr_precision)
        train_recall.append(tr_recall)
        train_F.append(tr_F)
        train_accuracy.append(tr_accuracy)
        
        test_c_matrix.append(t_c_matrix)
        test_precision.append(t_precision)
        test_recall.append(t_recall)
        test_F.append(t_F)
        test_accuracy.append(t_accuracy)
        
    # average error
    train_precision_avg = np.mean(train_precision)
    train_recall_avg = np.mean(train_recall)
    train_F_avg = np.mean(train_F)
    train_accuracy_avg = np.mean(train_accuracy)
    
    test_precision_avg = np.mean(test_precision)
    test_recall_avg = np.mean(test_recall)
    test_F_avg = np.mean(test_F)
    test_accuracy_avg = np.mean(test_accuracy)
    
    if r_verbose:
        print '(average results) CROSS_VALIDATION 2 (K = %d):' % n_folds
        print 'train_c_matrix, train_precision_avg, train_recall_avg, train_F_avg, train_accuracy_avg'
        print 'test_c_matrix, test_precision_avg, test_recall_avg, test_F_avg, test_accuracy_avg'
        print train_c_matrix
        print train_precision_avg
        print train_recall_avg
        print train_F_avg
        print train_accuracy_avg
        print '\n'
        print test_c_matrix
        print test_precision_avg
        print test_recall_avg
        print test_F_avg
        print test_accuracy_avg
        print '\n'

In [77]:
# read file
fname = 'Skin_NonSkin.txt'
data = read_file(fname)
s = np.split(data, [data.shape[1] - 1], axis=1)
X = s[0]
y = s[1]

# timing
start = time.time()

# cross validation
cross_validation2(X, y, n_folds=5, r_verbose=True)

mu, sigma = fit2(X, y)
tp, tn, fp, fn = predict2(X, y, mu, sigma)
stop = time.time()

c_matrix, precision, recall, F, accuracy = score1(tp, tn, fp, fn)
duration = stop - start

print 'RUN: %s,' % fname, 'applied to all data:'
print 'c_matrix, precision, recall, F, accuracy, duration:'
print c_matrix
print precision
print recall
print F
print accuracy
print duration

(average results) CROSS_VALIDATION 2 (K = 5):
train_c_matrix, train_precision_avg, train_recall_avg, train_F_avg, train_accuracy_avg
test_c_matrix, test_precision_avg, test_recall_avg, test_F_avg, test_accuracy_avg
[[[40621, 11705], [8, 143715]], [[40784, 11767], [7, 143491]], [[40647, 11897], [6, 143500]], [[40808, 11783], [6, 143453]], [[40554, 11790], [5, 143701]]]
0.775336039755
0.999842722733
0.873392324703
0.939837674265


[[[10232, 2974], [2, 35808]], [[10071, 2948], [1, 35996]], [[10209, 2951], [1, 35854]], [[10045, 2904], [4, 36062]], [[10302, 2963], [2, 35748]]]
0.775297347502
0.999803039143
0.873352607917
0.93981483578


RUN: Skin_NonSkin.txt, applied to all data:
c_matrix, precision, recall, F, accuracy, duration:
[[50854, 14727], [7, 179473]]
0.77543800796
0.999862369989
0.873464900981
0.93987619409
89.3674829006


# 3. nD k-class GDA

In [57]:
def fit3(X, y):
    m = np.zeros(3)
    mu = np.zeros((3, X.shape[1]))
    sigma = np.zeros((3, X.shape[1], X.shape[1]))

    for i in range(X.shape[0]):
        if y[i] == 1:
            m[0] += 1
            mu[0] += X[i]
        elif y[i] == 2:
            m[1] += 1
            mu[1] += X[i]
        elif y[i] == 3:
            m[2] += 1
            mu[2] += X[i]
            
    mu[0] = mu[0] / m[0]
    mu[1] = mu[1] / m[1]
    mu[2] = mu[2] / m[2]
    
    for i in range(X.shape[0]):
        if y[i] == 1:
            diff = X[i] - mu[0]
            diff = np.reshape(diff, (1, diff.size))
            sigma[0] += np.dot(diff.T, diff)
        elif y[i] == 2:
            diff = X[i] - mu[1]
            diff = np.reshape(diff, (1, diff.size))
            sigma[1] += np.dot(diff.T, diff)
        elif y[i] == 3:
            diff = X[i] - mu[2]
            diff = np.reshape(diff, (1, diff.size))
            sigma[2] += np.dot(diff.T, diff)
            
    sigma[0] = sigma[0] / m[0]
    sigma[1] = sigma[1] / m[1]
    sigma[2] = sigma[2] / m[2]
    
    return mu, sigma

In [95]:
def predict3(X, y, mu, sigma):
    c_matrix = np.ones((3, 3))
    for i in range(X.shape[0]):
        g1 = membership2(X[0], mu[0], sigma[0])
        t = 0
        for j in range(1, 3):
            g2 = membership2(X[j], mu[j], sigma[j])
            if g2 > g1:
                g1 = g2
                t = j
        c_matrix[t][int(y[i]) - 1] += 1
    
    return c_matrix

In [96]:
def score3(c_matrix):
    sigma_ii=0.
    total = 0.
    precision = np.zeros(3)
    recall = np.zeros(3)
    for i in range(3):
        sigma_ii += float(c_matrix[i][i])
        sigma_ji = 0.
        sigma_ij = 0.
        for j in range(3):
            total += float(c_matrix[i][j])
            sigma_ij += float(c_matrix[i][j])
            sigma_ji += float(c_matrix[j][i])
        precision[i] = c_matrix[i][i] / sigma_ij
        recall[i] = c_matrix[i][i] / sigma_ji
    F = 2. * recall * precision / (recall + precision)
    accuracy = sigma_ii / total

    return precision, recall, F, accuracy

In [97]:
# cross validation
def cross_validation3(X, y, n_folds=10, r_verbose=False):
    train_c_matrix = []
    train_precision = []
    train_recall = []
    train_F = []
    train_accuracy = []
    
    test_c_matrix = []
    test_precision = []
    test_recall = []
    test_F = []
    test_accuracy = []
    
    train_idx, test_idx = K_Fold(len(y), n_folds=n_folds, shuffle=True)
    
    for i in range(n_folds):
        # fit
        mu, sigma = fit3(X[train_idx[i]], y[train_idx[i]])
        
        # train error
        tr_c_matrix = predict3(X[train_idx[i]], y[train_idx[i]], mu, sigma)
        tr_precision, tr_recall, tr_F, tr_accuracy = score3(tr_c_matrix)

        # test error
        t_c_matrix = predict3(X[test_idx[i]], y[test_idx[i]], mu, sigma)
        t_precision, t_recall, t_F, t_accuracy = score3(t_c_matrix)
        
        train_c_matrix.append(tr_c_matrix)
        train_precision.append(tr_precision)
        train_recall.append(tr_recall)
        train_F.append(tr_F)
        train_accuracy.append(tr_accuracy)
        
        test_c_matrix.append(t_c_matrix)
        test_precision.append(t_precision)
        test_recall.append(t_recall)
        test_F.append(t_F)
        test_accuracy.append(t_accuracy)
        
    train_accuracy_avg = np.mean(train_accuracy)
    test_accuracy_avg = np.mean(test_accuracy)
    
    if r_verbose:
        print '(average results) CROSS_VALIDATION 3 (K = %d):' % n_folds
        print 'train_c_matrix, train_precision, train_recall, train_F, train_accuracy_avg'
        print 'test_c_matrix, test_precision, test_recall, test_F, test_accuracy_avg'
        print train_c_matrix
        print train_precision
        print train_recall
        print train_F
        print train_accuracy_avg
        print '\n'
        print test_c_matrix
        print test_precision
        print test_recall
        print test_F
        print test_accuracy_avg
        print '\n'

In [98]:
def label(s):
    if s == 'Iris-setosa':
        return 1.
    elif s == 'Iris-versicolor':
        return 2.
    else:
        return 3.

In [99]:
# read file
fname = 'iris.data'
data = read_file(fname, delimiter=',', converters={4: label})
s = np.split(data, [data.shape[1] - 1], axis=1)
X = s[0]
y = s[1]

# timing
start = time.time()

# cross validation
cross_validation3(X, y, n_folds=10, r_verbose=True)

mu, sigma = fit3(X, y)
c_matrix = predict3(X, y, mu, sigma)
stop = time.time()

precision, recall, F, accuracy = score3(c_matrix)
duration = stop - start

print 'RUN: %s,' % fname, 'applied to all data:'
print 'c_matrix, precision, recall, F, accuracy, duration:'
print c_matrix
print precision
print recall
print F
print accuracy
print duration

(average results) CROSS_VALIDATION 3 (K = 10):
train_c_matrix, train_precision, train_recall, train_F, train_accuracy_avg
test_c_matrix, test_precision, test_recall, test_F, test_accuracy_avg
[array([[ 47.,  44.,  47.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 45.,  47.,  46.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 47.,  47.,  44.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 42.,  49.,  47.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 48.,  45.,  45.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 48.,  44.,  46.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 47.,  45.,  46.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 43.,  46.,  49.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 46.,  48.,  44.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 47.,  45.,  46.],
       [  1.,   1.,   1.],
      

# 4. Naive Bayes with Bernoulli features

In [66]:
def fit4(X, y):
    alpha = np.ones((2, X.shape[0]))
    a = np.ones(2)
    count = np.zeros(2)
    
    for i in range(X.shape[0]):
        if y[i] == 0:
            count[0] += 1
            for j in range(X.shape[1]):
                alpha[0, j] += X[i, j]
        elif y[i] == 1:
            count[1] += 1
            for j in range(X.shape[1]):
                alpha[1, j] += X[i, j]
                
    alpha[0] = alpha[0] / count[0]
    alpha[1] = alpha[1] / count[1]
    
    a[0] = count[0] / X.shape[1]
    a[1] = count[1] / X.shape[1]
    
    return alpha, a

In [67]:
def membership4(X, alpha_0, alpha_1, a):
    g = 0
    for i in range(X.shape[0]):
        g += X[i] * np.log(alpha_0[i]) + (1 - X[i]) * np.log(alpha_1[i]) + np.log(a)
        
    return g

In [68]:
def predict4(X, y, alpha, a):
    tp=1
    fp=1
    fn=1
    tn=1
    
    for i in range(X.shape[0]):
        g1 = membership4(X[i], alpha[0], alpha[1], a[0])
        g2 = membership4(X[i], alpha[1], alpha[0], a[1])
        
        if g1 > g2 and y[i] == 0:
            tp += 1
        elif g1 > g2 and y[i] == 1:
            fp += 1
        elif g1 <= g2 and y[i] == 0:
            fn += 1
        elif g1 <= g2 and y[i] == 1:
            tn += 1
    
    return tp, tn, fp, fn

In [69]:
# read file
fname_train = 'SPECT.train'
data_train = read_file(fname_train, delimiter=',')
s_train = np.split(data_train, [1], axis=1)
X_train = s_train[1]
y_train = s_train[0]

fname_test = 'SPECT.test'
data_test = read_file(fname_test, delimiter=',')
s_test = np.split(data_test, [1], axis=1)
X_test = s_test[1]
y_test = s_test[0]

# timing
start = time.time()

alpha, a = fit4(X_train, y_train)
tp, tn, fp, fn = predict4(X_test, y_test, alpha, a)
stop = time.time()

c_matrix, precision, recall, F, accuracy = score1(tp, tn, fp, fn)
duration = stop - start

print 'RUN: %s,' % fname, 'applied to all data:'
print 'c_matrix, precision, recall, F, accuracy, duration:'
print c_matrix
print precision
print recall
print F
print accuracy
print duration

RUN: iris.data, applied to all data:
c_matrix, precision, recall, F, accuracy, duration:
[[16, 131], [1, 43]]
0.108843537415
0.941176470588
0.19512195122
0.30890052356
0.0289130210876


# 5. Naive Bayes with Binomial features

In [70]:
def fit5(X, y):
    alpha = np.ones((3, X.shape[1]))
    count = np.zeros(3)
    
    for i in range(X.shape[0]):
        if y[i]==1:
            count[0] += 1
            for j in range(X.shape[1]):
                alpha[0, j] += X[i, j]
        elif y[i] == 2:
            count[1] += 1
            for j in range(X.shape[1]):
                alpha[1,j] += X[i, j]
        elif y[i] == 3:
            count[2] += 1
            for j in range(X.shape[1]):
                alpha[2, j] += X[i, j]
    alpha = alpha / count
    prior = count / X.shape[1]
                    
    return alpha, prior

In [71]:
def membership5(X, alpha, prior):
    n = np.sum(X)
    g = 1.
    for i in range(X.shape[0]):
        g *= math.factorial(n) / math.factorial(X[i]) / math.factorial(n - X[i])
        g *= math.pow(alpha[i], X[i]) * math.pow((1 - alpha[i]), (n - X[i]))
                                                 
    return g

In [72]:
def predict5(X, y, alpha, prior):
    c_matrix = np.ones((3,3))
    for i in range(X.shape[0]):
        #get g for diff class
        g1 = membership5(X[i], alpha[0], prior[0])
        t = 0
        for j in range(1,3):
            g2 = membership5(X[i], alpha[j], prior[j])
            if (g2 > g1):
                g = g2
                t = j
        c_matrix[t][int(y[i]) - 1] += 1
        
    return c_matrix

In [73]:
def score5(c_matrix):
    sigma_ii = 0.
    total = 0.
    recall = np.zeros(3)
    precision = np.zeros(3)
    
    for i in range(3):
        sigma_ii += float(c_matrix[i][i])
        sigma_ij = 0.
        sigma_ji = 0.
        for j in range(3):
            total += float(c_matrix[i][j])
            sigma_ij += float(c_matrix[i][j])
            sigma_ji += float(c_matrix[j][i])
        precision[i] = c_matrix[i][i] / sigma_ij
        recall[i] = c_matrix[i][i] / sigma_ji
    F = 2. * recall * precision/ (recall + precision)
    accuracy = sigma_ii / total
    
    return precision, recall, F, accuracy

In [74]:
# cross validation
def cross_validation5(X, y, n_folds=10, r_verbose=False):
    train_c_matrix = []
    train_precision = []
    train_recall = []
    train_F = []
    train_accuracy = []
    
    test_c_matrix = []
    test_precision = []
    test_recall = []
    test_F = []
    test_accuracy = []
    
    train_idx, test_idx = K_Fold(len(y), n_folds=n_folds, shuffle=True)
    
    for i in range(n_folds):
        # fit
        alpha, prior = fit5(X[train_idx[i]], y[train_idx[i]])
        
        # train error
        tr_c_matrix = predict5(X[train_idx[i]], y[train_idx[i]], alpha, prior)
        tr_precision, tr_recall, tr_F, tr_accuracy = score3(tr_c_matrix)

        # test error
        t_c_matrix = predict5(X[test_idx[i]], y[test_idx[i]], alpha, prior)
        t_precision, t_recall, t_F, t_accuracy = score5(t_c_matrix)
        
        train_c_matrix.append(tr_c_matrix)
        train_precision.append(tr_precision)
        train_recall.append(tr_recall)
        train_F.append(tr_F)
        train_accuracy.append(tr_accuracy)
        
        test_c_matrix.append(t_c_matrix)
        test_precision.append(t_precision)
        test_recall.append(t_recall)
        test_F.append(t_F)
        test_accuracy.append(t_accuracy)
        
    train_accuracy_avg = np.mean(train_accuracy)
    test_accuracy_avg = np.mean(test_accuracy)
    
    if r_verbose:
        print '(average results) CROSS_VALIDATION 5 (K = %d):' % n_folds
        print 'train_c_matrix, train_precision, train_recall, train_F, train_accuracy_avg'
        print 'test_c_matrix, test_precision, test_recall, test_F, test_accuracy_avg'
        print train_c_matrix
        print train_precision
        print train_recall
        print train_F
        print train_accuracy_avg
        print '\n'
        print test_c_matrix
        print test_precision
        print test_recall
        print test_F
        print test_accuracy_avg
        print '\n'

In [75]:
# read file
fname = 'lenses.data'
data = read_file(fname)
s = np.split(data, [1], axis=1)
X = s[1]
y = s[0]

# timing
start = time.time()

# cross validation
cross_validation5(X, y, n_folds=10, r_verbose=True)

alpha, prior = fit5(X, y)
c_matrix = predict5(X, y, alpha, prior)
stop = time.time()

precision, recall, F, accuracy = score5(c_matrix)
duration = stop - start

print 'RUN: %s,' % fname, 'applied to all data:'
print 'c_matrix, precision, recall, F, accuracy, duration:'
print c_matrix
print precision
print recall
print F
print accuracy
print duration

(average results) CROSS_VALIDATION 5 (K = 10):
train_c_matrix, train_precision, train_recall, train_F, train_accuracy_avg
test_c_matrix, test_precision, test_recall, test_F, test_accuracy_avg
[array([[ 10.,  13.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 11.,  12.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 11.,  12.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 13.,  10.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 12.,  12.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 12.,  12.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 12.,  12.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 13.,  11.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 12.,  12.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]]), array([[ 12.,  12.,   1.],
       [  1.,   1.,   1.],
      