In [1]:
import numpy as np
import pandas as pd
import glob
import unittest

def get_data(path):
    files = glob.glob(os.path.join(path, "*.csv"))
    frames = (pd.read_csv(f) for f in files)
    concatenated = pd.concat(frames, ignore_index=True)
    del concatenated["Date"]
    print(concatenated.head(5))
    
    return concatenated.as_matrix().T

def calc_mean_vector(numpy_matrix):
    means = []
    for i in range(6):
        means.append([np.mean(numpy_matrix[i,:])])
    
    return np.array(means)

def calc_scatter_matrix(mean_vector, numpy_matrix):
    scatter_matrix = np.zeros((6,6))
    for i in range(numpy_matrix.shape[1]):
        scatter_matrix += (numpy_matrix[:,i].reshape(6,1) - mean_vector).dot((numpy_matrix[:,i].reshape(6,1) - mean_vector).T)
    
    return scatter_matrix

def calc_cov_matrix(numpy_matrix):
    return np.cov([numpy_matrix[0,:], 
                      numpy_matrix[1,:], 
                      numpy_matrix[2,:], 
                      numpy_matrix[3,:], 
                      numpy_matrix[4,:], 
                      numpy_matrix[5,:]])

def calc_eig_vectors_values(cov_matrix):
    return np.linalg.eig(cov_matrix)   

def apply_eig_reduction(numpy_matrix):
    def reduce_space(eig_val, eig_vec, dim):
        eig_pairs = [(np.abs(eig_val[i]), eig_vec[:,i]) for i in range(len(eig_val))]
        eig_pairs.sort(key=lambda x: x[0], reverse=True)
        matrix_w = np.hstack((eig_pairs[i][1].reshape(6,1) for i in range(dim)))
        return matrix_w.T.dot(numpy_matrix) 

    
    mean_vector = calc_mean_vector(numpy_matrix)
    cov_matrix = calc_cov_matrix(numpy_matrix)
    eig_val, eig_vec = calc_eig_vectors_values(cov_matrix)
    
    return reduce_space(eig_val, eig_vec, 2)

def start_routine(path):
    numpy_matrix = get_data(path)
#     return apply_eig_reduction(numpy_matrix)

start_routine("price_csv")

class PCATest(unittest.TestCase):
    
    def setUp(self):
        self.data = np.array([[i for i in range(6)], 
                              [i + 1 for i in range(6)], 
                              [i + 2 for i in range(6)], 
                              [i + 3 for i in range(6)], 
                              [i + 4 for i in range(6)], 
                              [i + 5 for i in range(6)]])
        self.means = np.array([[sum(self.data[i,:])/6] for i in range(6)])
        self.reduced = np.array([[6.12372436, 8.5732141, 11.02270384, 13.47219359, 15.92168333, 18.37117307], 
                                 [-2.80545519, -2.80545519, -2.80545519, -2.80545519, -2.80545519, -2.80545519]])
        
    def test_mean_vector(self):
        np.testing.assert_almost_equal(self.means, calc_mean_vector(self.data), err_msg="Means are not equal")
        
    def test_eig_reduction(self):
        np.testing.assert_almost_equal(self.reduced, apply_eig_reduction(self.data), err_msg="Reduction is incorrect")


if __name__ == '__main__':
#     unittest.main(argv=['first-arg-is-ignored'], exit=False)

IndentationError: expected an indented block (<ipython-input-1-0f322a35029c>, line 81)