# Data generation notebook

In [1]:
import numpy as np
import csv
from os import system as bash_call

In [2]:
N_PROBLEMS = 1000
DATA_FOLDER = 'data/'
# Keep the C source in sync with this notebook: replace the whole
# `#define N_PROBLEMS ...` line in evaluate.c with the value chosen above.
# Raw string: `\#` must reach sed as-is; in a normal string literal it is an
# invalid escape sequence that newer Python versions warn about.
command = r"sed -i '/#define N_PROBLEMS/c\#define N_PROBLEMS {}' evaluate.c".format(N_PROBLEMS)
bash_call(command);

### General Idea of the project

In [3]:
# generate random numbers that will be our eigenvalues

#dim = np.random.randint(1, 10)
dim = 100
eigenvalues = np.sort(np.random.randn(dim))
# diagonal matrix D carrying the chosen eigenvalues
diag = np.diag(eigenvalues)

# take the orthogonal factor of a QR decomposition so that Q^{-1} = Q'
# and no explicit inverse has to be computed
Q, _ = np.linalg.qr(np.random.randn(dim, dim))

# construct the matrix of the eigenproblem: M = Q*D*Q^{-1} = Q*D*Q' as Q is orthogonal
M = np.dot(np.dot(Q, diag), Q.T)

# this will be solved by LAPACK solvers,
# but for now we just want to make sure that the approach works.
# M is symmetric by construction, so use the symmetric solver: it returns real
# eigenvalues in ascending order, whereas the general `eig` gives no ordering
# guarantee and may return a complex dtype.
eigenvalues_from_solver = np.linalg.eigvalsh(M)
# first is our a priori answer, second is the solution of the eigenproblem we generated
np.testing.assert_array_almost_equal(eigenvalues, eigenvalues_from_solver)

#print(M)

def to_upper_triangular(M):
    """Flatten the upper triangle of ``M`` (diagonal included) into a list.

    Rows are visited top to bottom; from row ``r`` only the entries starting
    at column ``r`` are kept, so the result is ordered row by row.
    """
    return [entry
            for row_index in range(M.shape[0])
            for entry in M[row_index][row_index:]]
#print(np.sort(eigenvalues))
#print(to_upper_triangular(M))

In [4]:
# measure the loss of orthogonality of the computed eigenvectors:
# largest absolute entry of T'T - I, where the columns of T are the eigenvectors of M
t = np.linalg.eig(M)[1]
print(np.abs(t.T.dot(t) - np.eye(dim)).max())

1.07800227078e-12


### Data storage format

```bash
data.csv

n_problems
n0;eig00,eig01,eig02...eig0n0;a000,a001,a002,a003...an0n0
n1;eig10,eig11,eig12...eig1n1;a100,a101,a102,a103...an1n1
...
```
* n0 means 'the first example matrix A0 has dimensions n0*n0'
* eig00 means 'the first eigenvalue of the first example'
* a000 means 'the element in the first row, first column of the first example matrix'
* ...


In [5]:
def generate_data(problem_size, n_examples, eps=1e-5, type=None):
    """Generate random symmetric eigenproblems whose eigenvalues are known in advance.

    Every matrix is built as ``Q * diag(e) * Q'`` where ``Q`` is the orthogonal
    factor of the QR decomposition of a standard-normal random matrix, so the
    chosen vector ``e`` is its spectrum by construction.

    Parameters
    ----------
    problem_size : int
        Dimension ``n`` of every generated ``n x n`` matrix.
    n_examples : int
        Number of eigenproblems to generate.
    eps : float, optional
        Smallest eigenvalue of the ``'uniform-eigenvalues-dist'`` spectrum.
        Ignored by the other distributions.
    type : str or None, optional
        Eigenvalue distribution (the name shadows the builtin ``type``; it is
        kept so existing keyword callers keep working):

        * ``'standard-random-uniform'`` -- i.i.d. uniform on [0, 1), sorted
          ascending like the default branch;
        * ``'uniform-eigenvalues-dist'`` -- deterministic, evenly spaced
          ``eps + (k-1)*(1-eps)/(n-1)`` for ``k = 1..n`` with ``n = problem_size``
          (presumably the definition from the paper linked below -- confirm);
        * anything else, including ``None`` -- standard normal, sorted ascending.

    Returns
    -------
    eigenvalues : list of numpy.ndarray
        ``n_examples`` vectors of length ``problem_size``.
    matrices : list of numpy.ndarray
        The matching matrices, each flattened row by row to length
        ``problem_size ** 2``.
    """
    # page http://arxiv.org/pdf/1401.4950v1.pdf

    if type == 'standard-random-uniform':
        eigenvalues = [np.sort(np.random.uniform(size=problem_size))
                       for _ in range(n_examples)]
    elif type == 'uniform-eigenvalues-dist':
        # The spectrum must have one entry per matrix row, not one per example:
        # a scalar per example cannot be fed to np.diag()/len() below.
        # linspace(eps, 1, n) equals eps + (k-1)*(1-eps)/(n-1), k = 1..n, and
        # also covers n == 1 without dividing by zero.
        spectrum = np.linspace(eps, 1.0, problem_size)
        eigenvalues = [spectrum.copy() for _ in range(n_examples)]
    else:
        # standard normal distribution
        eigenvalues = [np.sort(np.random.randn(problem_size))
                       for _ in range(n_examples)]

    matrices = []
    for e in eigenvalues:
        D = np.diag(e)
        # Orthogonal Q, so Q' acts as the inverse and none has to be computed.
        Q, _ = np.linalg.qr(np.random.randn(len(e), len(e)))
        M = np.dot(np.dot(Q, D), Q.T)
        #np.testing.assert_array_almost_equal(np.sort(e), np.sort(np.linalg.eig(M)[0]))
        #matrices.append(to_upper_triangular(M))
        matrices.append(M.flatten())
    return eigenvalues, matrices

In [6]:
def write_data(eigenvalues, matrices, filename='data.csv'):
    """Write eigenproblems and their reference eigenvalues to a text file.

    File layout: the first line holds the number of problems; every following
    line is ``n;e0,e1,...;a0,a1,...`` where ``n`` is the number of eigenvalues
    of that problem, followed by the eigenvalues and the matrix entries in the
    order they appear in the corresponding item of ``matrices``.

    Parameters
    ----------
    eigenvalues : sequence of sequences
        One collection of eigenvalues per problem.
    matrices : sequence of sequences
        One flat collection of matrix entries per problem, aligned with
        ``eigenvalues``.
    filename : str, optional
        Destination path; an existing file is overwritten.

    Raises
    ------
    AssertionError
        If ``eigenvalues`` and ``matrices`` differ in length.
    """
    assert len(eigenvalues) == len(matrices)

    with open(filename, 'w') as f:
        # The header must describe the rows actually written below, so derive it
        # from the data instead of a notebook-level constant that may disagree.
        f.write(str(len(eigenvalues)) + '\n')
        for e, m in zip(eigenvalues, matrices):
            line = '{};{};{}\n'.format(len(e), ','.join(map(str, e)), ','.join(map(str, m)))
            f.write(line)

In [7]:
# One file per (matrix size, eigenvalue distribution) pair: sizes 10, 20, ..., 50.
for i in range(10, 51, 10):
    for t in ['standard-random-uniform', 'uniform-eigenvalues-dist']:
        # Forward the distribution name to the generator; without it every file
        # would contain the default distribution no matter what its name says.
        E, M = generate_data(i, N_PROBLEMS, type=t)
        write_data(E, M, DATA_FOLDER + t + '_' + str(i) + '.csv')

print('Done writing eigenproblems.')

Done writing eigenproblems.
