# Data generation notebook

In [67]:
import numpy as np
import csv

In [291]:
# Upper limit for the matrix dimension; forwarded to generate_data as problem_size_max.
PROBLEM_SIZE_MAX = 60
# Number of eigenproblems to generate and write to disk.
N_PROBLEMS = 10

### General Idea of the project

In [274]:
# Proof of concept: build a symmetric matrix with a known spectrum and check
# that a numerical eigenvalue solver recovers exactly that spectrum.

# Draw random numbers that will be our known ("a priori") eigenvalues.
# randint's upper bound is exclusive, so the demo dimension is 1..4 -- small
# enough to inspect the intermediate matrices by eye.
dim = np.random.randint(1, 5)
eigenvalues = np.sort(np.random.randn(dim))

# Diagonal matrix D holding the eigenvalues.
diag = np.diag(eigenvalues)

# Random orthogonal matrix from a QR factorisation. Because Q is orthogonal,
# Q^{-1} == Q', so no explicit inverse has to be computed.
Q, _ = np.linalg.qr(np.random.randn(dim, dim))

# Construct the matrix for the eigenproblem: M = Q*D*Q^{-1} = Q*D*Q'.
# M is symmetric by construction and has exactly `eigenvalues` as its spectrum.
M = np.dot(np.dot(Q, diag), Q.T)

# This will be solved by LAPACK solvers, but for now we just want to make
# sure that the approach works.
# M is symmetric, so use the symmetric solver: eigvalsh skips the eigenvectors
# and returns real eigenvalues already sorted in ascending order, whereas the
# general-purpose eig gives no ordering guarantee and may return complex dtype.
eigenvalues_from_solver = np.linalg.eigvalsh(M)

# First argument is our a priori answer, second is the solver's result for
# the eigenproblem we generated.
np.testing.assert_array_almost_equal(eigenvalues, eigenvalues_from_solver)

### Data storage format

```bash
data.csv

n0;eig00,eig01,eig02...eig0n0;a000,a001,a002,a003...an0n0
n1;eig10,eig11,eig12...eig1n1;a100,a101,a102,a103...an1n1
...
```
* `n0` means that the first example matrix A0 has dimensions n0 × n0.
* `eig00` is the first eigenvalue of the first example.
* `a000` is the element in the first row and first column of the first example matrix; the matrix entries are stored row by row.
* ...


In [294]:
def generate_data(problem_size_max, n_examples):
    """Generate random symmetric eigenproblems with known eigenvalues.

    For every example a random size n in ``[1, problem_size_max]`` is drawn,
    n eigenvalues are sampled from a standard normal distribution, and the
    matrix ``M = Q * diag(e) * Q'`` is built with a random orthogonal ``Q``
    obtained from a QR factorisation. Each matrix is checked against a
    numerical solver before it is accepted.

    Parameters
    ----------
    problem_size_max : int
        Largest matrix dimension that may be generated (inclusive).
    n_examples : int
        Number of eigenproblems to generate.

    Returns
    -------
    eigenvalues : list of 1-D numpy arrays
        The (unsorted) eigenvalues of each example.
    matrices : list of 2-D numpy arrays
        The matching n x n matrices, in the same order as ``eigenvalues``.

    Raises
    ------
    ValueError
        Raised by ``np.random.randint`` when ``problem_size_max < 1`` and at
        least one example is requested.
    AssertionError
        If the solver's eigenvalues disagree with the ones used to build a
        matrix.
    """
    # randint's upper bound is exclusive; the +1 makes problem_size_max itself
    # a reachable size (and keeps problem_size_max == 1 from raising).
    eigenvalues = [np.random.randn(np.random.randint(1, problem_size_max + 1))
                   for _ in range(n_examples)]
    matrices = []
    for e in eigenvalues:
        n = len(e)
        D = np.diag(e)
        # Q is orthogonal, so its transpose doubles as its inverse.
        Q, _ = np.linalg.qr(np.random.randn(n, n))
        M = np.dot(np.dot(Q, D), Q.T)
        # M is symmetric by construction: eigvalsh is the matching solver and
        # returns real eigenvalues already sorted in ascending order.
        np.testing.assert_array_almost_equal(np.sort(e), np.linalg.eigvalsh(M))
        matrices.append(M)
    return eigenvalues, matrices

In [293]:
def write_data(eigenvalues, matrices, filename='data.csv'):
    """Write eigenproblems to a text file, one problem per line.

    Every line consists of three ';'-separated fields: the number of
    eigenvalues, the comma-separated eigenvalues, and the comma-separated
    entries of the flattened matrix. Values are converted with ``str``.

    Parameters
    ----------
    eigenvalues : sequence of sequences
        Eigenvalues of each problem.
    matrices : sequence of arrays
        One matrix per problem; each must provide ``flatten()``.
    filename : str
        Path of the output file; it is opened in 'w' mode, so an existing
        file is overwritten.

    Raises
    ------
    AssertionError
        If ``eigenvalues`` and ``matrices`` differ in length.
    """
    assert len(eigenvalues) == len(matrices)

    def _format_row(spectrum, matrix):
        # Assemble the three fields in file order: size, eigenvalues, entries.
        size_field = str(len(spectrum))
        spectrum_field = ','.join(str(value) for value in spectrum)
        entries_field = ','.join(str(entry) for entry in matrix.flatten())
        return ';'.join((size_field, spectrum_field, entries_field)) + '\n'

    with open(filename, 'w') as out:
        for spectrum, matrix in zip(eigenvalues, matrices):
            out.write(_format_row(spectrum, matrix))

In [296]:
# Generate the eigenproblems and store them in the default output file.
# E is the list of eigenvalue arrays and M the list of matching matrices
# (note: this rebinds M, which the proof-of-concept cell used for one matrix).
E, M = generate_data(problem_size_max=PROBLEM_SIZE_MAX, n_examples=N_PROBLEMS)
write_data(eigenvalues=E, matrices=M)
print('Done writing {} eigenproblems.'.format(N_PROBLEMS))

Done writing 10 eigenproblems.
