# Data generation notebook

In [33]:
import numpy as np
import csv

In [34]:
# Upper bound used when drawing the random dimension of each generated matrix.
PROBLEM_SIZE_MAX = 10
# Number of eigenproblems to generate and write to the data file.
N_PROBLEMS = 5

### General Idea of the project

In [35]:
# Sanity check of the approach: build a symmetric matrix whose eigenvalues are
# known in advance and confirm that a numerical solver recovers them.

# random problem size; randint's upper bound is exclusive, so +1 makes
# PROBLEM_SIZE_MAX itself attainable (and avoids an error when it equals 1)
dim = np.random.randint(1, PROBLEM_SIZE_MAX + 1)

# random numbers that will be our eigenvalues, sorted for the comparison below
eigenvalues = np.sort(np.random.randn(dim))

# diagonal matrix D holding the eigenvalues
diag = np.diag(eigenvalues)

# orthogonal matrix Q from a QR factorisation, so Q^{-1} = Q' and no inverse
# has to be computed
Q, _ = np.linalg.qr(np.random.randn(dim, dim))

# matrix for the eigenproblem: M = Q*D*Q^{-1} = Q*D*Q' as Q is orthogonal
M = Q.dot(diag).dot(Q.T)

# The generated problems will later be solved by LAPACK solvers; here we only
# make sure that the construction works.
# M is symmetric, so use the symmetric solver: it returns real eigenvalues in
# ascending order. UPLO='U' makes it read the upper triangle, which is the part
# that gets stored by to_upper_triangular.
eigenvalues_from_solver = np.linalg.eigvalsh(M, UPLO='U')
# first is our a priori answer, second is the solution of the generated eigenproblem
np.testing.assert_array_almost_equal(eigenvalues, eigenvalues_from_solver)

print(M)

def to_upper_triangular(M):
    """Flatten the upper triangle of ``M`` (diagonal included) into a list.

    Rows are visited top to bottom; from row ``i`` the entries ``M[i][i:]``
    are taken, so the result is ordered row by row.

    Parameters
    ----------
    M : array-like with a ``shape`` attribute and row indexing (e.g. ndarray)

    Returns
    -------
    list
        The collected entries as a plain Python list.
    """
    return [
        entry
        for row_idx in range(M.shape[0])
        for entry in M[row_idx][row_idx:]
    ]

# show the packed upper triangle of the demo matrix M
packed_upper = to_upper_triangular(M)
print(packed_upper)

[[ 0.56775707  0.14185646  0.13186526 -0.15840738  0.01528468  0.18801805]
 [ 0.14185646  0.22248974  0.18153074  0.00638029 -0.07988776 -0.12095127]
 [ 0.13186526  0.18153074  0.44970378  0.06860982 -0.03163687 -0.22352703]
 [-0.15840738  0.00638029  0.06860982 -0.20575226  0.07372614 -0.06254922]
 [ 0.01528468 -0.07988776 -0.03163687  0.07372614  0.55672767 -0.09945702]
 [ 0.18801805 -0.12095127 -0.22352703 -0.06254922 -0.09945702 -0.02476027]]
[0.56775706518307179, 0.14185646137476168, 0.13186526143736205, -0.15840737781646519, 0.015284680059577616, 0.18801804765750318, 0.22248974182628095, 0.18153074392959809, 0.006380291260410835, -0.079887764294354138, -0.12095127172022214, 0.44970378330725569, 0.068609816714571087, -0.031636873260980275, -0.22352702695072268, -0.20575225866866398, 0.073726136310768575, -0.06254921631977077, 0.55672767213314789, -0.099457018209399173, -0.024760271426423543]


### Data storage format

```bash
data.csv

n_problems
n0;eig00,eig01,eig02...eig0n0;a000,a001,a002,a003...an0n0
n1;eig10,eig11,eig12...eig1n1;a100,a101,a102,a103...an1n1
...
```
* `n0` means the first example matrix A0 has dimensionality n0*n0
* `eig00` means the first eigenvalue of the first example
* `a000` means the element of the first example matrix in the first row, first column; only the upper triangle of each (symmetric) matrix is stored, row by row
* ...


In [36]:
def generate_data(problem_size_max, n_examples, type=None):
    """Generate random symmetric eigenproblems with known eigenvalues.

    For every example a random size ``n`` in ``[1, problem_size_max]`` is
    drawn, ``n`` eigenvalues are sampled, and a symmetric matrix
    ``M = Q * D * Q'`` is built from them with a random orthogonal ``Q``.

    Parameters
    ----------
    problem_size_max : int
        Largest matrix dimension that may be generated (inclusive).
    n_examples : int
        Number of eigenproblems to generate.
    type : str or None
        ``'random_uniform_0_1'`` samples eigenvalues uniformly from [0, 1);
        ``'uniform_eigenvalues_dist'`` is not implemented; any other value
        (including ``None``) samples from the standard normal distribution.

    Returns
    -------
    (eigenvalues, matrices)
        ``eigenvalues`` is a list of 1-D arrays (unsorted, as sampled);
        ``matrices`` is a list with the corresponding matrices packed by
        ``to_upper_triangular``.

    Raises
    ------
    NotImplementedError
        If ``type == 'uniform_eigenvalues_dist'``.
    ValueError
        From ``np.random.randint`` if ``problem_size_max < 1`` and at least
        one example is requested.
    AssertionError
        If the solver does not reproduce the sampled eigenvalues.
    """
    def random_size():
        # randint's upper bound is exclusive; +1 makes problem_size_max itself
        # attainable and keeps problem_size_max == 1 from raising
        return np.random.randint(1, problem_size_max + 1)

    if type == 'random_uniform_0_1':
        eigenvalues = [np.random.uniform(size=random_size()) for _ in range(n_examples)]
    elif type == 'uniform_eigenvalues_dist':
        raise NotImplementedError('NOT IMPLEMENTED ERROR')
    else:
        # standard normal distr
        eigenvalues = [np.random.randn(random_size()) for _ in range(n_examples)]

    matrices = []
    for e in eigenvalues:
        D = np.diag(e)
        # orthogonal Q, so Q^{-1} = Q' and no inverse is needed
        Q, _ = np.linalg.qr(np.random.randn(len(e), len(e)))
        M = Q.dot(D).dot(Q.T)
        # M is symmetric: eigvalsh returns real eigenvalues in ascending order.
        # UPLO='U' checks the triangle that is actually stored below.
        np.testing.assert_array_almost_equal(np.sort(e), np.linalg.eigvalsh(M, UPLO='U'))
        matrices.append(to_upper_triangular(M))
    return eigenvalues, matrices

In [37]:
def write_data(eigenvalues, matrices, filename='data.csv'):
    """Write eigenproblems to ``filename`` in the project's text format.

    The first line holds the number of problems. Each following line is
    ``n;eig_0,...,eig_{n-1};a_0,a_1,...`` where ``n`` is the number of
    eigenvalues of that problem and ``a_k`` are the matrix entries exactly as
    given in ``matrices``.

    Parameters
    ----------
    eigenvalues : sequence of sequences of numbers
    matrices : sequence of sequences of numbers, same length as ``eigenvalues``
    filename : str
        Output path; an existing file is overwritten.

    Raises
    ------
    AssertionError
        If ``eigenvalues`` and ``matrices`` differ in length.
    """
    assert len(eigenvalues) == len(matrices)

    def join_numbers(values):
        # repr(float(v)) is the shortest string that round-trips the double
        # exactly, independent of how the installed NumPy formats its scalars
        return ','.join(repr(float(v)) for v in values)

    with open(filename, 'w') as f:
        # header is the number of records actually written, not a global
        f.write('{}\n'.format(len(eigenvalues)))
        for e, m in zip(eigenvalues, matrices):
            f.write('{};{};{}\n'.format(len(e), join_numbers(e), join_numbers(m)))

In [38]:
# generate the configured number of eigenproblems and store them in the default data file
E, M = generate_data(problem_size_max=PROBLEM_SIZE_MAX, n_examples=N_PROBLEMS)
write_data(eigenvalues=E, matrices=M)
print('Done writing {} eigenproblems.'.format(N_PROBLEMS))

Done writing 5 eigenproblems.
