# Data generation notebook

In [7]:
import numpy as np
import csv

In [8]:
# Upper bound handed to np.random.randint when drawing matrix dimensions;
# randint excludes its upper bound, so generated sizes stay below this value.
PROBLEM_SIZE_MAX = 20
# Number of eigenproblems to generate.
N_PROBLEMS = 5

### General Idea of the project

In [9]:
# Draw random numbers that serve as the known (a priori) eigenvalues.
# np.random.randint excludes its upper bound, so dim lies in [10, PROBLEM_SIZE_MAX).
dim = np.random.randint(10, PROBLEM_SIZE_MAX)
eigenvalues = np.sort(np.random.randn(dim))

# Diagonal matrix D holding the eigenvalues.
diag = np.diag(eigenvalues)

# Orthogonal matrix from the QR factorization of a random matrix, so that
# Q^{-1} = Q' and no explicit inverse has to be computed.
Q, _ = np.linalg.qr(np.random.randn(dim, dim))

# Construct the matrix for the eigenproblem: M = Q*D*Q^{-1} = Q*D*Q' as Q is orthogonal.
M = np.dot(np.dot(Q, diag), Q.T)

# The generated problems are meant to be solved by LAPACK solvers;
# here we only make sure that the construction works.
# M is symmetric (up to rounding), so the symmetric solver is used: it always
# returns real eigenvalues in ascending order, whereas the general
# np.linalg.eig may return them unordered and, under rounding, as complex.
eigenvalues_from_solver = np.linalg.eigvalsh(M)
# First argument is our a priori answer, second is the solver's solution
# to the eigenproblem we generated.
np.testing.assert_array_almost_equal(eigenvalues, eigenvalues_from_solver)

print(M)

def to_upper_triangular(M):
    """Flatten the upper triangle of ``M`` (diagonal included) into a list.

    Rows are visited from first to last; from row ``i`` the entries at
    column ``i`` and beyond are taken in order.

    Parameters
    ----------
    M : object exposing ``.shape`` and sliceable rows ``M[i]``
        (e.g. a 2-D NumPy array).

    Returns
    -------
    list
        The selected entries, row by row.
    """
    return [entry for row in range(M.shape[0]) for entry in M[row][row:]]

# Show the demo matrix M reduced to its upper triangle (diagonal included),
# flattened row by row into a single list.
print(to_upper_triangular(M))

[[ 0.084196   -0.01182012 -0.03806683 -0.14192062  0.18464977  0.01482842
  -0.21904056  0.22853513  0.34677541 -0.01681427  0.24904122 -0.01919535
  -0.00537583  0.25774431 -0.32252702  0.32570027  0.35036437]
 [-0.01182012 -0.76841171  0.13005751  0.04785112 -0.41739051 -0.46991394
  -0.05840479 -0.29197494  0.11075602 -0.18535875 -0.06426814 -0.08605475
  -0.37509451  0.16271732 -0.59870292  0.39410989  0.19253895]
 [-0.03806683  0.13005751 -0.60444275  0.43402851 -0.01532278  0.35867906
   0.42883767 -0.04491716 -0.36078577  0.01776537 -0.23478249  0.09971682
   0.39259183  0.14356464  0.39821121  0.20406743  0.2021651 ]
 [-0.14192062  0.04785112  0.43402851  0.17768051 -0.23686435 -0.10320868
   0.06328237  0.05136017  0.05396147  0.08084153 -0.1661407  -0.25773605
   0.17088764  0.03949522  0.00147972 -0.07261796  0.00117582]
 [ 0.18464977 -0.41739051 -0.01532278 -0.23686435  0.01248244  0.01042852
   0.41035889 -0.0352444   0.14402244  0.25876725 -0.12679447 -0.27277799
  -0.147

### Data storage format

```text
data.csv

n_problems
n0;eig00,eig01,eig02...eig0n0;a000,a001,a002,a003...an0n0
n1;eig10,eig11,eig12...eig1n1;a100,a101,a102,a103...an1n1
...
```
* `n0` is the size of the first example matrix A0, i.e. A0 has dimensions n0 x n0.
* `eig00` is the first eigenvalue of the first example.
* `a000` is the element in the first row and first column of the first example matrix.
* ...


In [10]:
def generate_data(problem_size_max, n_examples, type=None):
    """Generate random symmetric eigenproblems with known eigenvalues.

    For every example a random size ``n`` is drawn with
    ``np.random.randint(1, problem_size_max)`` (upper bound excluded), ``n``
    eigenvalues are sampled, and a matrix ``M = Q * diag(e) * Q'`` is built
    from a random orthogonal ``Q`` obtained by QR factorization.

    Parameters
    ----------
    problem_size_max : int
        Exclusive upper bound on the matrix dimension.
    n_examples : int
        Number of eigenproblems to generate.
    type : str or None
        ``'random_uniform_0_1'`` samples eigenvalues uniformly from [0, 1).
        ``'uniform_eigenvalues_dist'`` is not implemented.
        Any other value (including ``None``) samples from the standard normal.

    Returns
    -------
    (eigenvalues, matrices) : tuple of lists
        ``eigenvalues[k]`` is a 1-D array of length ``n_k`` (unsorted);
        ``matrices[k]`` is the matching matrix flattened row by row to
        length ``n_k * n_k``.

    Raises
    ------
    NotImplementedError
        If ``type == 'uniform_eigenvalues_dist'``.
    AssertionError
        If a generated matrix does not reproduce its eigenvalues.
    """
    if type == 'uniform_eigenvalues_dist':
        # NotImplementedError is still an Exception, so existing handlers keep working.
        raise NotImplementedError('NOT IMPLEMENTED ERROR')

    if type == 'random_uniform_0_1':
        def draw(size):
            return np.random.uniform(size=size)
    else:
        # standard normal distr
        draw = np.random.randn

    # The size is drawn first, then the eigenvalues, once per example.
    eigenvalues = [draw(np.random.randint(1, problem_size_max)) for _ in range(n_examples)]

    matrices = []
    for e in eigenvalues:
        n = len(e)
        D = np.diag(e)
        # Orthogonal Q, so Q^{-1} = Q' and no inverse is needed.
        Q, _ = np.linalg.qr(np.random.randn(n, n))
        M = np.dot(np.dot(Q, D), Q.T)
        # M is symmetric up to rounding: the symmetric solver returns real,
        # ascending eigenvalues, unlike the general np.linalg.eig.
        np.testing.assert_array_almost_equal(np.sort(e), np.linalg.eigvalsh(M))
        matrices.append(M.flatten())
    return eigenvalues, matrices

In [11]:
def write_data(eigenvalues, matrices, filename='data.csv'):
    """Write eigenproblems to a text file.

    File layout: the first line holds the number of problems; every following
    line holds one problem as ``n;eig_0,...,eig_{n-1};a_0,a_1,...`` where ``n``
    is the number of eigenvalues and the ``a`` values are the entries of the
    corresponding item of ``matrices`` in iteration order.

    Parameters
    ----------
    eigenvalues : sequence of sequences
        One collection of eigenvalues per problem.
    matrices : sequence of sequences
        One flattened matrix per problem; must have the same length as
        ``eigenvalues``.
    filename : str
        Path of the output file; an existing file is overwritten.

    Raises
    ------
    AssertionError
        If ``eigenvalues`` and ``matrices`` differ in length.
    """
    assert len(eigenvalues) == len(matrices)

    with open(filename, 'w') as f:
        # The header is the number of problems actually written, not a
        # module-level constant, so it is right for any input.
        f.write('{}\n'.format(len(eigenvalues)))
        for e, m in zip(eigenvalues, matrices):
            line = '{};{};{}\n'.format(len(e), ','.join(map(str, e)), ','.join(map(str, m)))
            f.write(line)

In [12]:
# Generate N_PROBLEMS eigenproblems (default eigenvalue distribution) and
# store them in the default output file.
E, M = generate_data(problem_size_max=PROBLEM_SIZE_MAX, n_examples=N_PROBLEMS)
write_data(eigenvalues=E, matrices=M)
print('Done writing {} eigenproblems.'.format(N_PROBLEMS))

Done writing 5 eigenproblems.
