# Data generation notebook

In [37]:
import numpy as np
import csv

In [38]:
# Upper bound handed to np.random.randint when matrix dimensions are drawn;
# randint excludes its upper bound, so generated sizes stay below this value.
PROBLEM_SIZE_MAX = 20
# Number of eigenproblems requested from generate_data in the final cell.
N_PROBLEMS = 5

### General Idea of the project

In [39]:
# Proof of concept: build a symmetric matrix with known eigenvalues and check
# that a numerical solver recovers them.

# Draw the problem size. randint excludes the upper bound, so dim < PROBLEM_SIZE_MAX.
dim = np.random.randint(1, PROBLEM_SIZE_MAX)
# Random numbers that will be our eigenvalues, sorted ascending for the comparison below.
eigenvalues = np.sort(np.random.randn(dim))

# Diagonal matrix D holding the eigenvalues.
diag = np.diag(eigenvalues)

# Orthogonal matrix Q (from a QR factorisation of a random matrix), so that
# Q^{-1} = Q^T and no inverse has to be computed. Indexing with [0] instead of
# unpacking into `_` keeps IPython's last-output variable intact.
Q = np.linalg.qr(np.random.randn(dim, dim))[0]

# Matrix of the eigenproblem: M = Q*D*Q^{-1} = Q*D*Q^T, as Q is orthogonal.
M = np.dot(np.dot(Q, diag), Q.T)

# The generated problems are meant to be solved by LAPACK solvers; here we only
# make sure that the approach works. M is symmetric by construction, so the
# symmetric solver is used: it always returns real eigenvalues, already sorted
# in ascending order.
eigenvalues_from_solver = np.linalg.eigvalsh(M)
# First argument is our a priori answer, second is the solver's solution.
np.testing.assert_array_almost_equal(eigenvalues, eigenvalues_from_solver)

print(M)

def to_upper_triangular(M):
    """Flatten the upper triangle of ``M`` (diagonal included) into a list.

    Rows are visited top to bottom; from row ``i`` every entry from column
    ``i`` to the end of the row is taken, in order.

    Parameters
    ----------
    M : 2-D array-like exposing ``shape`` and row indexing (e.g. numpy.ndarray)

    Returns
    -------
    list
        The selected entries in row-major order.
    """
    return [entry
            for row in range(M.shape[0])
            for entry in M[row][row:]]

# Show the flattened upper triangle (diagonal included) of the demo matrix M.
print(to_upper_triangular(M))

[[ 0.56122037 -0.01634241 -0.05888313 -0.35382289]
 [-0.01634241 -0.54644802 -0.28135098 -0.04470847]
 [-0.05888313 -0.28135098 -0.78705086 -0.04902343]
 [-0.35382289 -0.04470847 -0.04902343 -0.23432177]]
[0.56122037410205594, -0.016342410201008382, -0.058883126956762052, -0.35382288561701203, -0.54644801522915254, -0.28135097541162529, -0.04470846896494067, -0.78705085529567154, -0.049023430562209792, -0.23432177481232697]


### Data storage format

```bash
data.csv

n_problems
n0;eig00,eig01,eig02...eig0n0;a000,a001,a002,a003...an0n0
n1;eig10,eig11,eig12...eig1n1;a100,a101,a102,a103...an1n1
...
```
* n0 means 'the first example matrix A0 has dimension n0 x n0' (so its line also lists n0 eigenvalues).
* eig00 means 'the first eigenvalue of the first example'.
* a000 means 'the element in the first row, first column of the first example matrix'; the full matrix is stored row by row.
* ...


In [40]:
def generate_data(problem_size_max, n_examples, type=None, problem_size_min=10):
    """Generate random symmetric eigenproblems with known eigenvalues.

    For every example a dimension ``n`` is drawn uniformly from
    ``[problem_size_min, problem_size_max)``, ``n`` eigenvalues are sampled,
    and a matrix ``M = Q * diag(eigenvalues) * Q^T`` is built, where ``Q`` is
    the orthogonal factor of a QR factorisation of a random matrix. Each
    matrix is checked against a numerical eigenvalue solver before it is
    accepted.

    Parameters
    ----------
    problem_size_max : int
        Exclusive upper bound on the matrix dimension.
    n_examples : int
        Number of eigenproblems to generate.
    type : str or None
        Eigenvalue distribution. ``'random_uniform_0_1'`` samples uniformly
        from [0, 1); ``'uniform_eigenvalues_dist'`` is not implemented; any
        other value (including ``None``) samples from the standard normal
        distribution. (The name shadows the builtin but is kept so existing
        keyword callers continue to work.)
    problem_size_min : int
        Inclusive lower bound on the matrix dimension. Defaults to 10, the
        value that used to be hard-coded.

    Returns
    -------
    (eigenvalues, matrices) : tuple of two lists
        ``eigenvalues[k]`` is a 1-D array of length ``n_k`` (unsorted) and
        ``matrices[k]`` is the matching ``n_k x n_k`` matrix flattened row by
        row into a 1-D array of length ``n_k ** 2``.

    Raises
    ------
    NotImplementedError
        If ``type == 'uniform_eigenvalues_dist'``.
    ValueError
        If the size range ``[problem_size_min, problem_size_max)`` is empty
        or ``problem_size_min`` is smaller than 1.
    AssertionError
        If the solver's eigenvalues do not match the sampled ones.
    """
    if type == 'uniform_eigenvalues_dist':
        raise NotImplementedError('NOT IMPLEMENTED ERROR')

    if problem_size_min < 1 or problem_size_min >= problem_size_max:
        raise ValueError(
            'need 1 <= problem_size_min < problem_size_max, got '
            'problem_size_min={} and problem_size_max={}'.format(
                problem_size_min, problem_size_max))

    # All eigenvalue vectors are sampled before any matrix is built; this keeps
    # the order in which random numbers are consumed stable for seeded runs.
    if type == 'random_uniform_0_1':
        eigenvalues = [
            np.random.uniform(size=np.random.randint(problem_size_min, problem_size_max))
            for _ in range(n_examples)
        ]
    else:
        # standard normal distribution
        eigenvalues = [
            np.random.randn(np.random.randint(problem_size_min, problem_size_max))
            for _ in range(n_examples)
        ]

    matrices = []
    for e in eigenvalues:
        n = len(e)
        D = np.diag(e)
        # Q is orthogonal, hence Q^{-1} = Q^T and no inverse is needed.
        Q = np.linalg.qr(np.random.randn(n, n))[0]
        M = np.dot(np.dot(Q, D), Q.T)
        # M is symmetric by construction, so the symmetric solver applies: it
        # returns real eigenvalues in ascending order.
        np.testing.assert_array_almost_equal(np.sort(e), np.linalg.eigvalsh(M))
        # The full matrix is stored, flattened row by row.
        matrices.append(M.flatten())
    return eigenvalues, matrices

In [41]:
def write_data(eigenvalues, matrices, filename='data.csv'):
    """Write eigenproblems to a text file.

    File layout: the first line holds the number of problems; every following
    line describes one problem as ``n;eig_0,...,eig_{n-1};a_0,...`` where
    ``n`` is the number of eigenvalues, followed by the eigenvalues and the
    entries of the matching matrix sequence.

    Parameters
    ----------
    eigenvalues : sequence of sequences of numbers
        One eigenvalue sequence per problem.
    matrices : sequence of sequences of numbers
        One flattened matrix per problem, in the same order as ``eigenvalues``.
    filename : str
        Path of the file to (over)write. Defaults to ``'data.csv'``.

    Raises
    ------
    AssertionError
        If ``eigenvalues`` and ``matrices`` differ in length.
    """
    assert len(eigenvalues) == len(matrices), \
        'eigenvalues and matrices must describe the same number of problems'

    with open(filename, 'w') as f:
        # The header is derived from the data actually written rather than from
        # a notebook-level constant, so it cannot disagree with the file body.
        f.write(str(len(eigenvalues)) + '\n')
        for e, m in zip(eigenvalues, matrices):
            line = '{};{};{}\n'.format(
                len(e),
                ','.join(map(_format_number, e)),
                ','.join(map(_format_number, m)))
            f.write(line)


def _format_number(value):
    """Render one number as text without losing floating-point precision."""
    # np.float64 subclasses Python float. repr(float(...)) always round-trips,
    # whereas str() keeps only 12 significant digits on Python 2 / NumPy < 1.14.
    if isinstance(value, float):
        return repr(float(value))
    return str(value)

In [42]:
# Generate the eigenproblems and store them in the default output file.
E, M = generate_data(problem_size_max=PROBLEM_SIZE_MAX, n_examples=N_PROBLEMS)
write_data(eigenvalues=E, matrices=M)
print('Done writing {} eigenproblems.'.format(N_PROBLEMS))

Done writing 5 eigenproblems.
