In [1]:
import numpy as np
import pandas as pd

In [10]:
def embed_data(x):
    '''
    Embed the data into a numpy array for solving a linear system
    '''
    emb = [0] * 166
    
    # the first entry is 0
    emb[0] = 1

    # entries 1-6 consist of e_i (6)
    for i in range(6):
        emb[i+1] = x[i]

    # entries 7-12 consist of e_i^2 (6)
    for i in range(6):
        emb[i+7] = x[i]**2

    # entries 13-27 consist of e_ie_j (i,j distinct) (15)
    counter = 0
    for i in range(6):
        for j in range(i):
            emb[counter+13] = x[i]*x[j]
            counter += 1

    # entries 28-33 consist of e_i^3 (6)
    for i in range(6):
        emb[28+i] = x[i]**3

    # entries 34-63 consist of e_i^2e_j (i,j distinct) (30)
    counter = 0
    for i in range(6):
        for j in range(6):
            if j != i:
                emb[counter+34] = x[i]**2 * x[j]
                counter += 1

    # entries 64-82 consist of e_ie_je_k (i,j,k distinct) (20)
    counter = 0
    for i in range(6):
        for j in range(i):
            for k in range(j):
                emb[counter+64] = x[i]*x[j]*x[k]
                counter += 1

    # entries 83-165 are entries 0-82 times e
    for i in range(83):
        emb[83+i] = emb[i] * x[6]

    return emb

In [28]:
url = '/Users/zou/Documents/brown/codes/pentagram/out/31_map/coords_small.csv'
df = pd.read_csv(url)

# preprocess
df = df.drop_duplicates()
rows_with_inf = df.index[np.isinf(df).any(axis=1)]
df = df.drop(rows_with_inf)

# cast to numpy
data = df.to_numpy()
data_embedded = np.array([embed_data(row) for row in data])
data_embedded.shape

(26, 166)

In [29]:
# Check for NaN or inf values
print(np.isnan(data_embedded).any())  # Should return False
print(np.isinf(data_embedded).any())  # Should return False

False
False


In [40]:
# Perform Singular Value Decomposition
U, S, Vt = np.linalg.svd(data_embedded)

# Tolerance to consider singular values as zero
tol = 1e-10

# Find the rank by counting non-zero singular values
rank = (S > tol).sum()

# The null space corresponds to the last columns of Vt
null_space = Vt[rank:]

# Display the null space
null_space[0]

array([ 1.97602312e-02,  5.18784058e-02,  9.28715420e-03, -1.10555664e-02,
        1.63839048e-02,  1.60435626e-02,  5.04153811e-03,  1.45293651e-01,
        5.14763457e-03,  1.88817185e-02,  1.59934000e-02,  1.95853025e-02,
        1.17156989e-03,  2.52396227e-02, -4.07064288e-02, -5.69398148e-03,
        4.89461627e-02,  7.73650480e-03, -1.38576043e-02,  5.34919700e-02,
        8.32330169e-03, -1.44019700e-02,  1.78019801e-02,  1.45486572e-02,
        2.43494533e-03, -2.24046673e-03,  4.79019955e-03,  4.99237967e-03,
        4.42327805e-01,  2.15372309e-03, -2.12082748e-02,  3.13297563e-02,
        3.81932262e-02,  2.55306089e-04,  6.84527709e-02, -1.32411542e-01,
        1.79619949e-01,  1.96539369e-01,  4.09080669e-02,  1.20643343e-02,
       -2.73616575e-03,  3.59175126e-03,  3.89759451e-03,  1.19332834e-03,
        4.50006659e-02,  8.48817989e-03,  1.88200156e-02,  1.66450795e-02,
        4.77369289e-03,  7.46380062e-02,  6.83374782e-03, -1.53089520e-02,
        3.35301325e-02,  