In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data

import random, os, pathlib, time
from tqdm import tqdm
from sklearn import datasets

In [2]:
# Prefer the second GPU ("cuda:1") when the machine has one; otherwise fall
# back to the first GPU and finally to the CPU, so the notebook still runs on
# hosts that do not expose two CUDA devices.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
# Seed every random number generator this notebook imports (PyTorch, NumPy and
# Python's built-in `random`) with the same value so the sampled tensors repeat.
seed = 2023
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(seed)

In [4]:
# Dimensionality of the point we want to recover.
N = 5
# A single query point, stored as one row.
X = torch.randn((1, N))
### With N+1 centers the recovered X does not converge to the exact values,
### although it still reproduces the same scaled distances:
# C = torch.randn(N+1, N)  # ?? impossible to reconstruct from scaled distances
### Normalized/scaled distances are invertible with N+2 centers in total.
C = torch.randn((N + 2, N))
# Euclidean (p=2, the default) distance from X to every center.
dists = torch.cdist(X, C)
dists.shape

torch.Size([1, 7])

In [5]:
def compute_inverse(C, dists):
    """Recover points from their Euclidean distances to the centers ``C``.

    Subtracting the squared-distance equations of consecutive centers cancels
    the ``|x|^2`` term and leaves a linear system in ``x``::

        2 (c_{i+1} - c_i) . x = (d_i^2 - d_{i+1}^2) - (|c_i|^2 - |c_{i+1}|^2)

    which is solved with the pseudo-inverse of the left-hand matrix.

    Args:
        C: 2-D tensor with one center per row.
        dists: 2-D tensor with one row per point; column ``i`` holds the
            distance to ``C[i]``.

    Returns:
        2-D tensor with one reconstructed point per row.
    """
    prev_c, next_c = C[:-1], C[1:]

    # Left-hand matrix: one row per consecutive pair of centers.
    lhs = 2 * (next_c - prev_c)
    # Per-pair offset |c_i|^2 - |c_{i+1}|^2, kept as a column vector.
    offset = (prev_c.pow(2) - next_c.pow(2)).sum(dim=1, keepdim=True)

    # d_i^2 - d_{i+1}^2 for every point (one row per point).
    sq_dists = dists.pow(2)
    rhs = sq_dists[:, :-1] - sq_dists[:, 1:]

    # Solve for all points at once; transpose back to one point per row.
    solution = torch.matmul(torch.pinverse(lhs), rhs.t() - offset)
    return solution.t()

In [6]:
# Sanity check: invert the exact (unnormalized) distances; compare with X below.
compute_inverse(C, dists)

tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]])

In [7]:
# Ground-truth point, for comparison with the reconstruction above.
X

tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]])

In [8]:
# Normalize the distances so that they sum to one.
dists_n = dists.div(dists.sum())
# Apply the closed-form inverse directly to the normalized distances.
X_n = compute_inverse(C, dists_n)
X_n

tensor([[ 0.0057,  0.3833,  2.2899,  0.2737, -0.6862]])

In [9]:
# Distances from the naive reconstruction X_n to the centers, next to the true distances.
torch.cdist(X_n, C, p=2), dists

(tensor([[3.5869, 3.5838, 3.5827, 3.5776, 3.5755, 3.5695, 3.5689]]),
 tensor([[3.1373, 3.0464, 3.4614, 2.4063, 3.2618, 1.6331, 3.1386]]))

In [10]:
# STEPS = 9000
# scale = torch.nn.Parameter(torch.Tensor([1.]))
# optimizer = torch.optim.Adam([scale], lr=0.001)

# for step in range(STEPS):
# #     dists_n = dists/dists.sum()
#     X_n = compute_inverse(C, dists_n*scale)
#     dists_rec = torch.cdist(X_n, C, p=2)
#     error = ((dists_rec - dists)**2).sum()
    
#     optimizer.zero_grad()
#     error.backward()
#     optimizer.step()
    
#     if (step+1)%500 == 0:
#         print(f"Step {step}; scale: {scale}; error:{error}")

In [11]:
# Learn one scalar that rescales the normalized distances (dists_n, computed
# once in an earlier cell) so that the closed-form reconstruction reproduces
# the same normalized distances.
STEPS = 10000
scale = nn.Parameter(torch.tensor([1.0]))
optimizer = optim.Adam([scale], lr=0.01)

for step in range(STEPS):
    # Reconstruct from the rescaled distances, then renormalize the distances
    # of that reconstruction so both sides of the error sum to one.
    X_n = compute_inverse(C, scale * dists_n)
    dists_rec = torch.cdist(X_n, C)
    dists_rec = dists_rec.div(dists_rec.sum())
    error = (dists_rec - dists_n).pow(2).sum()

    optimizer.zero_grad()
    error.backward()
    optimizer.step()

    # Report every 500th iteration.
    if step % 500 == 499:
        print(f"Step {step}; scale: {scale}; error:{error}")

Step 499; scale: Parameter containing:
tensor([8.3194], requires_grad=True); error:0.004666309338063002
Step 999; scale: Parameter containing:
tensor([16.0105], requires_grad=True); error:0.0008479394600726664
Step 1499; scale: Parameter containing:
tensor([18.9528], requires_grad=True); error:4.209283724776469e-05
Step 1999; scale: Parameter containing:
tensor([19.7109], requires_grad=True); error:3.540022362358286e-06
Step 2499; scale: Parameter containing:
tensor([19.9677], requires_grad=True); error:3.122217151485529e-07
Step 2999; scale: Parameter containing:
tensor([20.0556], requires_grad=True); error:1.891223533334596e-08
Step 3499; scale: Parameter containing:
tensor([20.0799], requires_grad=True); error:5.652776025044659e-10
Step 3999; scale: Parameter containing:
tensor([20.0845], requires_grad=True); error:5.886180431957655e-12
Step 4499; scale: Parameter containing:
tensor([20.0848], requires_grad=True); error:4.867217739956686e-13
Step 4999; scale: Parameter containing:
t

In [12]:
# Normalized distances rescaled by the learned factor, next to the original distances.
dists_n*scale, dists

(tensor([[3.1373, 3.0464, 3.4614, 2.4063, 3.2618, 1.6331, 3.1386]],
        grad_fn=<MulBackward0>),
 tensor([[3.1373, 3.0464, 3.4614, 2.4063, 3.2618, 1.6331, 3.1386]]))

In [13]:
# Reconstruct the point from the normalized distances rescaled by the learned factor.
X_n = compute_inverse(C, scale * dists_n)
print(X_n)
# Distances of the reconstruction to every center, renormalized to sum to one.
dists_rec = torch.cdist(X_n, C)
dists_rec = dists_rec.div(dists_rec.sum())
print(dists_rec)

tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]], grad_fn=<TBackward0>)
tensor([[0.1562, 0.1517, 0.1723, 0.1198, 0.1624, 0.0813, 0.1563]],
       grad_fn=<DivBackward0>)


In [14]:
# Target normalized distances, for comparison with dists_rec printed above.
dists_n

tensor([[0.1562, 0.1517, 0.1723, 0.1198, 0.1624, 0.0813, 0.1563]])

In [15]:
# Ground truth next to the reconstruction obtained with the learned scale.
X, X_n

(tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]]),
 tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]], grad_fn=<TBackward0>))

In [16]:
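### The normalized distances are the same for both X and X_n. With only N+1 centers these can be different points; with the N+2 centers used here X_n matches X (see the output above).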

In [17]:
# Optimize the point itself (starting from the origin) so that its normalized
# distances to the centers match the target normalized distances.
STEPS = 10000
X_guess = nn.Parameter(torch.zeros_like(X))
optimizer = optim.Adam([X_guess], lr=0.01)

for step in range(STEPS):
    # Normalized distances of the current guess.
    dists_g = torch.cdist(X_guess, C)
    dists_g = dists_g.div(dists_g.sum())
    error = (dists_g - dists_n).pow(2).sum()

    optimizer.zero_grad()
    error.backward()
    optimizer.step()

    # Report every 500th iteration.
    if step % 500 == 499:
        print(f"Step {step}; error:{error}")
        print(f"Original:{X.numpy()}; \nReconstructed:{X_guess.data.numpy()}")

Step 499; error:2.283181174789206e-06
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-1.168552    0.5258709  -0.2790756   0.66481483 -0.72627527]]
Step 999; error:1.795026882689399e-08
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-1.203882    0.5474053  -0.3744936   0.68865    -0.74083066]]
Step 1499; error:3.661937419963124e-11
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-1.2073299   0.549246   -0.38509023  0.690873   -0.7423272 ]]
Step 1999; error:1.715294573045867e-14
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-1.2074907   0.5493308  -0.3855891   0.6909761  -0.74239683]]
Step 2499; error:5.162537064506978e-15
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-1.2074925   0.5493317  -0.3855934   0.69097745 -0.7423974 ]]
Step 2999; error:2.55351295663786e-15
Orig

In [18]:
# Shape of the distances from the optimized guess to the centers.
torch.cdist(X_guess, C, p=2).shape

torch.Size([1, 7])

In [19]:
# Optimized guess next to the ground-truth point.
X_guess, X

(Parameter containing:
 tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]], requires_grad=True),
 tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]]))

In [20]:
# Normalized distances of the guess (as computed in the last loop iteration) next to the target.
dists_g, dists_n

(tensor([[0.1562, 0.1517, 0.1723, 0.1198, 0.1624, 0.0813, 0.1563]],
        grad_fn=<DivBackward0>),
 tensor([[0.1562, 0.1517, 0.1723, 0.1198, 0.1624, 0.0813, 0.1563]]))

In [21]:
### Again, the same distance ratios. With only N+1 centers the points can differ; with the N+2 centers used here X_guess matches X (see the output above).

In [22]:
# Side by side: closed-form reconstruction, optimized guess, ground truth.
X_n, X_guess, X

(tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]], grad_fn=<TBackward0>),
 Parameter containing:
 tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]], requires_grad=True),
 tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]]))

In [23]:
# Sum of the original distances, i.e. the divisor used to build dists_n; compare with the learned scale.
dists.sum()

tensor(20.0850)

## Now, scaling the C similar to X

In [24]:
# Learn the scalar again, this time without renormalizing: the distances of
# the reconstruction are compared directly with the rescaled normalized
# distances that produced it.
STEPS = 10000
scale = nn.Parameter(torch.tensor([1.0]))
optimizer = optim.Adam([scale], lr=0.01)

for step in range(STEPS):
    X_n = compute_inverse(C, scale * dists_n)
    dists_rec = torch.cdist(X_n, C)
    # No division by dists_rec.sum() here, unlike the earlier scale fit.
    error = (dists_rec - scale * dists_n).pow(2).sum()

    optimizer.zero_grad()
    error.backward()
    optimizer.step()

    # Report every 500th iteration.
    if step % 500 == 499:
        print(f"Step {step}; scale: {scale}; error:{error}")

Step 499; scale: Parameter containing:
tensor([5.9606], requires_grad=True); error:46.62229537963867
Step 999; scale: Parameter containing:
tensor([10.3406], requires_grad=True); error:19.973737716674805
Step 1499; scale: Parameter containing:
tensor([13.6283], requires_grad=True); error:7.002164840698242
Step 1999; scale: Parameter containing:
tensor([15.8327], requires_grad=True); error:2.3078818321228027
Step 2499; scale: Parameter containing:
tensor([17.2569], requires_grad=True); error:0.7844226360321045
Step 2999; scale: Parameter containing:
tensor([18.1897], requires_grad=True); error:0.2808089852333069
Step 3499; scale: Parameter containing:
tensor([18.8191], requires_grad=True); error:0.10404705256223679
Step 3999; scale: Parameter containing:
tensor([19.2545], requires_grad=True); error:0.03864260017871857
Step 4499; scale: Parameter containing:
tensor([19.5589], requires_grad=True); error:0.013836178928613663
Step 4999; scale: Parameter containing:
tensor([19.7698], require

In [25]:
# Reconstruction from the last iteration of the loop above.
X_n

tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]], grad_fn=<TBackward0>)

In [26]:
# Ground-truth point, for comparison with X_n above.
X

tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]])

In [None]:
### Softmax and LayerNorm also involve a scaling/division step, so they follow a similar trend for reconstruction.