In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data

import random, os, pathlib, time
from tqdm import tqdm
from sklearn import datasets

In [2]:
# Device selection. The commented-out line would pick CUDA device index 1 instead.
# NOTE(review): `device` is not referenced by any other cell visible here, so
# all tensors in those cells are created on the default device.
# device = torch.device("cuda:1")
device = torch.device("cpu")

In [3]:
# One shared seed for every random number generator used by the notebook,
# so that the random point and centers drawn below are reproducible.
seed = 2023
np.random.seed(seed)     # NumPy's global RNG
torch.manual_seed(seed)  # PyTorch's default generator
random.seed(seed)        # Python's built-in RNG (returns None, so the cell shows no output)

In [29]:
# Dimensionality of the point that the rest of the notebook tries to recover.
N = 5
# A single random N-dimensional point, stored as one row.
X = torch.randn(1, N)
### Author's observation: with N+1 centers, inverting the scaled (sum-normalized)
### distances does not converge to the exact X, yet the point that is found
### still has the same scaled distances as X.
C = torch.randn(N+1, N)  # ?? X appears impossible to reconstruct from the scaled distances here
### Author's note: the normalized/scaled distances become invertible once N+2
### distances (i.e. N+2 centers) are available; the disabled line below sets that up.
# C = torch.randn(N+2, N)
# Euclidean (p=2) distance from X to every row of C.
dists = torch.cdist(X, C, p=2)
dists.shape

torch.Size([1, 6])

In [30]:
def compute_inverse(C, dists):
    """Recover points from their Euclidean distances to a set of centers.

    For a point ``x`` with ``|x - c_i|^2 = d_i^2``, subtracting the equations
    of consecutive centers cancels the quadratic term ``|x|^2`` and leaves a
    linear system in ``x``::

        2 (c_{i+1} - c_i) . x = (d_i^2 - d_{i+1}^2) - (|c_i|^2 - |c_{i+1}|^2)

    The system is solved with the Moore-Penrose pseudo-inverse.

    Args:
        C: 2-D tensor of centers, one center per row.
        dists: 2-D tensor of distances, one row per point and one column per
            center (same center order as the rows of ``C``).

    Returns:
        2-D tensor with one reconstructed point per row of ``dists``.
    """
    later_centers, earlier_centers = C[1:], C[:-1]
    # Coefficient matrix of the linear system (one row per consecutive pair).
    system = 2 * (later_centers - earlier_centers)

    # Difference of squared center norms for each consecutive pair, as a column.
    centers_sq = C ** 2
    norm_offset = torch.sum(centers_sq[:-1] - centers_sq[1:], dim=1, keepdim=True)

    # Difference of squared distances for each consecutive pair, transposed so
    # that every column holds the right-hand side of one point.
    dists_sq = dists ** 2
    rhs = (dists_sq[:, :-1] - dists_sq[:, 1:]).t() - norm_offset

    # Solve for all points at once, then put the points back in rows.
    return torch.matmul(torch.pinverse(system), rhs).t()

In [31]:
compute_inverse(C, dists)

tensor([[ 0.1570, -0.2335, -1.7990, -0.2919, -0.0465]])

In [32]:
X

tensor([[ 0.1570, -0.2335, -1.7990, -0.2919, -0.0465]])

In [33]:
dists.sum() ## This should be reconstructed

tensor(20.8321)

In [34]:
# Normalize the distances by their total so they sum to 1; the overall scale is discarded.
dists_n = dists/dists.sum()
# Feed the normalized distances straight into the inverse, for comparison with X.
X_n = compute_inverse(C, dists_n)
X_n

tensor([[-1.4427,  0.2089, -0.3084,  0.7398, -0.8711]])

In [35]:
torch.cdist(X_n, C, p=2), dists

(tensor([[3.1069, 3.1075, 3.1085, 3.1112, 3.1067, 3.1070]]),
 tensor([[3.0684, 3.3440, 3.7208, 4.5961, 2.9812, 3.1217]]))

In [36]:
# STEPS = 9000
# scale = torch.nn.Parameter(torch.Tensor([1.]))
# optimizer = torch.optim.Adam([scale], lr=0.001)

# for step in range(STEPS):
# #     dists_n = dists/dists.sum()
#     X_n = compute_inverse(C, dists_n*scale)
#     dists_rec = torch.cdist(X_n, C, p=2)
#     error = ((dists_rec - dists)**2).sum()
    
#     optimizer.zero_grad()
#     error.backward()
#     optimizer.step()
    
#     if (step+1)%500 == 0:
#         print(f"Step {step}; scale: {scale}; error:{error}")

In [37]:
# Learn a single scalar that rescales the normalized distances `dists_n`,
# starting from 1 and optimized with Adam.
STEPS = 10000
scale = torch.nn.Parameter(torch.Tensor([1.]))
optimizer = torch.optim.Adam([scale], lr=0.01)

for step in range(STEPS):
#     dists_n = dists/dists.sum()
    # Invert the rescaled distances, then measure the true distances from the
    # recovered point back to the centers.
    X_n = compute_inverse(C, dists_n*scale)
    dists_rec = torch.cdist(X_n, C, p=2)
    
# Disabled alternative objective: compare the distances after re-normalizing them.
#     dists_rec = dists_rec/dists_rec.sum()
#     error = ((dists_rec - dists_n)**2).sum()

#     dists_rec = dists_rec/dists_rec.sum()
    # Self-consistency error: the recovered point's distances should equal the
    # rescaled distances that produced it. The original `dists` is not used here.
    error = ((dists_rec - dists_n*scale)**2).sum()

    
    optimizer.zero_grad()
    error.backward()
    optimizer.step()
    
    # Progress report every 500 iterations. `step` is the zero-based index,
    # `error` was computed before this iteration's update and `scale` is shown after it.
    if (step+1)%500 == 0:
        print(f"Step {step}; scale: {scale}; error:{error}")

Step 499; scale: Parameter containing:
tensor([5.7157], requires_grad=True); error:26.895294189453125
Step 999; scale: Parameter containing:
tensor([9.6379], requires_grad=True); error:11.99869155883789
Step 1499; scale: Parameter containing:
tensor([12.6276], requires_grad=True); error:4.922844886779785
Step 1999; scale: Parameter containing:
tensor([14.7873], requires_grad=True); error:2.004864454269409
Step 2499; scale: Parameter containing:
tensor([16.3305], requires_grad=True); error:0.846184492111206
Step 2999; scale: Parameter containing:
tensor([17.4507], requires_grad=True); error:0.37348130345344543
Step 3499; scale: Parameter containing:
tensor([18.2839], requires_grad=True); error:0.17093908786773682
Step 3999; scale: Parameter containing:
tensor([18.9180], requires_grad=True); error:0.07995717227458954
Step 4499; scale: Parameter containing:
tensor([19.4090], requires_grad=True); error:0.0375974141061306
Step 4999; scale: Parameter containing:
tensor([19.7928], requires_gr

In [13]:
dists_n*scale, dists

(tensor([[2.1433, 2.0812, 2.3674, 2.4449, 2.0222, 1.0560]],
        grad_fn=<MulBackward0>),
 tensor([[3.1373, 3.0464, 3.4655, 3.5788, 2.9602, 1.5457]]))

In [14]:
# Recompute the point implied by the current `scale` and show it.
X_n = compute_inverse(C, dists_n*scale)
print(X_n)
# Distances from that point to the centers, normalized to sum to 1 so that
# they can be compared with `dists_n`.
dists_rec = torch.cdist(X_n, C, p=2)
dists_rec = dists_rec/dists_rec.sum()
print(dists_rec)

tensor([[-0.2612,  0.3698, -0.4908, -0.2540,  0.2800]], grad_fn=<TBackward>)
tensor([[0.1769, 0.1718, 0.1954, 0.2018, 0.1669, 0.0872]],
       grad_fn=<DivBackward0>)


In [15]:
dists_n

tensor([[0.1769, 0.1718, 0.1954, 0.2018, 0.1669, 0.0872]])

In [16]:
X, X_n

(tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]]),
 tensor([[-0.2612,  0.3698, -0.4908, -0.2540,  0.2800]], grad_fn=<TBackward>))

In [17]:
### The normalized distances are the same for both X and X_n, which are different points.

In [18]:
# Optimize a guess for the point directly (instead of a scale), starting from
# the origin, so that its normalized distances to C match `dists_n`.
STEPS = 10000
X_guess = torch.nn.Parameter(torch.zeros_like(X))
optimizer = torch.optim.Adam([X_guess], lr=0.01)

for step in range(STEPS):
    # Distances from the current guess to every center, normalized to sum to 1.
    dists_g = torch.cdist(X_guess, C, p=2)
    dists_g = dists_g/dists_g.sum()
    # Squared error between the guess's normalized distances and the target ones.
    error = ((dists_g - dists_n)**2).sum()
    
    optimizer.zero_grad()
    error.backward()
    optimizer.step()
    
    # Progress report every 500 iterations. `step` is the zero-based index and
    # `error` was computed before this iteration's update of `X_guess`.
    if (step+1)%500 == 0:
        print(f"Step {step}; error:{error}")
        print(f"Original:{X.numpy()}; \nReconstructed:{X_guess.data.numpy()}")

Step 499; error:6.050782758393325e-06
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-0.20903355  0.32832682 -0.19603808 -0.27129516  0.23769674]]
Step 999; error:2.119580955195488e-08
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-0.25581643  0.36677206 -0.47298455 -0.25727013  0.27943337]]
Step 1499; error:1.587180387119247e-11
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-0.261079    0.369737   -0.49035275 -0.25406897  0.2799974 ]]
Step 1999; error:2.4980018054066022e-15
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-0.26122996  0.3698208  -0.49083844 -0.25397566  0.28000942]]
Step 2499; error:1.9984014443252818e-15
Original:[[-1.2074946   0.54933286 -0.3855975   0.6909789  -0.74239856]]; 
Reconstructed:[[-0.26123118  0.36982146 -0.4908421  -0.25397497  0.28000957]]
Step 2999; error:4.218847493575595e-15
O

In [19]:
torch.cdist(X_guess, C, p=2).shape

torch.Size([1, 6])

In [20]:
X_guess, X

(Parameter containing:
 tensor([[-0.2612,  0.3698, -0.4908, -0.2540,  0.2800]], requires_grad=True),
 tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]]))

In [21]:
dists_g, dists_n

(tensor([[0.1769, 0.1718, 0.1954, 0.2018, 0.1669, 0.0872]],
        grad_fn=<DivBackward0>),
 tensor([[0.1769, 0.1718, 0.1954, 0.2018, 0.1669, 0.0872]]))

In [22]:
### Again, the same distance ratios, but obtained from different points.

In [23]:
X_n, X_guess, X

(tensor([[-0.2612,  0.3698, -0.4908, -0.2540,  0.2800]], grad_fn=<TBackward>),
 Parameter containing:
 tensor([[-0.2612,  0.3698, -0.4908, -0.2540,  0.2800]], requires_grad=True),
 tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]]))

In [24]:
dists.sum()

tensor(17.7339)

## Now, scaling C similarly to X

In [25]:
# Learn a single scalar that rescales the normalized distances `dists_n`,
# starting from 1 and optimized with Adam.
# NOTE(review): the heading before this cell talks about scaling C, but C is
# used unchanged here and the code matches the earlier scale-fitting cell - confirm intent.
STEPS = 10000
scale = torch.nn.Parameter(torch.Tensor([1.]))
optimizer = torch.optim.Adam([scale], lr=0.01)

for step in range(STEPS):
#     dists_n = dists/dists.sum()
    # Invert the rescaled distances, then measure the true distances from the
    # recovered point back to the centers.
    X_n = compute_inverse(C, dists_n*scale)
    dists_rec = torch.cdist(X_n, C, p=2)
#     dists_rec = dists_rec/dists_rec.sum()
    # Self-consistency error: the recovered point's distances should equal the
    # rescaled distances that produced it.
    error = ((dists_rec - dists_n*scale)**2).sum()
    
    optimizer.zero_grad()
    error.backward()
    optimizer.step()
    
    # Progress report every 500 iterations. `step` is the zero-based index,
    # `error` was computed before this iteration's update and `scale` is shown after it.
    if (step+1)%500 == 0:
        print(f"Step {step}; scale: {scale}; error:{error}")

Step 499; scale: Parameter containing:
tensor([5.5649], requires_grad=True); error:7.854038715362549
Step 999; scale: Parameter containing:
tensor([8.6790], requires_grad=True); error:1.7509962320327759
Step 1499; scale: Parameter containing:
tensor([10.4057], requires_grad=True); error:0.3475249707698822
Step 1999; scale: Parameter containing:
tensor([11.3042], requires_grad=True); error:0.06650350987911224
Step 2499; scale: Parameter containing:
tensor([11.7627], requires_grad=True); error:0.0113601079210639
Step 2999; scale: Parameter containing:
tensor([11.9834], requires_grad=True); error:0.001502073835581541
Step 3499; scale: Parameter containing:
tensor([12.0761], requires_grad=True); error:0.00012817858078051358
Step 3999; scale: Parameter containing:
tensor([12.1067], requires_grad=True); error:5.690003035851987e-06
Step 4499; scale: Parameter containing:
tensor([12.1139], requires_grad=True); error:1.021492153086001e-07
Step 4999; scale: Parameter containing:
tensor([12.1149]

In [26]:
X_n

tensor([[-0.2612,  0.3698, -0.4908, -0.2540,  0.2800]], grad_fn=<TBackward>)

In [27]:
X

tensor([[-1.2075,  0.5493, -0.3856,  0.6910, -0.7424]])

In [28]:
### Softmax and LayerNorm also involve a scaling/division step, so they follow a similar trend for reconstruction.