In [1]:
from sklearn.manifold import TSNE
import numpy as np
# Toy 4x2 matrix, one row per document (D1-D4). The two columns are labelled
# "good" and "superb" (presumably per-document word counts - confirm).
data = np.array(
    [
        # good superb
        [0, 0],  # D1: Neutral or negative
        [3, 1],  # D2: Moderately positive
        [5, 2],  # D3: Positive
        [2, 5],  # D4: Highly positive
    ]
)

# Z-score every column: subtract the column mean, divide by the column
# standard deviation (NumPy's default population std, ddof=0).
X_standardized = np.divide(data - data.mean(axis=0), data.std(axis=0))

# Project the four standardized rows onto a single t-SNE axis. The seed is
# fixed so the embedding is repeatable from run to run.
tsne = TSNE(
    n_components=1,
    perplexity=2,
    method="exact",
    random_state=0,
)
X_tsne = tsne.fit_transform(X_standardized)
X_tsne


array([[ 250.18192 ],
       [-135.89197 ],
       [ -33.74024 ],
       [  84.560005]], dtype=float32)

In [2]:
import torch
import torch.optim as optim
import numpy as np

def compute_squared_euclidean_distance(X):
    """
    Compute the squared Euclidean distance between every pair of rows of X.

    Args:
        X: 2-D tensor of shape (n, d), one point per row.

    Returns:
        Tensor D of shape (n, n) with D[i, j] = ||x_i - x_j||^2, clamped so
        that no entry is negative.
    """
    # Squared norm of each point, shape (n,).
    sum_X = torch.sum(X * X, dim=1)

    # Expand the squared norms to (1, n) and (n, 1) so they broadcast against
    # the (n, n) matrix of dot products, using the identity
    #   ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2<x_i, x_j>
    D = sum_X.unsqueeze(0) - 2 * torch.mm(X, X.t()) + sum_X.unsqueeze(1)

    # The expansion above subtracts nearly equal numbers when two points are
    # close (or identical), so rounding can leave tiny negative values.
    # A squared distance is never negative, so floor the result at zero.
    return torch.clamp(D, min=0.0)

def compute_joint_probabilities_high_dim(X, sigma=1.0, epsilon=1e-5):
    """
    Build row-normalised Gaussian affinities between the rows of X.

    Every squared pairwise distance d is mapped to exp(-d / (2 * sigma^2)),
    the diagonal (self-affinity) is zeroed, and each row is divided by its
    sum plus `epsilon`, which keeps the division finite if a row sums to zero.
    """
    sq_dists = compute_squared_euclidean_distance(X)
    bandwidth = 2. * sigma ** 2
    affinities = torch.exp(-sq_dists / bandwidth)
    # A point is not its own neighbour: clear the diagonal before normalising.
    affinities.fill_diagonal_(0)
    row_totals = affinities.sum(dim=1, keepdim=True)
    return affinities / (row_totals + epsilon)

def compute_joint_probabilities_low_dim(Y, epsilon=1e-5):
    """
    Build row-normalised Student-t style affinities between the rows of Y.

    Every squared pairwise distance d is mapped to 1 / (1 + d), the diagonal
    (self-affinity) is zeroed, and each row is divided by its sum plus
    `epsilon`.
    """
    sq_dists = compute_squared_euclidean_distance(Y)
    kernel = 1 / (1 + sq_dists)
    # Self-pairs carry no information about the layout, so drop them.
    kernel.fill_diagonal_(0)
    row_totals = kernel.sum(dim=1, keepdim=True)
    return kernel / (row_totals + epsilon)

def kl_divergence(P, Q, epsilon=1e-5):
    """
    Return sum(P * log((P + epsilon) / (Q + epsilon))) as a scalar tensor.

    `epsilon` is added to both numerator and denominator so the logarithm
    stays finite where P or Q contains zeros.
    """
    smoothed_ratio = (P + epsilon) / (Q + epsilon)
    weighted_log_ratio = P * torch.log(smoothed_ratio)
    return weighted_log_ratio.sum()

def run_sne(X, iterations=1000, learning_rate=10.0, perplexity=30.0, n_components=2,
            random_state=None):
    """
    Run SNE algorithm to reduce the dimensionality of X.

    The embedding Y is initialised from a standard normal and updated with
    plain SGD to minimise kl_divergence(P, Q), where P comes from
    compute_joint_probabilities_high_dim(X) and Q from
    compute_joint_probabilities_low_dim(Y). The loss is printed every 100
    iterations.

    Args:
        X: High-dimensional data of shape (n, d). A torch tensor is used as
            is; anything else (NumPy array, nested list) is converted to a
            float32 tensor.
        iterations: Number of SGD steps.
        learning_rate: SGD step size.
        perplexity: NOTE - this is not calibrated per point. It only sets one
            global Gaussian bandwidth, sigma = sqrt(perplexity / 2), so that
            2 * sigma^2 == perplexity.
        n_components: Dimensionality of the embedding.
        random_state: Optional integer seed for the initial embedding. When
            None (the default) the global torch RNG is used, as before.

    Returns:
        NumPy array of shape (n, n_components) holding the final embedding.
    """
    # Accept array-likes as well as tensors; tensors pass through untouched.
    if not torch.is_tensor(X):
        X = torch.as_tensor(X, dtype=torch.float32)

    # Initialize low-dimensional representation. It is sampled on the CPU so a
    # seeded CPU generator can be used, then moved next to X.
    generator = None
    if random_state is not None:
        generator = torch.Generator().manual_seed(int(random_state))
    Y = torch.randn(X.shape[0], n_components, generator=generator).to(X.device)
    Y.requires_grad_(True)

    # Compute high-dimensional affinities. P is a fixed target, so it is built
    # without autograd tracking; otherwise an X that requires grad would make
    # the repeated backward() calls below fail.
    with torch.no_grad():
        P = compute_joint_probabilities_high_dim(X, sigma=np.sqrt(perplexity / 2))

    optimizer = optim.SGD([Y], lr=learning_rate)

    for iteration in range(iterations):
        optimizer.zero_grad()
        Q = compute_joint_probabilities_low_dim(Y)
        loss = kl_divergence(P, Q)
        loss.backward()
        optimizer.step()

        if iteration % 100 == 0:
            print(f"Iteration {iteration}, loss = {loss.item()}")

    # .cpu() is a no-op for CPU tensors and is required before .numpy() otherwise.
    return Y.detach().cpu().numpy()

# Example usage
# Assuming X is your high-dimensional data as a PyTorch tensor
# X = torch.tensor(your_data_here, dtype=torch.float32)

# Y = run_sne(X)
