# Distance Matrix Computation

by Flávia Carvalhido


- input (2):  M x N matrix of feature vectors, distance measure
- output (1): M x M diagonally symmetric matrix of distances between all the vectors


In [47]:
import numpy as np 
from sklearn.metrics.pairwise import cosine_distances

class DistanceMatrixComputation:
    
    def __init__(self, feature_vectors=None, distance_measure=0):
        distance_values = ["euclidean", "cosine"]
        self.feature_vectors = feature_vectors
        
        if distance_measure in distance_values:
            self.distance_measure = distance_measure

        elif distance_measure in [0,1]: 
            self.distance_measure = distance_values[distance_measure]

        else: 
            raise ValueError("Distance Measure can only have the following values: \"euclidean\"(0) or \"cosine\"(1)")


    def compute(self):
        mat = []

        if self.distance_measure == "euclidean": 
            print("Computing Euclidean Distance Matrix")
            for vec in self.feature_vectors:
                mat.append(np.array([np.linalg.norm(vec - b) for b in self.feature_vectors]))
          
        elif self.distance_measure == "cosine":
            print("Computing Cosine Distance Matrix")
            for vec in self.feature_vectors:
                mat.append(np.array([cosine_distances([vec], [b])[0][0] for b in self.feature_vectors])) # FIXME: how to avoid indexing the results obtained from cosine_distances. Maybe use another function not adapted to multiple samples?

        return np.array(mat)

    def help(self): 
        print("Distance Matric Computation Usage:\n Define DistanceMatrixComputation instance by creating a variable and assigning it to DistanceMatrixComputation(feature_vectors, distance_measure).\n   - Parameter feature_vectors should be a MxN array containing all M feature_vectors with dimension N. \n   - Parameter distance_measure should be either \"euclidean\" or \"cosine\". 0 and 1 can also be used to signify euclidean and cosine, respectively. \n\n After instancing a variable DistanceMatrixComputation var, the method compute(self) can be called simply by running \"var.compute()\" which will return a diagonally symmetric distance matrix with dimensions MxM")


In [43]:
# example
M = [np.array([255,47,98]), np.array([0,250,239]), np.array([0,40,78])]

In [48]:
test_help = DistanceMatrixComputation()
test_help.help()

Distance Matric Computation Usage:
 Define DistanceMatrixComputation instance by creating a variable and assigning it to DistanceMatrixComputation(feature_vectors, distance_measure).
   - Parameter feature_vectors should be a MxN array containing all M feature_vectors with dimension N. 
   - Parameter distance_measure should be either "euclidean" or "cosine". 0 and 1 can also be used to signify euclidean and cosine, respectively. 

 After instancing a variable DistanceMatrixComputation var, the method compute(self) can be called simply by running "var.compute()" which will return a diagonally symmetric distance matrix with dimensions MxM


In [39]:
test_euclidean = DistanceMatrixComputation(M, "euclidean")
# test_euclidean = DistanceMatrixComputation(M, 0)

dist_matrix = test_euclidean.compute()

dist_matrix

Computing Euclidean Distance Matrix


array([[  0.        , 355.12673794, 255.8788776 ],
       [355.12673794,   0.        , 264.6148144 ],
       [255.8788776 , 264.6148144 ,   0.        ]])

In [33]:
test_cosine = DistanceMatrixComputation(M, "cosine")
# test_euclidean = DistanceMatrixComputation(M, 1)

dist_matrix = test_cosine.compute()

dist_matrix

Computing Cosine Distance Matrix


array([[0.00000000e+00, 6.33135808e-01, 6.08043670e-01],
       [6.33135808e-01, 1.11022302e-16, 5.52738257e-02],
       [6.08043670e-01, 5.52738257e-02, 0.00000000e+00]])