In [5]:
# This tells matplotlib not to try opening a new window for each plot.
%matplotlib inline

from matplotlib.colors import ListedColormap

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris

In [6]:
# Fetch the iris dataset that ships with scikit-learn.
iris = load_iris()

# Short aliases: X for the feature rows, Y for the matching labels.
X = iris.data
Y = iris.target

# Draw one seeded permutation of the row indices and index both arrays
# with it, so each feature row stays paired with its label.
np.random.seed(0)
shuffle = np.random.permutation(np.arange(X.shape[0]))
X = X[shuffle]
Y = Y[shuffle]

# The first 100 shuffled rows form the training set; the rest are held out for testing.
train_data, test_data = X[:100], X[100:]
train_labels, test_labels = Y[:100], Y[100:]

In [14]:
# Take the first two training samples as the example vectors.
v1, v2 = train_data[0], train_data[1]

# Show both vectors, one per line.
print(v1, v2, sep="\n")

[5.8 2.8 5.1 2.4]
[6.  2.2 4.  1. ]


In [15]:
# Cartesian product of two vectors with plain numpy (no pandas), after
# https://stackoverflow.com/questions/11144513/numpy-cartesian-product-of-x-and-y-array-points-into-single-array-of-2d-points
# Note: you don't actually need to do this for distance calculations.

# Column 0 cycles through v1 once per element of v2; column 1 holds each
# element of v2 repeated len(v1) times in a row.
v1_cycled = np.tile(v1, len(v2))
v2_repeated = np.repeat(v2, len(v1))
cx = np.array([v1_cycled, v2_repeated]).T
print(cx)

[[5.8 6. ]
 [2.8 6. ]
 [5.1 6. ]
 [2.4 6. ]
 [5.8 2.2]
 [2.8 2.2]
 [5.1 2.2]
 [2.4 2.2]
 [5.8 4. ]
 [2.8 4. ]
 [5.1 4. ]
 [2.4 4. ]
 [5.8 1. ]
 [2.8 1. ]
 [5.1 1. ]
 [2.4 1. ]]


In [23]:
# Euclidean distance is computed element-wise: row i pairs component i
# of v1 (column 0) with component i of v2 (column 1).
ew = np.stack((v1, v2)).T
ew

array([[5.8, 6. ],
       [2.8, 2.2],
       [5.1, 4. ],
       [2.4, 1. ]])

In [24]:
# Squared difference for every (v1, v2) component pair. Each row of ew
# unpacks into its two entries; the results are collected into a list and
# bound to a name so they can be reused afterwards.
sqd = [(a - b) ** 2 for a, b in ew]
sqd

[0.04000000000000007,
 0.3599999999999996,
 1.2099999999999993,
 1.9599999999999997]

In [25]:
# Sum of squared differences: add up the per-component terms, starting from 0.
ssqd = sum(sqd, 0)
ssqd

3.5699999999999985

In [26]:
# Square root of the summed squares: the Euclidean distance between v1 and v2.
pow(ssqd, 0.5)

1.889444362769118