# 01 - Test Numpy and Pandas basic operations

This notebook exercises a few basic NumPy and pandas operations directly from the [GitHub.dev](https://github.dev) console, using nothing but a browser.

In [None]:
# Whoa!

import math
import numpy as np
import pandas as pd
from scipy.spatial import distance

In [None]:
# Several ways to calculate the Euclidean distance between two vectors (the straight-line distance between two points, in any number of dimensions).

def euclidean_dist_simple(v1, v2):
    """Return the Euclidean distance between ``v1`` and ``v2`` using only ``math``.

    The two iterables are walked in lockstep with ``zip``, so surplus
    elements of the longer one are ignored.
    """
    squared_diffs = []
    for a, b in zip(v1, v2):
        # Squared difference of each pair of coordinates.
        squared_diffs.append(math.pow(a - b, 2))
    return math.sqrt(sum(squared_diffs))


def euclidean_dist_numpy_1(v1, v2):
    """Return the Euclidean distance between ``v1`` and ``v2`` with NumPy.

    Computes ``sqrt(sum((v1 - v2) ** 2))`` element-wise.

    Args:
        v1: First vector; any array-like (list, tuple, ndarray).
        v2: Second vector; must be broadcast-compatible with ``v1``.

    Returns:
        The distance as a NumPy scalar.
    """
    # Coerce both inputs so that plain Python sequences are supported too;
    # subtracting two lists directly would raise TypeError.
    v1_a = np.asarray(v1)
    v2_a = np.asarray(v2)
    sd = np.sum((v1_a - v2_a) ** 2)
    return np.sqrt(sd)

def euclidean_dist_numpy_2(v1, v2):
    """Return the Euclidean distance between ``v1`` and ``v2`` via ``np.linalg.norm``.

    Args:
        v1: First vector; any array-like (list, tuple, ndarray).
        v2: Second vector; must be broadcast-compatible with ``v1``.

    Returns:
        The norm of the difference ``v1 - v2``.
    """
    # np.asarray is a no-op for ndarrays and lets plain sequences work as well;
    # subtracting two lists directly would raise TypeError.
    return np.linalg.norm(np.asarray(v1) - np.asarray(v2))

def euclidean_dist_scipy(v1, v2):
    """Return the Euclidean distance between ``v1`` and ``v2`` via SciPy.

    Thin pass-through to ``scipy.spatial.distance.euclidean``.
    """
    eudist = distance.euclidean(v1, v2)
    return eudist


In [18]:
# Seed the generator so that Restart & Run All always produces the same vectors.
SEED = 42
rng = np.random.default_rng(SEED)

# Two random 20-element vectors with components drawn uniformly from [0, 1).
dis1 = rng.random(20)
dis2 = rng.random(20)
# np.array() copies its input, so v1/v2 do not share memory with dis1/dis2.
v1, v2 = np.array(dis1), np.array(dis2)
v1, v2

(array([0.03749332, 0.32620903, 0.14803189, 0.42369012, 0.89550885,
        0.89766749, 0.03343502, 0.77541136, 0.84541714, 0.31494427,
        0.90605383, 0.15578698, 0.80177435, 0.47315441, 0.82148173,
        0.36941354, 0.45338368, 0.9514657 , 0.09590251, 0.26166393]),
 array([0.23506   , 0.56125406, 0.316076  , 0.5050455 , 0.5907296 ,
        0.31781446, 0.54741105, 0.39965401, 0.81526403, 0.49994884,
        0.82842781, 0.60452791, 0.01704468, 0.63365631, 0.77807435,
        0.72446147, 0.17760082, 0.06243919, 0.62444235, 0.94305795]))

In [None]:
# Inspired by https://stackoverflow.com/questions/37794849/efficient-and-precise-calculation-of-the-euclidean-distance

import timeit

def wrapper(func, *args, **kwargs):
    """Bind ``args``/``kwargs`` to ``func`` and return a zero-argument callable.

    ``timeit`` invokes its statement without arguments, so the call has to be
    frozen ahead of time. Nothing is executed until the returned callable
    is invoked.
    """
    def wrapped():
        # Forward the captured arguments unchanged on every invocation.
        result = func(*args, **kwargs)
        return result
    return wrapped

# Benchmark settings: every timing is the total time (in seconds) taken by
# N_CALLS consecutive calls, and each measurement is repeated N_REPEATS times.
N_REPEATS = 3
N_CALLS = 100000

# Freeze the arguments so that timeit can call each implementation with none.
wrappered1 = wrapper(euclidean_dist_simple, v1, v2)
wrappered2 = wrapper(euclidean_dist_numpy_1, v1, v2)
wrappered3 = wrapper(euclidean_dist_numpy_2, v1, v2)
wrappered5 = wrapper(euclidean_dist_scipy, v1, v2)

# One list of N_REPEATS timings per implementation.
ts = timeit.repeat(wrappered1, repeat=N_REPEATS, number=N_CALLS)
tn1 = timeit.repeat(wrappered2, repeat=N_REPEATS, number=N_CALLS)
tn2 = timeit.repeat(wrappered3, repeat=N_REPEATS, number=N_CALLS)
tscipy = timeit.repeat(wrappered5, repeat=N_REPEATS, number=N_CALLS)

# Report the mean of the repeats for each implementation.
for label, timings in (
    ('math approach', ts),
    ('numpy simple approach', tn1),
    ('numpy.linalg.norm approach', tn2),
    ('scipy.distance approach', tscipy),
):
    print(f'{label}: {sum(timings)/len(timings)}')

math approach: 3.3344666666666853
numpy simple approach: 1.51230000000002
numpy.linalg.norm approach: 1.0336666666667043
scipy.distance approach: 2.0062333333333604
