# Data Structures and Algorithms

In [1]:
import numpy as np

## Understanding Numpy
We use NumPy for vectorised operations on arrays, which are faster than looping over Python lists with for-loops.

In [63]:
## Adding two vectors element by element with a plain Python for-loop
a = (1, 2, 3, 4, 5)
b = (2, 3, 4, 5, 6)
c = []  # target: c = a + b, built one element at a time

# Walk both vectors in step and store each pairwise sum
for left, right in zip(a, b):
    c.append(left + right)

print(c)

[3, 5, 7, 9, 11]


In [65]:
## Adding the same two vectors with numpy
a, b = np.array(a), np.array(b)  # convert the tuples from the previous cell into arrays

## With numpy, adding two vectors reads like adding two numbers: one expression, no for-loop
print(a + b)

[ 3  5  7  9 11]


We'll use NumPy throughout this notebook wherever we need fast operations on arrays.

# Monte Carlo Simulation

In [68]:
## Estimating pi with Monte Carlo: draw random points in the unit square and
## count how many land inside the quarter circle of radius 1
n = 100000  # number of random points; reduce/increase it to see the effect on pi's accuracy
x = np.random.rand(n)
y = np.random.rand(n)

dist = np.sqrt(x**2 + y**2)  # distance of every point from the origin
# (points inside the quarter circle) / (all points) approximates pi / 4
pi = 4 * np.count_nonzero(dist <= 1) / n
print(pi)  # the value changes from run to run because the points are random


3.1418


# Dynamic Programming
Dynamic programming solves a large problem by finding solutions to the smaller subproblems that make it up, and combining those solutions.

In [73]:
## Solve fibonacci using a simple recursive function (see how simple the subproblems make the entire problem)
def fibonacci(n):
    """Return the n-th Fibonacci number using plain recursion (no cache).

    This is the deliberately naive version: every call recomputes both
    smaller subproblems from scratch, so the number of calls grows
    exponentially with n.

    Args:
        n: Position in the sequence, a non-negative integer
            (fibonacci(0) == 0, fibonacci(1) == 1).

    Returns:
        The n-th Fibonacci number.

    Raises:
        ValueError: If n is negative. Without this check the recursion
            would step past both base cases and only stop with a
            RecursionError.
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    if n == 0:
        return 0
    if n == 1:
        return 1
    return fibonacci(n - 1) + fibonacci(n - 2)

In [78]:
fibonacci(40) ## Naive recursion: look at how long this takes to run, and it gets worse quickly once n goes above 40

102334155

In [79]:
## We solve the recursive function speed problem by adding a cache (Do you understand why?)
def fibo(n, cache=None):
    """Return the n-th Fibonacci number using recursion with memoization.

    Each result is stored in ``cache`` the first time it is computed, so
    every subproblem is solved only once instead of being recomputed.

    The function is still recursive: the chain of nested calls grows with
    n, so a large enough n raises RecursionError even though the amount of
    work is small.

    Args:
        n: Position in the sequence, a non-negative integer
            (fibo(0) == 0, fibo(1) == 1).
        cache: Dict mapping an index to its already-computed Fibonacci
            number. It is updated in place, so passing the same dict to a
            later call reuses the earlier results. Defaults to a fresh,
            empty dict.

    Returns:
        The n-th Fibonacci number.

    Raises:
        ValueError: If n is negative. Without this check the recursion
            would step past both base cases and only stop with a
            RecursionError.
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    if cache is None:
        # Create the dict here, not in the signature, so calls never share state
        cache = {}
    if n == 0:
        return 0
    if n == 1:
        return 1
    if n in cache:
        return cache[n]
    cache[n] = fibo(n - 1, cache) + fibo(n - 2, cache)
    return cache[n]

In [81]:
## With the cache we can now do fibonacci 1000 in less than a second
cache = {}
fibo(1000, cache)

43466557686937456435688527675040625802564660517371780402481729089536555417949051890403879840079255169295922593080322634775209689623239873322471161642996440906533187938298969649928516003704476137795166849228875

In [82]:
cache = dict()
## But why can't we solve for 10000, when the cache makes each extra term so cheap?
## The cache removes repeated work, not the depth of the call chain: fibo(10000)
## has to nest its calls all the way down to the base cases before any result
## comes back, and that exceeds the interpreter's recursion limit.
## The error is the point of this cell, so we catch and print it; that way
## "Restart & Run All" can continue on to the cells below.
try:
    print(fibo(10000, cache))
except RecursionError as err:
    print("RecursionError:", err)

RecursionError: maximum recursion depth exceeded while calling a Python object

In [83]:
## We solve the recursion-depth problem of the memoized version by using a bottom-up approach instead; only two terms are kept in memory at any given time
def fib(n):
    """Return the n-th Fibonacci number with a bottom-up loop.

    Starts from the two base values and walks forward n steps, keeping
    only the current pair of consecutive terms. There is no recursion, so
    there is no recursion-depth limit to hit.

    Args:
        n: Position in the sequence, a non-negative integer
            (fib(0) == 0, fib(1) == 1).

    Returns:
        The n-th Fibonacci number.

    Raises:
        ValueError: If n is negative. Without this check the loop would
            not run at all and a wrong value would be returned silently.
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    a, b = 0, 1  # invariant: a == F(k) and b == F(k + 1) after k iterations
    for _ in range(n):
        a, b = b, a + b
    return a

In [84]:
fib(10000) #The loop does not recurse, so fib(10000) runs without the maximum-recursion-depth problem, and it is fast

3364476487643178326662161200510754331030214846068006390656476997468008144216666236815559551363373402558206533268083615937373479048386526826304089246305643188735454436955982749160660209988418393386465273130008883026923567361313511757929743785441375213052050434770160226475831890652789085515436615958298727968298751063120057542878345321551510387081829896979161312785626503319548714021428753269818796204693609787990035096230229102636813149319527563022783762844154036058440257211433496118002309120828704608892396232883546150577658327125254609359112820392528539343462090424524892940390170623388899108584106518317336043747073790855263176432573399371287193758774689747992630583706574283016163740896917842637862421283525811282051637029808933209990570792006436742620238978311147005407499845925036063356093388383192338678305613643535189213327973290813373264265263398976392272340788292817795358057099369104917547080893184105614632233821746563732124822638309210329770164805472624384237486241145309381220656491403