<a href="https://colab.research.google.com/github/zuzka05/stat_learn/blob/main/QuantTradingAccelerator_Video2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Quant Trading Accelerator 🚀

Learn from 0, extremely fast => JIT Learning => Build, Test, Learn, Iterate 🚀

## Part 2: Arrays (The fundamental building block of Quant Trading/AI/ML)

In [29]:
prices = [10.2, 9.4, 9.9, 10.5]

In [30]:
prices

[10.2, 9.4, 9.9, 10.5]

In [32]:
#Python's built-in list plays the role of an array here: ordered, indexable, resizable
#type() confirms which class the object belongs to
type(prices)

list

In [33]:
len(prices)

4

### Access elements

In [34]:
prices[0]

10.2

In [35]:
prices[1]

9.4

In [36]:
prices[2]

9.9

In [37]:
prices[3]

10.5

In [38]:
prices[-1]

10.5

In [39]:
prices[-2]

9.9

In [40]:
prices[-3]

9.4

In [41]:
prices[-4]

10.2

In [13]:
#index 5 is out of range for a 4-element list, so this would raise IndexError
#prices[5]

### Update Elements

In [42]:
#Update the first element to None
prices[0] = None

In [43]:
prices

[None, 9.4, 9.9, 10.5]

In [44]:
prices[-1] = None

In [45]:
prices

[None, 9.4, 9.9, None]

### Remove elements

In [46]:
#Remove at the tail of the list
prices.pop()

In [21]:
prices

[None, 9.4, 9.9]

In [47]:
#Remove from the beginning
#you receive the element removed
prices.pop(0)

In [49]:
prices

[9.4, 9.9]

In [50]:
del prices[0]

In [51]:
prices

[9.9]

In [52]:
#here you don't receive the element removed
del prices[-1]

In [53]:
prices

[]

In [54]:
#200 million elements: large enough that the cost of list operations can be timed
n = 200000000
#build a list of n identical float prices
prices_ts = [1.0 for _ in range(n)]

In [56]:
#Remove from the beginning, then need to move everything across
#linear time operation
#it scales linearly with the size of your array
%%time
prices_ts.pop(0)

CPU times: user 162 ms, sys: 1.98 ms, total: 164 ms
Wall time: 162 ms


1.0

In [57]:
prices_ts = [1.0 for _ in range(n)]

In [59]:
#now pop it at the end of the list
#it's much faster because it doesn't have to shift elements across
#constant time operation
#you should remove the elements from the array from the end
%%time
prices_ts.pop()

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 6.44 µs


1.0

In [None]:
### Add elements

In [60]:
prices = []

In [61]:
#add prices manually
prices.append(10.5)

In [62]:
prices

[10.5]

In [63]:
#add multiple prices in one line
prices.extend([11.4, 9.5, 12.3])

In [64]:
prices

[10.5, 11.4, 9.5, 12.3]

### Inhomogeneous Array

In [65]:
#Mixed data types
[1.0, "a", True, 2]

[1.0, 'a', True, 2]

In [66]:
[1.0, 2, 3.0, 4]

[1.0, 2, 3.0, 4]

### Homogeneous Array

In [69]:
#Most common is float array
[1.0, 2.0, 3.0, 4.0]

[1.0, 2.0, 3.0, 4.0]

In [70]:
[1, 2, 3, 4]

[1, 2, 3, 4]

### Loops

In [71]:
for i in range(5):
  print(i)

0
1
2
3
4


In [74]:
# loop over each element individually
# we don't know how many prices we got when we loop over them
prices = [10.2, 9.5, 11.5, 9.4]
for price in prices:
  print(price)

10.2
9.5
11.5
9.4


In [75]:
#Every entry is the P&L of one completed (round-trip) trade
#Walk the list once, keeping a running total as we go
trade_pnls = [1.2, -2.0, -1.0, 4.1]

total_pnl = 0.0
for trade_pnl in trade_pnls:
  total_pnl = total_pnl + trade_pnl

total_pnl

2.3

In [None]:
#pandas - doing calcs while looping

In [77]:
1.2 + -2.0 + -1.0 + 4.1

2.3

### Numpy Arrays

In [78]:
import numpy as np

n = 100000000
a = np.ones(n)

In [None]:
#each array element will be 1
#i.e. a vector of ones (an all-ones vector), not an identity matrix

In [79]:
a

array([1., 1., 1., ..., 1., 1., 1.])

In [80]:
len(a)

100000000

In [None]:
#trade p&l - we want to sum them up

In [81]:
%%time
np.sum(a)

CPU times: user 95.1 ms, sys: 49 µs, total: 95.2 ms
Wall time: 94.5 ms


np.float64(100000000.0)

In [None]:
#The np.sum call above is so much faster because it works on a homogeneous array
#Every element is a floating-point number, and NumPy can take advantage of this
#by using specialized CPU instructions called SIMD (single instruction, multiple data)
#SIMD parallelizes the calculation across several values at once; GPUs rely on a similar idea
#If it were an inhomogeneous array, it wouldn't be able to run the specialized instructions
#Because it's an array of floating-point numbers, it can use those specialized instructions
#NumPy is also written in C, with highly optimized calculations

In [82]:
%%time
sum = 0.0
for val in a:
  sum += val
sum

CPU times: user 20.1 s, sys: 9.51 ms, total: 20.1 s
Wall time: 20.5 s


np.float64(100000000.0)

### Logarithms (log)

In [83]:
np.exp(2)

np.float64(7.38905609893065)

In [86]:
#It's an inverse of the exp function
np.log(np.exp(2))

np.float64(2.0)

In [None]:
### Invest $1000 at a rate of 5% per year, compounded annually

In [87]:
#What we make after 1 year
1000 * 1.05

1050.0

In [88]:
#Re-invest money into it
1000 * 1.05 * 1.05

1102.5

In [89]:
1000 * 1.05 * 1.05 * 1.05

1157.625

In [90]:
1000 * 1.05 * 1.05 * 1.05 * 1.05

1215.5062500000001

In [91]:
#Compound $1000 at 5% once per year for 20 years
capital = 1000
for _ in range(20):
  capital = capital * 1.05
capital

2653.2977051444223

In [92]:
1000 * 1.05 ** 20

2653.2977051444223

### How long to double our investment?

In [None]:
#We could use Brute-force approach
#Loop through every t, without being crazy to do it

In [93]:
capital = 1000
t = 1

2 * capital == capital * 1.05 ** t

False

In [94]:
2 == (1.05) ** t

False

In [95]:
np.log(2) == t * np.log(1.05)

np.False_

In [96]:
t = np.log(2) / np.log(1.05)

In [97]:
t

np.float64(14.206699082890461)

In [98]:
capital * 1.05 ** t

np.float64(2000.0)

### Why returns?

In [99]:
pnl = 100

In [100]:
pnl / 50

2.0

In [101]:
pnl / 99

1.0101010101010102

### Log Returns

In [102]:
portfolio = [100, 120, 100]

In [103]:
[120-100, 100-120]

[20, -20]

In [104]:
#Asymmetric property of returns
[20/100, -20/120]

[0.2, -0.16666666666666666]

In [105]:
log_returns = [np.log(120/100), np.log(100/120)]

In [106]:
#unitless and symmetric measurement
log_returns

[np.float64(0.1823215567939546), np.float64(-0.1823215567939546)]

In [None]:
#summing the log returns gives us the compounded (log) rate of return over the whole period

In [109]:
np.sum(log_returns)

np.float64(0.0)

In [110]:
100 * np.sum(log_returns)

np.float64(0.0)

### Exercise 1: Calculate the average log return

In [122]:
log_returns = [-0.1, 0.22, 0.15, 0.344, -0.2]
avg_log_return = 0.0

In [124]:
#Reset the accumulator in this cell so re-running it never double-counts
avg_log_return = 0.0
#Each return contributes 1/len(log_returns) of itself to the average
for log_return in log_returns:
  avg_log_return += log_return / len(log_returns)
avg_log_return

0.08279999999999998

In [114]:
#Floating-point sums are rarely bit-exact, so compare with a tolerance instead of ==
np.isclose(avg_log_return, 0.0828)

np.True_

### Exercise 2: Calculate Total Log Returns

In [127]:
#Pair each portfolio value with the one before it and take the log of their ratio
portfolio = [100, 120, 100, 80, 155]
log_returns = [
  np.log(current / previous)
  for previous, current in zip(portfolio, portfolio[1:])
]
log_returns

[np.float64(0.1823215567939546),
 np.float64(-0.1823215567939546),
 np.float64(-0.2231435513142097),
 np.float64(0.661398482245365)]

In [128]:
#Exponentiating the sum of log returns recovers the final portfolio value
#Compare with a tolerance because floating-point results are rarely bit-exact
np.isclose(100 * np.exp(np.sum(log_returns)), 155.0)

np.True_

### Exercise 3: Calculate Cumulative Log Returns

In [139]:
portfolio = [100, 120, 100, 80, 155]
cum_log_returns = [] # list
cumulative_sum = 0 # scalar (number)

In [140]:
#Re-initialise the accumulators here so re-running this cell starts from scratch
cum_log_returns = [] # list of running totals, one per time step
cumulative_sum = 0 # scalar running total
for i in range(1, len(portfolio)):
  #singular name: one period's log return (avoids rebinding the `log_returns` list to a scalar)
  log_return = np.log(portfolio[i] / portfolio[i-1])
  cumulative_sum += log_return # adds number to number
  cum_log_returns.append(cumulative_sum) # stores each cumulative value
cum_log_returns

[np.float64(0.1823215567939546),
 np.float64(0.0),
 np.float64(-0.2231435513142097),
 np.float64(0.4382549309311553)]

In [141]:
#Rebuild the expected running totals independently, then compare with a tolerance
#(exact == on floats only passes when both sides add in exactly the same order)
expected_cum_log_returns = np.cumsum([np.log(120/100), np.log(100/120), np.log(80/100), np.log(155/80)])
#Check the length first so a mismatch yields False rather than a broadcasting error
len(cum_log_returns) == len(expected_cum_log_returns) and np.allclose(cum_log_returns, expected_cum_log_returns)

True