### Vectorization
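- Plain Python `for` loops are slow on numeric data because every iteration is executed by the interpreter, one element at a time.
- NumPy offers vectorization: an operation is applied to a whole array at once in compiled code, so no explicit Python loop is needed.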

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the price history: the first CSV column becomes the index and is
# parsed as dates.
data = pd.read_csv(
    "ticker-fb.csv",
    index_col=0,
    parse_dates=True,
)

In [3]:
# Preview the first five rows to check that the dates ended up in the index.
data.head()

Date,High,Low,Open,Close,Volume,Adj Close
2018-01-02,181.580002,177.550003,177.679993,181.419998,18151900,181.419998
2018-01-03,184.779999,181.330002,181.880005,184.669998,16886600,184.669998
2018-01-04,186.210007,184.100006,184.899994,184.330002,13880900,184.330002
2018-01-05,186.899994,184.929993,185.589996,186.850006,13574500,186.850006
2018-01-08,188.899994,186.330002,187.199997,188.279999,17994700,188.279999


In [4]:
# First 12 rows of the Volume column as a plain NumPy array.
# .to_numpy() is the accessor pandas recommends over .values.
arr = data['Volume'].iloc[:12].to_numpy()

In [5]:
# Inspect the 12 Volume values extracted from the frame.
arr

array([18151900, 16886600, 13880900, 13574500, 17994700, 12393100,
       10529900,  9588600, 77551300, 36183800, 27992400, 23304900])

In [6]:
# Largest value in the array, computed without a Python loop.
np.max(arr)

77551300

In [7]:
# Position (0-based) of the largest value.
np.argmax(arr)

8

In [8]:
# Look up the element at the argmax position. The index is derived from
# argmax() instead of being hard-coded, so the cell stays correct if the
# underlying data changes.
arr[arr.argmax()]

77551300

In [9]:
# Smallest value in the array.
np.min(arr)

9588600

In [10]:
# Position (0-based) of the smallest value.
np.argmin(arr)

7

In [11]:
# Show the array again as a reference for the sum computed next.
arr

array([18151900, 16886600, 13880900, 13574500, 17994700, 12393100,
       10529900,  9588600, 77551300, 36183800, 27992400, 23304900])

In [12]:
# Total of all elements in the array.
np.sum(arr)

278032600

In [15]:
# (rows, columns) of the frame. DataFrame.shape reports this directly,
# without first converting the whole frame to a NumPy array.
data.shape

(756, 6)

In [16]:
# Column totals (one value per column). axis=0 is passed explicitly:
# relying on the default axis for np.sum(DataFrame) is deprecated in
# pandas >= 2.1 and is slated to reduce over both axes to a single scalar.
np.sum(data, axis=0)

High         1.500134e+05
Low          1.461565e+05
Open         1.480847e+05
Close        1.481633e+05
Volume       1.670523e+10
Adj Close    1.481633e+05
dtype: float64

In [17]:
# axis=1 reduces across the columns, giving one total per date.
# NOTE(review): this adds the price columns to Volume, so the result is
# dominated by Volume; treat it as a demonstration of `axis`, not as a
# meaningful quantity.
np.sum(data, axis=1)

Date
2018-01-02    1.815280e+07
2018-01-03    1.688752e+07
2018-01-04    1.388182e+07
2018-01-05    1.357543e+07
2018-01-08    1.799564e+07
                  ...     
2020-12-24    6.703340e+06
2020-12-28    2.330107e+07
2020-12-29    1.638439e+07
2020-12-30    1.180517e+07
2020-12-31    1.289427e+07
Length: 756, dtype: float64

In [18]:
# Row-wise maximum over all six columns, one value per date.
# NOTE(review): in the rows displayed the result is simply Volume, which is
# orders of magnitude larger than the price columns.
np.max(data, axis=1)

Date
2018-01-02    18151900.0
2018-01-03    16886600.0
2018-01-04    13880900.0
2018-01-05    13574500.0
2018-01-08    17994700.0
                 ...    
2020-12-24     6702000.0
2020-12-28    23299700.0
2020-12-29    16383000.0
2020-12-30    11803800.0
2020-12-31    12892900.0
Length: 756, dtype: float64

In [20]:
# Log returns of Close: log(1 + r), where r is the row-over-row percentage
# change. np.log1p computes log(1 + x) accurately for small x, so the
# explicit "+ 1" is not needed. The first row is NaN because it has no
# previous value to compare against.
np.log1p(data['Close'].pct_change())

Date
2018-01-02         NaN
2018-01-03    0.017756
2018-01-04   -0.001843
2018-01-05    0.013579
2018-01-08    0.007624
                ...   
2020-12-24   -0.002652
2020-12-28    0.035272
2020-12-29   -0.000795
2020-12-30   -0.017899
2020-12-31    0.004734
Name: Close, Length: 756, dtype: float64

In [21]:
# NumPy ufuncs accept a Series directly: the square root is taken
# element-wise and the result keeps the Date index and the column name.
np.sqrt(data['Volume'])

Date
2018-01-02    4260.504665
2018-01-03    4109.330846
2018-01-04    3725.707986
2018-01-05    3684.358832
2018-01-08    4242.016030
                 ...     
2020-12-24    2588.822126
2020-12-28    4826.976279
2020-12-29    4047.591877
2020-12-30    3435.665874
2020-12-31    3590.668461
Name: Volume, Length: 756, dtype: float64

In [22]:
# Element-wise square root written with the power operator; the rows shown
# match the np.sqrt output.
data['Volume']**0.5

Date
2018-01-02    4260.504665
2018-01-03    4109.330846
2018-01-04    3725.707986
2018-01-05    3684.358832
2018-01-08    4242.016030
                 ...     
2020-12-24    2588.822126
2020-12-28    4826.976279
2020-12-29    4047.591877
2020-12-30    3435.665874
2020-12-31    3590.668461
Name: Volume, Length: 756, dtype: float64