# Using Pandas and NumPy
## Portfolio and Risk
- Pandas Datareader
- Pandas
- NumPy
- Portfolio and return

### Resources
- Pandas Datareader https://pandas-datareader.readthedocs.io/ (https://youtu.be/sgndYho8RyI)
- Pandas https://pandas.pydata.org (https://youtu.be/m8ahf_c9hEc)
- NumPy https://numpy.org (Focus here)

### Pandas Datareader & Pandas DataFrame

In [1]:
import numpy as np
import pandas_datareader as pdr
import datetime as dt
import pandas as pd

In [2]:
# Download AAPL quotes through the Yahoo reader, from 1 Jan 2020 onwards
# (no end date is passed).
start = dt.datetime(year=2020, month=1, day=1)
data = pdr.get_data_yahoo("AAPL", start=start)

In [3]:
# Preview the first five rows of the downloaded table.
data.head(n=5)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,75.150002,73.797501,74.059998,75.087502,135480400.0,74.333511
2020-01-03,75.144997,74.125,74.287498,74.357498,146322800.0,73.61084
2020-01-06,74.989998,73.1875,73.447502,74.949997,118387200.0,74.197395
2020-01-07,75.224998,74.370003,74.959999,74.597504,108872000.0,73.848442
2020-01-08,76.110001,74.290001,74.290001,75.797501,132079200.0,75.036385


In [4]:
# Inspect the row labels; the output shows a DatetimeIndex named 'Date'.
data.index

DatetimeIndex(['2020-01-02', '2020-01-03', '2020-01-06', '2020-01-07',
               '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-13',
               '2020-01-14', '2020-01-15',
               ...
               '2021-02-10', '2021-02-11', '2021-02-12', '2021-02-16',
               '2021-02-17', '2021-02-18', '2021-02-19', '2021-02-22',
               '2021-02-23', '2021-02-24'],
              dtype='datetime64[ns]', name='Date', length=289, freq=None)

In [5]:
# Per-column dtypes; every column is reported as float64 in the output.
data.dtypes

High         float64
Low          float64
Open         float64
Close        float64
Volume       float64
Adj Close    float64
dtype: object

In [6]:
# Confirm that the downloaded object is a pandas DataFrame.
type(data)

pandas.core.frame.DataFrame

### NumPy

In [7]:
# The frame's values as a plain NumPy array; the row labels and column names
# are not part of the array.
data.to_numpy()

array([[7.51500015e+01, 7.37975006e+01, 7.40599976e+01, 7.50875015e+01,
        1.35480400e+08, 7.43335114e+01],
       [7.51449966e+01, 7.41250000e+01, 7.42874985e+01, 7.43574982e+01,
        1.46322800e+08, 7.36108398e+01],
       [7.49899979e+01, 7.31875000e+01, 7.34475021e+01, 7.49499969e+01,
        1.18387200e+08, 7.41973953e+01],
       ...,
       [1.29720001e+02, 1.25599998e+02, 1.28009995e+02, 1.26000000e+02,
        1.03607600e+08, 1.26000000e+02],
       [1.26709999e+02, 1.18389999e+02, 1.23760002e+02, 1.25860001e+02,
        1.57859100e+08, 1.25860001e+02],
       [1.25559998e+02, 1.22230003e+02, 1.24940002e+02, 1.25349998e+02,
        1.10691500e+08, 1.25349998e+02]])

In [8]:
# Keep the NumPy version of the price table for the array examples below.
arr = data.to_numpy()

In [9]:
# (rows, columns) of the array: 289 rows by 6 columns in the output shown.
arr.shape

(289, 6)

In [10]:
# Number of rows in the DataFrame; equals the first entry of arr.shape.
len(data)

289

In [11]:
# First row of the array (all columns for the first date).
arr[0, :]

array([7.51500015e+01, 7.37975006e+01, 7.40599976e+01, 7.50875015e+01,
       1.35480400e+08, 7.43335114e+01])

In [12]:
# The same first row taken from the DataFrame, for comparison with arr[0].
data.iloc[:1]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,75.150002,73.797501,74.059998,75.087502,135480400.0,74.333511


In [13]:
# A NumPy array has one dtype for all of its elements (float64 here).
arr.dtype

dtype('float64')

In [14]:
# First 10 values of column 0 (High), as an independent copy so that later
# changes to `small` cannot touch `arr`.
small = np.array(arr[:10, 0])

In [15]:
# Display the 10-element sample array.
small

array([75.15000153, 75.14499664, 74.98999786, 75.22499847, 76.11000061,
       77.60749817, 78.16750336, 79.26750183, 79.39250183, 78.875     ])

In [16]:
# First rows of the table again: the values in `small` match the start of the
# High column.
data.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,75.150002,73.797501,74.059998,75.087502,135480400.0,74.333511
2020-01-03,75.144997,74.125,74.287498,74.357498,146322800.0,73.61084
2020-01-06,74.989998,73.1875,73.447502,74.949997,118387200.0,74.197395
2020-01-07,75.224998,74.370003,74.959999,74.597504,108872000.0,73.848442
2020-01-08,76.110001,74.290001,74.290001,75.797501,132079200.0,75.036385


In [17]:
# Largest value in the sample, using the NumPy function.
np.max(small)

79.39250183105469

In [18]:
# Same result using the ndarray method.
small.max()

79.39250183105469

In [19]:
# Position (0-based) of the largest value in the sample.
small.argmax()

8

In [20]:
# Display the sample again to locate the maximum by eye.
small

array([75.15000153, 75.14499664, 74.98999786, 75.22499847, 76.11000061,
       77.60749817, 78.16750336, 79.26750183, 79.39250183, 78.875     ])

In [21]:
# Indexing with the position of the maximum returns the maximum itself.
small[np.argmax(small)]

79.39250183105469

In [22]:
# Element-wise natural logarithm of the array.
np.log(small)

array([4.31948614, 4.31941954, 4.31735474, 4.3204836 , 4.33217967,
       4.35166405, 4.358854  , 4.37282823, 4.37440393, 4.36786432])

In [23]:
# NumPy functions also accept a DataFrame; the result keeps the same index and
# columns.
np.log(data)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,4.319486,4.301325,4.304876,4.318654,18.724338,4.308562
2020-01-03,4.319420,4.305753,4.307943,4.308885,18.801326,4.298792
2020-01-06,4.317355,4.293025,4.296571,4.316821,18.589471,4.306729
2020-01-07,4.320484,4.309053,4.316955,4.312107,18.505683,4.302015
2020-01-08,4.332180,4.307976,4.307976,4.328065,18.698912,4.317973
...,...,...,...,...,...,...
2021-02-18,4.867534,4.847410,4.861362,4.865301,18.386587,4.865301
2021-02-19,4.872981,4.858261,4.869379,4.866534,18.287445,4.866534
2021-02-22,4.865378,4.833102,4.852108,4.836282,18.456121,4.836282
2021-02-23,4.841901,4.773984,4.818344,4.835170,18.877213,4.835170


In [24]:
# Ratio of each row to the previous row (first row is NaN because it has no
# predecessor).
data.div(data.shift(periods=1))

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,,,,,,
2020-01-03,0.999933,1.004438,1.003072,0.990278,1.080029,0.990278
2020-01-06,0.997937,0.987352,0.988693,1.007968,0.809082,1.007968
2020-01-07,1.003134,1.016157,1.020593,0.995297,0.919626,0.995297
2020-01-08,1.011765,0.998924,0.991062,1.016086,1.213160,1.016086
...,...,...,...,...,...,...
2021-02-18,0.983210,0.984089,0.984381,0.991364,0.987026,0.991364
2021-02-19,1.005462,1.010910,1.008050,1.001233,0.905614,1.001233
2021-02-22,0.992426,0.975155,0.982878,0.970201,1.183736,0.970201
2021-02-23,0.976796,0.942596,0.966800,0.998889,1.523625,0.998889


In [25]:
# First rows again, to check the ratios above by hand.
data.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,75.150002,73.797501,74.059998,75.087502,135480400.0,74.333511
2020-01-03,75.144997,74.125,74.287498,74.357498,146322800.0,73.61084
2020-01-06,74.989998,73.1875,73.447502,74.949997,118387200.0,74.197395
2020-01-07,75.224998,74.370003,74.959999,74.597504,108872000.0,73.848442
2020-01-08,76.110001,74.290001,74.290001,75.797501,132079200.0,75.036385


In [27]:
# Manual check of one ratio: High on 2020-01-03 divided by High on 2020-01-02.
75.144997/75.150002

0.9999333998687053

In [29]:
# Total log return per column: sum the daily log returns down the rows.
# The first row of the ratio is NaN (no previous day) and is skipped by sum().
# DataFrame.sum(axis=0) is used instead of np.sum(DataFrame): np.sum(df) goes
# through the axis=None reduction that pandas 2.1 deprecates, and that will
# collapse the whole frame to a single scalar in a future version.
np.log(data / data.shift()).sum(axis=0)

High         0.513298
Low          0.504580
Open         0.522958
Close        0.512456
Volume      -0.202080
Adj Close    0.522548
dtype: float64

In [31]:
# Log of (price / first-day price); the last row equals the summed daily log
# returns shown above.
np.log(data.div(data.iloc[0])).iloc[-1:]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-02-24,0.513298,0.50458,0.522958,0.512456,-0.20208,0.522548


In [32]:
# One-dimensional array: the shape is a 1-tuple.
small.shape

(10,)

In [33]:
# The 10 values before reshaping.
small

array([75.15000153, 75.14499664, 74.98999786, 75.22499847, 76.11000061,
       77.60749817, 78.16750336, 79.26750183, 79.39250183, 78.875     ])

In [34]:
# Arrange the same 10 values as 2 rows of 5.
np.reshape(small, (2, 5))

array([[75.15000153, 75.14499664, 74.98999786, 75.22499847, 76.11000061],
       [77.60749817, 78.16750336, 79.26750183, 79.39250183, 78.875     ]])

In [35]:
# Column vector: 10 rows, 1 column.
small.reshape(10, 1)

array([[75.15000153],
       [75.14499664],
       [74.98999786],
       [75.22499847],
       [76.11000061],
       [77.60749817],
       [78.16750336],
       [79.26750183],
       [79.39250183],
       [78.875     ]])

In [36]:
# -1 lets NumPy infer that dimension from the array size, giving the same
# (10, 1) result without hard-coding the length.
small.reshape(-1, 1)

array([[75.15000153],
       [75.14499664],
       [74.98999786],
       [75.22499847],
       [76.11000061],
       [77.60749817],
       [78.16750336],
       [79.26750183],
       [79.39250183],
       [78.875     ]])

### Portfolios

In [37]:
# Symbols that make up the example portfolio.
tickers = ['AAPL', 'MSFT', 'TWTR', 'IBM']
start = dt.datetime(year=2020, month=1, day=1)

# One request for all symbols; note that this replaces the single-ticker
# frame previously stored in `data`.
data = pdr.get_data_yahoo(tickers, start=start)

In [38]:
# The multi-ticker download has two column levels: attribute (Adj Close,
# Close, ...) and symbol.
data.head()

Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,High,High,...,Low,Low,Open,Open,Open,Open,Volume,Volume,Volume,Volume
Symbols,AAPL,MSFT,TWTR,IBM,AAPL,MSFT,TWTR,IBM,AAPL,MSFT,...,TWTR,IBM,AAPL,MSFT,TWTR,IBM,AAPL,MSFT,TWTR,IBM
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-02,74.333511,158.571075,32.299999,126.975204,75.087502,160.619995,32.299999,135.419998,75.150002,160.729996,...,31.959999,134.770004,74.059998,158.779999,32.310001,135.0,135480400.0,22622100.0,10721100.0,3148600.0
2020-01-03,73.61084,156.596588,31.52,125.96254,74.357498,158.619995,31.52,134.339996,75.144997,159.949997,...,31.26,133.559998,74.287498,158.320007,31.709999,133.570007,146322800.0,21116200.0,14429500.0,2373700.0
2020-01-06,74.197395,157.001373,31.639999,125.737526,74.949997,159.029999,31.639999,134.100006,74.989998,159.100006,...,31.16,133.199997,73.447502,157.080002,31.23,133.419998,118387200.0,20813700.0,12582500.0,2425500.0
2020-01-07,73.848442,155.569855,32.540001,125.821907,74.597504,157.580002,32.540001,134.190002,75.224998,159.669998,...,31.719999,133.399994,74.959999,159.320007,31.799999,133.690002,108872000.0,21634100.0,13712900.0,3090800.0
2020-01-08,75.036385,158.047836,33.049999,126.872055,75.797501,160.089996,33.049999,135.309998,76.110001,160.800003,...,32.349998,133.919998,74.290001,158.929993,32.349998,134.509995,132079200.0,27746500.0,14632400.0,4346000.0


In [39]:
# Keep only the adjusted-close prices, one column per ticker.
# The guard makes this cell safe to re-run: after the first run `data` no
# longer has the two-level (attribute, symbol) columns, so indexing
# 'Adj Close' a second time would raise KeyError.
if isinstance(data.columns, pd.MultiIndex):
    data = data['Adj Close']

In [40]:
# After the selection the columns are just the ticker symbols.
data.head()

Symbols,AAPL,MSFT,TWTR,IBM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-02,74.333511,158.571075,32.299999,126.975204
2020-01-03,73.61084,156.596588,31.52,125.96254
2020-01-06,74.197395,157.001373,31.639999,125.737526
2020-01-07,73.848442,155.569855,32.540001,125.821907
2020-01-08,75.036385,158.047836,33.049999,126.872055


In [41]:
# Fixed portfolio weights, listed in the same order as the ticker columns
# (AAPL, MSFT, TWTR, IBM).
portfolios = [0.25, 0.15, 0.40, 0.20]

In [42]:
# Check that the weights add up to 1.
np.sum(portfolios)

1.0

In [63]:
# Value of each position over time: prices normalised to the first day,
# scaled by the fixed weights and a starting capital of 100000.
data.div(data.iloc[0]).mul(portfolios).mul(100000)

Symbols,AAPL,MSFT,TWTR,IBM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-02,25000.000000,15000.000000,40000.000000,20000.000000
2020-01-03,24756.949626,14813.223758,39034.057216,19840.494087
2020-01-06,24954.221177,14851.514331,39182.662708,19805.051934
2020-01-07,24836.860500,14716.100112,40297.215708,19818.342892
2020-01-08,25236.391776,14950.504296,40928.792592,19983.752826
...,...,...,...,...
2021-02-18,43624.337245,23061.266938,89486.072871,19016.311706
2021-02-19,43678.144875,22794.510337,89510.836516,18742.241584
2021-02-22,42376.580128,22183.427260,87294.117063,19036.787713
2021-02-23,42329.495244,22066.130626,90613.002969,19013.160812


In [60]:
# Draw one random weight per ticker and normalise so the weights sum to 1.
# A seeded Generator makes the weights, and every result derived from them,
# identical on each "Restart & Run All". Sizing by len(tickers) keeps the
# weight vector in step with the price columns instead of a hard-coded 4.
rng = np.random.default_rng(42)
weight = rng.random(len(tickers))
weight /= weight.sum()
weight

array([0.33126896, 0.30945036, 0.34068727, 0.01859341])

In [61]:
# Check that the normalised weights add up to 1.
weight.sum()

1.0

In [64]:
# Total portfolio value per day with the fixed weights: add the position
# values across the ticker columns (axis=1).
data.div(data.iloc[0]).mul(portfolios).mul(100000).sum(axis=1)

Date
2020-01-02    100000.000000
2020-01-03     98444.724688
2020-01-06     98793.450150
2020-01-07     99668.519212
2020-01-08    101099.441489
                  ...      
2021-02-18    175187.988760
2021-02-19    174725.733312
2021-02-22    170890.912164
2021-02-23    174021.789650
2021-02-24    172817.135930
Length: 289, dtype: float64

In [65]:
# Same daily portfolio value, this time using the random weights.
data.div(data.iloc[0]).mul(weight).mul(100000).sum(axis=1)

Date
2020-01-02    100000.000000
2020-01-03     98455.080169
2020-01-06     98918.748524
2020-01-07     99434.397537
2020-01-08    101000.686355
                  ...      
2021-02-18    183365.809341
2021-02-19    182882.401184
2021-02-22    178036.426173
2021-02-23    180556.610812
2021-02-24    179297.277227
Length: 289, dtype: float64