In [15]:
import pandas as pd
from pandas.tseries.frequencies import to_offset
import numpy
import matplotlib.pyplot as plt
import cufflinks as cf
from pandas_datareader import data
import plotly.express as px

# Read the stock price table from disk.
print('Loading...')

# read_csv options used here:
#   header=[0, 1]   -> the first two rows of the file form a two-level column header
#   index_col=0     -> the first column becomes the row index
#   parse_dates=[0] -> that first column is parsed into datetime values
df = pd.read_csv(
    '../data/stocks.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[0],
)

print('Loaded')

Loading...
Loaded


In [33]:
# Display the loaded DataFrame as this cell's output.
df

Attributes,Adj Close,Adj Close,Adj Close,Close,Close,Close,High,High,High,Low,Low,Low,Open,Open,Open,Volume,Volume,Volume
Symbols,MSFT,GE,AAPL,MSFT,GE,AAPL,MSFT,GE,AAPL,MSFT,GE,AAPL,MSFT,GE,AAPL,MSFT,GE,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2010-01-04,24.049969,11.001803,6.583586,30.950001,14.855769,7.643214,31.100000,15.038462,7.660714,30.590000,14.567308,7.585000,30.620001,14.634615,7.622500,38409100.0,69763096.0,493729600.0
2010-01-05,24.057743,11.058768,6.594968,30.959999,14.932692,7.656429,31.100000,15.067308,7.699643,30.639999,14.855769,7.616071,30.850000,14.865385,7.664286,49749600.0,67132624.0,601904800.0
2010-01-06,23.910097,11.001803,6.490066,30.770000,14.855769,7.534643,31.080000,15.019231,7.686786,30.520000,14.846154,7.526786,30.879999,14.932692,7.656429,58182400.0,57683496.0,552160000.0
2010-01-07,23.661432,11.571475,6.478067,30.450001,15.625000,7.520714,30.700001,15.846154,7.571429,30.190001,14.836538,7.466071,30.629999,14.884615,7.562500,50559700.0,192891192.0,477131200.0
2010-01-08,23.824627,11.820707,6.521136,30.660000,15.961538,7.570714,30.879999,16.048077,7.571429,30.240000,15.644231,7.466429,30.280001,15.682692,7.510714,51197400.0,119717104.0,447610800.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-24,222.238144,10.650000,131.773087,222.750000,10.650000,131.970001,223.610001,10.850000,133.460007,221.199997,10.620000,131.100006,221.419998,10.850000,131.320007,10550600.0,30049700.0,54930100.0
2020-12-28,224.443069,10.640000,136.486053,224.960007,10.640000,136.690002,226.029999,10.780000,137.339996,223.020004,10.580000,133.509995,224.449997,10.700000,133.990005,17933500.0,53796700.0,124486200.0
2020-12-29,223.634918,10.560000,134.668762,224.149994,10.560000,134.869995,227.179993,10.770000,138.789993,223.580002,10.540000,134.339996,226.309998,10.660000,138.050003,17403200.0,53035900.0,121047300.0
2020-12-30,221.170593,10.710000,133.520477,221.679993,10.710000,133.720001,225.630005,10.850000,135.990005,221.470001,10.550000,133.399994,225.229996,10.580000,135.580002,20272300.0,50621000.0,96452100.0


In [25]:
# Configure cufflinks for offline use.
cf.set_config_file(offline=True)

# Take the cross-section of the two-level column header where the second
# level equals "AAPL"; the matched level is dropped, leaving one column per
# attribute for that symbol.
aapl = df.xs("AAPL", axis=1, level=1)

In [26]:
# Display the AAPL-only frame as this cell's output.
aapl

Attributes,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,6.583586,7.643214,7.660714,7.585000,7.622500,493729600.0
2010-01-05,6.594968,7.656429,7.699643,7.616071,7.664286,601904800.0
2010-01-06,6.490066,7.534643,7.686786,7.526786,7.656429,552160000.0
2010-01-07,6.478067,7.520714,7.571429,7.466071,7.562500,477131200.0
2010-01-08,6.521136,7.570714,7.571429,7.466429,7.510714,447610800.0
...,...,...,...,...,...,...
2020-12-24,131.773087,131.970001,133.460007,131.100006,131.320007,54930100.0
2020-12-28,136.486053,136.690002,137.339996,133.509995,133.990005,124486200.0
2020-12-29,134.668762,134.869995,138.789993,134.339996,138.050003,121047300.0
2020-12-30,133.520477,133.720001,135.990005,133.399994,135.580002,96452100.0


In [32]:
# How each daily column is collapsed into one value per week:
#   Open  -> value from the first row of the week
#   Close -> value from the last row of the week
#   High  -> largest value seen during the week
#   Low   -> smallest value seen during the week
ohlc_rules = {
    "Open": "first",
    "Close": "last",
    "High": "max",
    "Low": "min",
}

# "W-Fri" groups the rows into weekly bins that end on Friday.
# No label= argument is passed, so pandas applies its default for weekly
# frequencies and stamps each row with the Friday that closes the week.
weekly = aapl.resample("W-Fri").agg(ohlc_rules)

weekly

Attributes,Open,Close,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-08,7.622500,7.570714,7.699643,7.466071
2010-01-15,7.600000,7.354643,7.607143,7.289286
2010-01-22,7.440357,7.062500,7.698214,7.041429
2010-01-29,7.232500,6.859286,7.632500,6.794643
2010-02-05,6.870357,6.980714,7.150000,6.816071
...,...,...,...,...
2020-12-04,116.970001,122.250000,123.779999,116.809998
2020-12-11,122.309998,122.410004,125.949997,120.150002
2020-12-18,122.599998,126.660004,129.580002,121.540001
2020-12-25,125.019997,131.970001,134.410004,123.449997


In [34]:
# How each daily column is collapsed into one value per week:
#   Open  -> value from the first row of the week
#   Close -> value from the last row of the week
#   High  -> largest value seen during the week
#   Low   -> smallest value seen during the week
ohlc_rules = {
    "Open": "first",
    "Close": "last",
    "High": "max",
    "Low": "min",
}

# "W-Fri" groups the rows into weekly bins that end on Friday; with no
# label= argument each row is stamped with that week-ending Friday.
weekly = aapl.resample("W-Fri").agg(ohlc_rules)

# Move every label back four days so it falls on the Monday of the same week.
# (label="left" is not used for this: it would stamp rows with the bin's left
# edge, which for Friday-ending bins is the preceding Friday, not the Monday.)
week_start_shift = to_offset("4d")
weekly.index = weekly.index - week_start_shift

weekly

Attributes,Open,Close,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,7.622500,7.570714,7.699643,7.466071
2010-01-11,7.600000,7.354643,7.607143,7.289286
2010-01-18,7.440357,7.062500,7.698214,7.041429
2010-01-25,7.232500,6.859286,7.632500,6.794643
2010-02-01,6.870357,6.980714,7.150000,6.816071
...,...,...,...,...
2020-11-30,116.970001,122.250000,123.779999,116.809998
2020-12-07,122.309998,122.410004,125.949997,120.150002
2020-12-14,122.599998,126.660004,129.580002,121.540001
2020-12-21,125.019997,131.970001,134.410004,123.449997
