# Pandas Time Series Analysis Tutorial: Handling Holidays

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday

from datetime import datetime

%matplotlib inline

In [2]:
# Daily AAPL price history in CSV form, downloaded straight from Yahoo Finance.
# NOTE(review): this cell needs network access, and the endpoint may no longer
# answer unauthenticated requests — confirm before relying on "Run All".
AAPL_CSV_URL = 'https://query1.finance.yahoo.com/v7/finance/download/AAPL?period1=1593937224&period2=1625473224&interval=1d&events=history&includeAdjustedClose=true'
data = pd.read_csv(AAPL_CSV_URL)  # comma is already read_csv's default separator

In [3]:
# Preview the first rows to check that the CSV parsed into the expected columns.
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-07-06,92.5,93.945,92.467499,93.462502,92.83696,118655600
1,2020-07-07,93.852501,94.654999,93.057503,93.172501,92.548889,112424400
2,2020-07-08,94.18,95.375,94.089996,95.342499,94.704369,117092000
3,2020-07-09,96.262497,96.317497,94.672501,95.752502,95.111641,125642800
4,2020-07-10,95.334999,95.980003,94.705002,95.919998,95.278015,90257200


In [4]:
# Discard the 'Date' column: the following cells generate a DatetimeIndex
# and attach it to the frame instead.
data = data.drop(columns=['Date'])
data.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
0,92.5,93.945,92.467499,93.462502,92.83696,118655600
1,93.852501,94.654999,93.057503,93.172501,92.548889,112424400
2,94.18,95.375,94.089996,95.342499,94.704369,117092000
3,96.262497,96.317497,94.672501,95.752502,95.111641,125642800
4,95.334999,95.980003,94.705002,95.919998,95.278015,90257200


In [5]:
# (rows, columns) — the row count is the number of periods the generated index needs.
data.shape

(252, 6)

In [6]:
# Baseline calendar: 252 consecutive Monday-Friday business days.
# The built-in 'B' frequency skips weekends only; it knows nothing about holidays.
rang = pd.date_range(start='07/06/2020', freq='B', periods=252)
rang

DatetimeIndex(['2020-07-06', '2020-07-07', '2020-07-08', '2020-07-09',
               '2020-07-10', '2020-07-13', '2020-07-14', '2020-07-15',
               '2020-07-16', '2020-07-17',
               ...
               '2021-06-09', '2021-06-10', '2021-06-11', '2021-06-14',
               '2021-06-15', '2021-06-16', '2021-06-17', '2021-06-18',
               '2021-06-21', '2021-06-22'],
              dtype='datetime64[ns]', length=252, freq='B')

#### Using CustomBusinessDay to generate US holidays calendar frequency

In [7]:
# Business-day offset that skips weekends and the holidays defined by
# pandas' USFederalHolidayCalendar.
us_federal_calendar = USFederalHolidayCalendar()
usb = CustomBusinessDay(calendar=us_federal_calendar)
usb

<CustomBusinessDay>

In [8]:
# Same start date and length as the plain business-day range, but stepping with
# the holiday-aware offset, so the range ends later.
rang = pd.date_range(start='07/06/2020', freq=usb, periods=252)
rang

DatetimeIndex(['2020-07-06', '2020-07-07', '2020-07-08', '2020-07-09',
               '2020-07-10', '2020-07-13', '2020-07-14', '2020-07-15',
               '2020-07-16', '2020-07-17',
               ...
               '2021-06-22', '2021-06-23', '2021-06-24', '2021-06-25',
               '2021-06-28', '2021-06-29', '2021-06-30', '2021-07-01',
               '2021-07-02', '2021-07-06'],
              dtype='datetime64[ns]', length=252, freq='C')

In [9]:
# Label each row with the generated holiday-aware dates.
# NOTE(review): US federal holidays are not necessarily the exchange's trading
# holidays, so these labels may drift from the real trading dates that were in
# the dropped 'Date' column — TODO confirm against the raw data.
data = data.set_index(rang)
data.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
2020-07-06,92.5,93.945,92.467499,93.462502,92.83696,118655600
2020-07-07,93.852501,94.654999,93.057503,93.172501,92.548889,112424400
2020-07-08,94.18,95.375,94.089996,95.342499,94.704369,117092000
2020-07-09,96.262497,96.317497,94.672501,95.752502,95.111641,125642800
2020-07-10,95.334999,95.980003,94.705002,95.919998,95.278015,90257200


You can define your own calendar using `AbstractHolidayCalendar`, as shown below. `USFederalHolidayCalendar` is the only calendar available in the pandas library, and it serves as an example for those who want to write their own custom calendars. The implementation of `USFederalHolidayCalendar` can be found [here](https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/holiday.py).

### AbstractHolidayCalendar

In [10]:
# Create a custom holiday calendar
class myBirthdayCalendar(AbstractHolidayCalendar):
    """Holiday calendar with a single yearly rule: John's birthday on 16 July.

    The rule has no observance adjustment, so the holiday always stays on
    16 July itself, even in years when that date falls on a weekend.
    """
    rules = [
        Holiday("John's Birthday", month=7, day=16),
    ]


# Business-day offset (default Monday-Friday week) that also skips the
# holidays produced by the calendar above.
myc = CustomBusinessDay(calendar=myBirthdayCalendar())
myc

<CustomBusinessDay>

In [11]:
# Same start and length as the earlier ranges; 2020-07-16 (the custom holiday) is skipped.
pd.date_range(start='07/06/2020',periods=252, freq=myc)

DatetimeIndex(['2020-07-06', '2020-07-07', '2020-07-08', '2020-07-09',
               '2020-07-10', '2020-07-13', '2020-07-14', '2020-07-15',
               '2020-07-17', '2020-07-20',
               ...
               '2021-06-10', '2021-06-11', '2021-06-14', '2021-06-15',
               '2021-06-16', '2021-06-17', '2021-06-18', '2021-06-21',
               '2021-06-22', '2021-06-23'],
              dtype='datetime64[ns]', length=252, freq='C')

### CustomBusinessDay

The weekend in Egypt is Friday and Saturday, and Sunday is a normal working day. You can handle this custom week schedule using `CustomBusinessDay` with a `weekmask`, as shown below.

In [12]:
# Egyptian working week: Sunday to Thursday (Friday and Saturday are the weekend).
egypt_weekdays = "Sun Mon Tue Wed Thu"
b = CustomBusinessDay(weekmask=egypt_weekdays)

# Business days under that week mask, starting Monday 2020-07-06.
egypt_range = pd.date_range(start="7/6/2020", periods=252, freq=b)
egypt_range

DatetimeIndex(['2020-07-06', '2020-07-07', '2020-07-08', '2020-07-09',
               '2020-07-12', '2020-07-13', '2020-07-14', '2020-07-15',
               '2020-07-16', '2020-07-19',
               ...
               '2021-06-09', '2021-06-10', '2021-06-13', '2021-06-14',
               '2021-06-15', '2021-06-16', '2021-06-17', '2021-06-20',
               '2021-06-21', '2021-06-22'],
              dtype='datetime64[ns]', length=252, freq='C')

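### You can also add holidays to this custom business day frequency

Note that a holiday only changes the result when it falls on a working day of the week mask. In the example below both dates (Saturday 2020-07-04 and Friday 2020-07-10) already fall on the Egyptian weekend, so the generated range is the same as the one without holidays.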

In [13]:
# Explicit holiday dates are applied on top of the week mask.
# NOTE: 2020-07-04 is a Saturday and 2020-07-10 a Friday, i.e. both already fall
# on the Egyptian weekend, so this range is identical to the one built without holidays.
egypt_holidays = ['2020-07-04', '2020-07-10']
b = CustomBusinessDay(weekmask=egypt_weekdays, holidays=egypt_holidays)

pd.date_range(start="7/6/2020", periods=252, freq=b)

DatetimeIndex(['2020-07-06', '2020-07-07', '2020-07-08', '2020-07-09',
               '2020-07-12', '2020-07-13', '2020-07-14', '2020-07-15',
               '2020-07-16', '2020-07-19',
               ...
               '2021-06-09', '2021-06-10', '2021-06-13', '2021-06-14',
               '2021-06-15', '2021-06-16', '2021-06-17', '2021-06-20',
               '2021-06-21', '2021-06-22'],
              dtype='datetime64[ns]', length=252, freq='C')

### Mathematical operations on date object using custom business day

In [14]:

# Thursday 9 July 2020 — the reference date for the offset arithmetic.
dt = datetime(year=2020, month=7, day=9)
dt

datetime.datetime(2020, 7, 9, 0, 0)

In [15]:
# Offsets can be scaled and added to dates: one custom business day after
# Thursday 2020-07-09 skips the Friday/Saturday weekend and lands on Sunday 2020-07-12.
dt + 1*b

Timestamp('2020-07-12 00:00:00')