## Handling Dates and Times

#### Converting Strings to Dates

In [6]:
import numpy as np
import pandas as pd

# raw inputs: day-month-year strings using a 12-hour clock with AM/PM
date_strings = np.array(['03-08-2015 12:56 AM', '26-02-2003 07:45 PM'])

# strptime pattern describing the layout of the strings above
day_first_format = '%d-%m-%Y %I:%M %p'

# parse the strings one at a time; with errors='coerce' a value that does
# not match the pattern yields NaT instead of raising
[pd.to_datetime(raw, format = day_first_format, errors = 'coerce')
 for raw in date_strings]

[Timestamp('2015-08-03 00:56:00'), Timestamp('2003-02-26 19:45:00')]

#### Handling Time Zones

In [11]:
import pandas as pd

# a timestamp can be made time-zone aware directly at construction
pd.Timestamp(
    '2017-05-01 06:00:00',
    tz = 'Europe/London',
)

# alternatively, start from a naive timestamp (no time zone attached) ...
date = pd.Timestamp('2017-05-01 06:16:47')

# ... attach a zone to it without shifting the wall-clock time ...
date_in_london = date.tz_localize(tz = 'Europe/London')

# ... and then express the same instant in another zone
date_in_africa = date_in_london.tz_convert(tz = 'Africa/Abidjan')

# show both representations, one per line
print(f'Date in London: {date_in_london}',
      f'Date in Africa: {date_in_africa}',
      sep = '\n')

Date in London: 2017-05-01 06:16:47+01:00
Date in Africa: 2017-05-01 05:16:47+00:00


#### Show all time zones

In [12]:
from pytz import all_timezones
# preview only the first four zone names rather than the whole list
all_timezones[:4]

['Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers']

#### Selecting Dates and Times

In [14]:
# option 1: filter rows with a boolean mask on the datetime column

# NOTE: the alias must be `pd` (it was previously misspelled `od`), because
# every call in this cell goes through `pd`.
import pandas as pd

# create data frame
dataframe = pd.DataFrame()

# create datetimes: one million consecutive hourly timestamps starting at
# 2015-01-01 00:00. The lowercase 'h' alias is used because the uppercase
# 'H' spelling is deprecated in recent pandas releases.
dataframe['date'] = pd.date_range('1/1/2015', periods = 1000000,
                                 freq = 'h')

# select observations between two dates
# (start is exclusive because of `>`, end is inclusive because of `<=`)
dataframe[(dataframe['date'] > '2016-2-1 01:00:00') &
          (dataframe['date'] <= '2016-2-1 04:00:00')]

Unnamed: 0,date
9506,2016-02-01 02:00:00
9507,2016-02-01 03:00:00
9508,2016-02-01 04:00:00


In [16]:
# option 2:

# promote the 'date' column to the index while keeping it as a column too
dataframe = dataframe.set_index('date', drop = False)

# slice the datetime index by label; as the displayed result shows, both
# the first and the last timestamp are part of the selection
window_start = '2016-2-1 01:00:00'
window_end = '2016-2-1 04:00:00'
dataframe.loc[window_start:window_end]

Unnamed: 0_level_0,date
date,Unnamed: 1_level_1
2016-02-01 01:00:00,2016-02-01 01:00:00
2016-02-01 02:00:00,2016-02-01 02:00:00
2016-02-01 03:00:00,2016-02-01 03:00:00
2016-02-01 04:00:00,2016-02-01 04:00:00


#### Break Up Dates into Multiple Features

In [17]:
import pandas as pd

# 150 weekly timestamps to decompose
dataframe = pd.DataFrame()
dataframe['date'] = pd.date_range(start = '02/06/2019', periods = 150,
                                  freq = 'W')

# copy each calendar/clock component of 'date' into a column that carries
# the component's own name
for component in ('year', 'month', 'day', 'hour', 'minute'):
    dataframe[component] = getattr(dataframe['date'].dt, component)

#### Calculating Difference between dates

In [19]:
import pandas as pd

# create two date time features
dataframe = pd.DataFrame()
dataframe['Arrived'] = [pd.Timestamp('01-01-2017'),
                        pd.Timestamp('01-04-2017')]
dataframe['Left'] = [pd.Timestamp('01-01-2017'),
                     pd.Timestamp('01-06-2017')]

# sol 1: calculate duration between features showing days
# (the subtraction yields a timedelta Series; it is computed once and
# reused for the second solution)
durations = dataframe['Left'] - dataframe['Arrived']
durations

# sol 2: only keep the numerical values
# (the vectorized .dt accessor extracts the whole-day count of every
# element without looping over the individual timedeltas in Python)
durations.dt.days

0    0
1    2
dtype: int64

#### Encoding Days of the Week

In [20]:
import pandas as pd

# create dates: the three month-end dates that follow 2012-02-02.
# An explicit MonthEnd offset replaces the "M" string alias, which recent
# pandas releases deprecate.
dates = pd.Series(pd.date_range("2/2/2012", periods=3,
                                freq=pd.offsets.MonthEnd()))

# Show days of the week (names).
# Series.dt.weekday_name no longer exists in current pandas; day_name()
# is the replacement.
dates.dt.day_name()

# Show days of the week (numeral, Monday=0 ... Sunday=6)
dates.dt.weekday

0    2
1    5
2    0
dtype: int64

#### Create Lagged Feature

In [21]:
import pandas as pd

# Five consecutive calendar days and the price observed on each
trading_days = pd.date_range(start="1/1/2001", periods=5, freq="D")
closing_prices = [1.1, 2.2, 3.3, 4.4, 5.5]

# Assemble the frame column by column
dataframe = pd.DataFrame()
dataframe["dates"] = trading_days
dataframe["stock_price"] = closing_prices

# Move the prices down one row so that each row also carries the price of
# the row before it; the first row has no predecessor and is left missing
dataframe["previous_days_stock_price"] = dataframe["stock_price"].shift(periods=1)

# Display the result
dataframe

Unnamed: 0,dates,stock_price,previous_days_stock_price
0,2001-01-01,1.1,
1,2001-01-02,2.2,1.1
2,2001-01-03,3.3,2.2
3,2001-01-04,4.4,3.3
4,2001-01-05,5.5,4.4


#### Calculate a Statistic over a Rolling Window

In [22]:
# Load library
import pandas as pd

# Create datetimes: five consecutive month-end timestamps.
# An explicit MonthEnd offset replaces the "M" string alias, which recent
# pandas releases deprecate.
time_index = pd.date_range("01/01/2010", periods=5,
                           freq=pd.offsets.MonthEnd())

# Create data frame, set index
dataframe = pd.DataFrame(index=time_index)

# Create feature
dataframe["Stock_Price"] = [1,2,3,4,5]

# Calculate rolling mean over each row and the row before it; the first
# row has no complete window, so its result is missing
dataframe.rolling(window=2).mean()

Unnamed: 0,Stock_Price
2010-01-31,
2010-02-28,1.5
2010-03-31,2.5
2010-04-30,3.5
2010-05-31,4.5


### Handling missing data in time series

#### Interpolate Missing Values

In [25]:
# Load libraries
import pandas as pd
import numpy as np

# Create dates: five consecutive month-end timestamps.
# An explicit MonthEnd offset replaces the "M" string alias, which recent
# pandas releases deprecate.
time_index = pd.date_range("01/01/2010", periods=5,
                           freq=pd.offsets.MonthEnd())

# Create data frame, set index
dataframe = pd.DataFrame(index=time_index)

# Create feature with a gap of missing values
dataframe["Sales"] = [1.0,2.0,np.nan,np.nan,5.0]

# Interpolate missing values (default method: a straight line between the
# known neighbours)
dataframe.interpolate()

# Replace missing values with the last known value (forward-filling)
dataframe.ffill()


# Replace missing values with the next known value (back-filling)
dataframe.bfill()

# Interpolate missing values with a quadratic curve instead of a straight
# line (pandas delegates this method to SciPy, so SciPy must be installed)
dataframe.interpolate(method="quadratic")

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.059808
2010-04-30,4.038069
2010-05-31,5.0
