# Pandas: 4 time-related concepts

* date times
* time deltas
* time spans
* date offset


|Concept|Scalar Class|Array Class|pandas Data Type|Primary Creation Method|
|-------|------------|-----------|----------------|------------------------|
|Date time|Timestamp|DatetimeIndex|datetime64[ns], datetime64[ns,tz]|to_datetime(),date_range()|
|Time deltas|Timedelta|TimedeltaIndex|timedelta64[ns]|to_timedelta(),timedelta_range()|
|Time spans|Period|PeriodIndex|period[freq]|Period(),period_range()|
|Date offset|DateOffset|None|None|DateOffset|

In [1]:
import time
import datetime
import calendar
import pytz
import numpy as np
import pandas as pd
# import scikits.timeseries

### 1.[FUN] `pd.to_datetime()`

In [2]:
# Three different representations of the same calendar day: a date string,
# a NumPy datetime64 and a standard-library datetime.
same_day_inputs = [
    '1/1/2018',
    np.datetime64('2018-01-01'),
    datetime.datetime(2018, 1, 1),
]
# to_datetime normalises all of them into a single DatetimeIndex.
dti = pd.to_datetime(same_day_inputs)
dti

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

### 2.[FUN] `pd.date_range()`

In [3]:
# Ten consecutive hourly timestamps starting at midnight on 2018-01-01.
dti = pd.date_range('2018-01-01', periods = 10, freq = 'H')
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00',
               '2018-01-01 06:00:00', '2018-01-01 07:00:00',
               '2018-01-01 08:00:00', '2018-01-01 09:00:00'],
              dtype='datetime64[ns]', freq='H')

### 3.[FUN] `.tz_localize("")`

In [4]:
# Attach the UTC time zone to the naive timestamps; the displayed clock
# values stay the same and gain a +00:00 offset.
# NOTE(review): this rebinds `dti` to its own localized version, so the cell
# relies on `dti` still holding the value assigned by the preceding cell —
# confirm that re-running this cell on its own is acceptable.
dti = dti.tz_localize('UTC')
dti

DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00', '2018-01-01 03:00:00+00:00',
               '2018-01-01 04:00:00+00:00', '2018-01-01 05:00:00+00:00',
               '2018-01-01 06:00:00+00:00', '2018-01-01 07:00:00+00:00',
               '2018-01-01 08:00:00+00:00', '2018-01-01 09:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='H')

### 4.[FUN] `.tz_convert("")`

In [5]:
# Express the same instants in US/Pacific local time (offset -08:00 on these
# dates). Requires the tz-aware `dti` produced by the tz_localize cell.
dti.tz_convert('US/Pacific')

DatetimeIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00',
               '2017-12-31 18:00:00-08:00', '2017-12-31 19:00:00-08:00',
               '2017-12-31 20:00:00-08:00', '2017-12-31 21:00:00-08:00',
               '2017-12-31 22:00:00-08:00', '2017-12-31 23:00:00-08:00',
               '2018-01-01 00:00:00-08:00', '2018-01-01 01:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq='H')

### 5.[FUN] `.resample()`

In [6]:
# Six hourly timestamps used as the index of an integer Series 0..5.
idx = pd.date_range("2019-01-01", periods = 6, freq = "H")
ts = pd.Series(range(len(idx)), index = idx)
print(ts)

2019-01-01 00:00:00    0
2019-01-01 01:00:00    1
2019-01-01 02:00:00    2
2019-01-01 03:00:00    3
2019-01-01 04:00:00    4
2019-01-01 05:00:00    5
Freq: H, dtype: int64


In [7]:
# Group the hourly values of `ts` into 2-hour bins and average each bin.
ts.resample("2H").mean()

2019-01-01 00:00:00    0.5
2019-01-01 02:00:00    2.5
2019-01-01 04:00:00    4.5
Freq: 2H, dtype: float64

### 6.[FUN] `pd.Timestamp()`, `.day_name()`, `pd.Timedelta()`, `pd.offsets.BDay()`

In [8]:
# Weekday arithmetic starting from Friday 2019-08-23.
friday = pd.Timestamp("2019-08-23")
print(friday.day_name())
# Adding a plain Timedelta moves exactly one calendar day forward.
saturday = friday + pd.Timedelta("1 day")
print(saturday.day_name())
# Adding one business day skips the weekend and lands on Monday.
monday = friday + pd.offsets.BDay()
print(monday.day_name())

Friday
Saturday
Monday


### 7.[FUN] `pd.period_range()`

In [9]:
# Three consecutive monthly periods starting at January 2011, wrapped in a
# Series whose dtype is period[M].
monthly_periods = pd.period_range("1/1/2011", periods=3, freq="M")
ps = pd.Series(monthly_periods)
print(ps)

0    2011-01
1    2011-02
2    2011-03
dtype: period[M]


### 8.[FUN] `pd.DateOffset()`

In [10]:
# A Series of DateOffset objects; as the printed dtype shows, they are stored
# with the generic object dtype.
# Note: `ps` is rebound here and no longer refers to the period Series above.
ps = pd.Series([pd.DateOffset(1), 
                pd.DateOffset(2)])
print(ps)

0         <DateOffset>
1    <2 * DateOffsets>
dtype: object


### 9. `pd.NaT`

In [11]:
# NaT is accepted by the Timestamp, Timedelta and Period constructors and is
# printed as "NaT" in each case.
print(pd.Timestamp(pd.NaT))
print(pd.Timedelta(pd.NaT))
print(pd.Period(pd.NaT))
# NaT does not compare equal to itself (prints False).
print(pd.NaT == pd.NaT)

NaT
NaT
NaT
False


# pandas TimeSeries APIs:

### Timestamp - `pd.Timestamp()`

In [12]:
# Build a Timestamp from a standard-library datetime object.
pd.Timestamp(datetime.datetime(2019, 8, 21))

Timestamp('2019-08-21 00:00:00')

In [13]:
# Build the same Timestamp from a date string.
pd.Timestamp("2019-08-21")

Timestamp('2019-08-21 00:00:00')

In [14]:
# Build the same Timestamp from positional year, month and day integers.
pd.Timestamp(2019, 8, 21)

Timestamp('2019-08-21 00:00:00')

### Timespan - `pd.Period()`

In [15]:
# With no explicit freq, the string "2019-08" yields a monthly period.
pd.Period("2019-08")

Period('2019-08', 'M')

In [16]:
# With freq="D" the same string yields the daily period 2019-08-01.
pd.Period("2019-08", freq = "D")

Period('2019-08-01', 'D')

### Timestamp & Period as index

In [17]:
# A list of Timestamp objects passed as a Series index becomes a
# DatetimeIndex (see the printed index type).
dates = [pd.Timestamp("2019-08-21"), 
         pd.Timestamp("2019-08-22"),
         pd.Timestamp("2019-08-23")]
# The values come from an unseeded random generator, so they differ per run.
ts = pd.Series(np.random.randn(3), index = dates)
print(ts.index)
print("-" * 20)
print(type(ts.index))
print("-" * 20)
print(ts)

DatetimeIndex(['2019-08-21', '2019-08-22', '2019-08-23'], dtype='datetime64[ns]', freq=None)
--------------------
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
--------------------
2019-08-21    0.086143
2019-08-22    0.737983
2019-08-23   -0.669711
dtype: float64


In [18]:
# A list of Period objects passed as a Series index becomes a PeriodIndex
# with monthly frequency (see the printed index type).
periods = [pd.Period("2019-08"), 
           pd.Period("2019-07"),
           pd.Period("2019-06")]
# The values come from an unseeded random generator, so they differ per run.
ts = pd.Series(np.random.randn(3), index = periods)
print(ts.index)
print("-" * 20)
print(type(ts.index))
print("-" * 20)
print(ts)

PeriodIndex(['2019-08', '2019-07', '2019-06'], dtype='period[M]', freq='M')
--------------------
<class 'pandas.core.indexes.period.PeriodIndex'>
--------------------
2019-08   -1.073076
2019-07    0.786145
2019-06    0.887967
Freq: M, dtype: float64


### To Timestamp - `pd.to_datetime()`

In [19]:
# Series input -> Series of datetime64 values; the missing entry (None)
# becomes NaT.
# Every string within one call uses the same layout: since pandas 2.0 the
# format is inferred from the first element and applied to all the others,
# so mixing e.g. "Jul 31, 2009" with "2010-01-10" in one call raises there.
s = pd.Series(["Jul 31, 2009", "Jan 10, 2010", None])
ts = pd.to_datetime(s)
print(ts)

# List input -> DatetimeIndex (again one consistent layout per call).
l = ["2005/11/23", "2010/12/31"]
tl = pd.to_datetime(l)
print()
print(tl)

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)


In [20]:
# "04-01-2012" is ambiguous: by default the first field is read as the month
# (April 1st); with dayfirst=True it is read as the day (January 4th).
d = ["04-01-2012 10:00"]
td1 = pd.to_datetime(d)
td2 = pd.to_datetime(d, dayfirst = True)
print(td1)
print(td2)
# dayfirst is a preference, not a strict rule: in the recorded output both
# strings below parse to 2019-01-14 with and without dayfirst=True.
# NOTE(review): newer pandas releases infer a single format per call, so this
# mixed-order list may raise there — confirm against the installed version.
d2 = ["14-01-2019", "01-14-2019"]
td3 = pd.to_datetime(d2)
td4 = pd.to_datetime(d2, dayfirst = True)
print(td3)
print(td4)

DatetimeIndex(['2012-04-01 10:00:00'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2019-01-14', '2019-01-14'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2019-01-14', '2019-01-14'], dtype='datetime64[ns]', freq=None)


In [21]:
# Built directly from strings, the index carries no frequency (freq=None).
print(pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"]))
# freq="infer" detects the regular two-day spacing and records freq='2D'.
print(pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq = "infer"))

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq='2D')


In [22]:
# An explicit strptime-style format fixes the field order; both calls below
# yield 2010-11-12 00:00:00 even though the strings are laid out differently.
print(pd.to_datetime("2010/11/12", format = "%Y/%m/%d"))
print(pd.to_datetime("12-11-2010 00:00", format = "%d-%m-%Y %H:%M"))

2010-11-12 00:00:00
2010-11-12 00:00:00


In [23]:
# A frame whose columns are named after datetime components can be assembled
# into timestamps, one per row.
df = pd.DataFrame({
    "year": [2015, 2016],
    "month": [2, 3],
    "day": [4, 5],
    "hour": [2, 3]
})
print(df)
print()
# All columns are used, so the "hour" column sets the time of day.
print(pd.to_datetime(df))
print()
# Only the date columns are passed: the result is printed as dates only.
print(pd.to_datetime(df[["year", "month", "day"]]))

   year  month  day  hour
0  2015      2    4     2
1  2016      3    5     3

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]


In [24]:
# errors="raise": an unparseable string raises ValueError, whose message is
# printed here instead of aborting the cell.
try:
    pd.to_datetime(["2019-08-21", "asd"], errors = "raise")
except ValueError as e:
    print(e)

# errors="ignore": in the recorded output the input comes back unparsed as an
# object Index.
# NOTE(review): errors="ignore" is deprecated in recent pandas releases —
# confirm against the installed version.
print(pd.to_datetime(["2019-08-21", "asd"], errors = "ignore"))
# errors="coerce": unparseable entries become NaT.
print(pd.to_datetime(["2019-08-21", "asd"], errors = "coerce"))

('Unknown string format:', 'asd')
Index(['2019-08-21', 'asd'], dtype='object')
DatetimeIndex(['2019-08-21', 'NaT'], dtype='datetime64[ns]', freq=None)


In [25]:
# Integer epoch values: `unit` states what one integer step represents.
# The same five instants are given first in seconds, then in milliseconds.
print(pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit = "s"))
print(pd.to_datetime([1349720105000, 1349806505000, 1349892905000, 1349979305000, 1350065705000], unit = "ms"))
# With no unit given, the integer is read as nanoseconds since the epoch
# (2010-01-01 12:00 here) and then localized to US/Pacific; to_datetime and
# the DatetimeIndex constructor print the same result.
print(pd.to_datetime([1262347200000000000]).tz_localize("US/Pacific"))
print(pd.DatetimeIndex([1262347200000000000]).tz_localize("US/Pacific"))

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2010-01-01 12:00:00-08:00'], dtype='datetime64[ns, US/Pacific]', freq=None)
DatetimeIndex(['2010-01-01 12:00:00-08:00'], dtype='datetime64[ns, US/Pacific]', freq=None)


In [26]:
# `origin` sets the reference date the integer day counts start from:
# 1, 2, 3 days after 1960-01-01.
print(pd.to_datetime([1, 2, 3], unit = "D", origin = pd.Timestamp('1960-01-01')))
# Without `origin` the days are counted from 1970-01-01.
print(pd.to_datetime([1, 2, 3], unit = "D"))

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['1970-01-02', '1970-01-03', '1970-01-04'], dtype='datetime64[ns]', freq=None)


### Range of Timestamp

In [27]:
# `start` and `end` are reused by the cells that follow.
start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2019, 1, 1)
# Calendar-day range: freq 'D', both endpoints included (366 entries).
index = pd.date_range(start, end)
print(index)
# Business-day range over the same span: freq 'B', weekends skipped
# (262 entries). `index` is rebound to this second result.
index = pd.bdate_range(start, end)
print(index)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10',
               ...
               '2018-12-23', '2018-12-24', '2018-12-25', '2018-12-26',
               '2018-12-27', '2018-12-28', '2018-12-29', '2018-12-30',
               '2018-12-31', '2019-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10',
               '2018-01-11', '2018-01-12',
               ...
               '2018-12-19', '2018-12-20', '2018-12-21', '2018-12-24',
               '2018-12-25', '2018-12-26', '2018-12-27', '2018-12-28',
               '2018-12-31', '2019-01-01'],
              dtype='datetime64[ns]', length=262, freq='B')


In [29]:
# Relies on `start` / `end` defined in an earlier cell.
# 1000 month-end dates counted forward from `start`.
print(pd.date_range(start, periods = 1000, freq = "M"))
# 250 business-quarter-start dates counted forward from `start`.
print(pd.bdate_range(start, periods = 250, freq = "BQS"))
# Business month ends between `start` and `end`.
print(pd.date_range(start, end, freq = "BM"))
# Weekly frequency between `start` and `end`.
print(pd.date_range(start, end, freq = "W"))
# Only one endpoint plus a count: 20 entries ending at `end`, then 20
# entries beginning at `start` (no freq argument is passed).
print(pd.date_range(end = end, periods = 20))
print(pd.date_range(start = start, periods = 20))
# Both endpoints plus a count and no freq: 5 entries, then 10 entries,
# over the same 2018-01-01 .. 2018-01-05 span.
print(pd.date_range("2018-01-01", "2018-01-05", periods = 5))
print(pd.date_range("2018-01-01", "2018-01-05", periods = 10))

DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
               '2018-05-31', '2018-06-30', '2018-07-31', '2018-08-31',
               '2018-09-30', '2018-10-31',
               ...
               '2100-07-31', '2100-08-31', '2100-09-30', '2100-10-31',
               '2100-11-30', '2100-12-31', '2101-01-31', '2101-02-28',
               '2101-03-31', '2101-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')
DatetimeIndex(['2018-01-01', '2018-04-02', '2018-07-02', '2018-10-01',
               '2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01', '2020-04-01',
               ...
               '2078-01-03', '2078-04-01', '2078-07-01', '2078-10-03',
               '2079-01-02', '2079-04-03', '2079-07-03', '2079-10-02',
               '2080-01-01', '2080-04-01'],
              dtype='datetime64[ns]', length=250, freq='BQS-JAN')
DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-30', '2018-04-30',
               '2018-0

In [32]:
# Custom business calendar: only Monday, Wednesday and Friday count as
# business days.
weekmask = "Mon Wed Fri"
# NOTE(review): both holidays are dated 2011, while `start` / `end` (set in an
# earlier cell) span 2018-01-01 .. 2019-01-01 in the recorded output, so they
# fall outside the range and exclude nothing (all 157 Mon/Wed/Fri dates are
# listed). Use dates inside the range if the holiday effect should be visible.
holidays = [datetime.datetime(2011, 1, 5),
            datetime.datetime(2011, 3, 14)]
# freq "C": custom business days, honouring the weekmask and holidays.
print(pd.bdate_range(start, end, 
                     freq = "C", 
                     weekmask = weekmask, 
                     holidays = holidays))
# freq "CBMS": first custom business day of each month under the weekmask.
print(pd.bdate_range(start, end, freq = "CBMS", weekmask = weekmask))

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05', '2018-01-08',
               '2018-01-10', '2018-01-12', '2018-01-15', '2018-01-17',
               '2018-01-19', '2018-01-22',
               ...
               '2018-12-10', '2018-12-12', '2018-12-14', '2018-12-17',
               '2018-12-19', '2018-12-21', '2018-12-24', '2018-12-26',
               '2018-12-28', '2018-12-31'],
              dtype='datetime64[ns]', length=157, freq='C')
DatetimeIndex(['2018-01-01', '2018-02-02', '2018-03-02', '2018-04-02',
               '2018-05-02', '2018-06-01', '2018-07-02', '2018-08-01',
               '2018-09-03', '2018-10-01', '2018-11-02', '2018-12-03'],
              dtype='datetime64[ns]', freq='CBMS')


In [35]:
# Earliest and latest values a Timestamp can represent.
print(pd.Timestamp.min)
print(pd.Timestamp.max)

1677-09-21 00:12:43.145225
2262-04-11 23:47:16.854775807


### Timestamp Index

In [58]:
# NOTE: rebinds `start` / `end`, which the date_range cells above set to
# 2018 / 2019; cells re-run after this one will see the 2011 / 2012 values.
start = datetime.datetime(2011, 1, 1)
end = datetime.datetime(2012, 1, 1)
# Last business day of every month in 2011 (12 entries).
rng = pd.date_range(start, end, freq = "BM")
# Values are unseeded random numbers, so they differ on every run.
ts = pd.Series(np.random.randn(len(rng)), index = rng)
print(ts)
print(ts.index)
# Slicing keeps a frequency on the index: 'BM' for the contiguous slice and
# '2BM' when taking every second element.
print(ts[:5].index)
print(ts[::2].index)

2011-01-31   -0.280422
2011-02-28    0.389996
2011-03-31   -0.467548
2011-04-29   -0.544350
2011-05-31    0.672542
2011-06-30    0.024717
2011-07-29    0.222753
2011-08-31    0.287919
2011-09-30    0.818344
2011-10-31   -0.540709
2011-11-30   -0.923925
2011-12-30   -0.920204
Freq: BM, dtype: float64
DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')
DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31'],
              dtype='datetime64[ns]', freq='BM')
DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-29',
               '2011-09-30', '2011-11-30'],
              dtype='datetime64[ns]', freq='2BM')


In [62]:
# Uses `ts` from the previous cell (business-month-end index for 2011).
# An exact date string selects a single scalar value.
print(ts["1/31/2011"])
print()
# A datetime object as an open-ended slice start: everything from
# 2011-12-25 onwards.
print(ts[datetime.datetime(2011, 12, 25):])
print()
# String slice: the start label is included, and the end label does not
# have to exist in the index.
print(ts['10/31/2011':'12/31/2011'])
print()
# A year string selects every entry of that year.
print(ts["2011"])
print()
# A year-month string selects every entry of that month.
print(ts["2011-6"])

-0.28042152014949867

2011-12-30   -0.920204
Freq: BM, dtype: float64

2011-10-31   -0.540709
2011-11-30   -0.923925
2011-12-30   -0.920204
Freq: BM, dtype: float64

2011-01-31   -0.280422
2011-02-28    0.389996
2011-03-31   -0.467548
2011-04-29   -0.544350
2011-05-31    0.672542
2011-06-30    0.024717
2011-07-29    0.222753
2011-08-31    0.287919
2011-09-30    0.818344
2011-10-31   -0.540709
2011-11-30   -0.923925
2011-12-30   -0.920204
Freq: BM, dtype: float64

2011-06-30    0.024717
Freq: BM, dtype: float64


In [68]:
# 100,000 rows at one-minute frequency starting 2013-01-01, with a single
# column "A" of unseeded random values.
dft = pd.DataFrame(np.random.randn(100000, 1),
                   columns = ["A"],
                   index = pd.date_range("20130101", periods = 100000, freq = "T"))
print(dft)
print()
# Row selection by a year string.
# NOTE(review): selecting rows with frame["2013"] is not supported in newer
# pandas releases (use frame.loc["2013"]) — confirm against the installed
# version.
print(dft["2013"])
print()
# Slice between two year-month strings.
print(dft["2013-1":"2013-2"])
print()
# Same start, end given as a day.
print(dft["2013-1":"2013-2-28"])
print()
# Same start, end given as a full timestamp.
print(dft["2013-1":"2013-2-28 00:00:00"])
print()
# Start given as a day, end as a full timestamp on that same day.
print(dft["2013-1-15":"2013-1-15 12:30:00"])

                            A
2013-01-01 00:00:00  0.002961
2013-01-01 00:01:00  0.200076
2013-01-01 00:02:00 -1.295080
2013-01-01 00:03:00  0.916237
2013-01-01 00:04:00 -1.429252
2013-01-01 00:05:00  0.231172
2013-01-01 00:06:00 -1.877051
2013-01-01 00:07:00 -1.872190
2013-01-01 00:08:00  1.097244
2013-01-01 00:09:00  0.275314
2013-01-01 00:10:00 -0.091576
2013-01-01 00:11:00  0.695009
2013-01-01 00:12:00 -0.862049
2013-01-01 00:13:00  0.019264
2013-01-01 00:14:00  1.079467
2013-01-01 00:15:00  0.726813
2013-01-01 00:16:00 -0.110329
2013-01-01 00:17:00 -0.578942
2013-01-01 00:18:00 -0.380870
2013-01-01 00:19:00  0.318251
2013-01-01 00:20:00  0.141365
2013-01-01 00:21:00 -0.894815
2013-01-01 00:22:00  1.109872
2013-01-01 00:23:00 -0.056260
2013-01-01 00:24:00 -1.481539
2013-01-01 00:25:00 -0.854674
2013-01-01 00:26:00 -0.820227
2013-01-01 00:27:00  0.012029
2013-01-01 00:28:00  0.336864
2013-01-01 00:29:00  0.679630
...                       ...
2013-03-11 10:10:00 -0.727262
2013-03-11

In [71]:
# 20 rows indexed by a two-level MultiIndex: 10 timestamps spaced 12 hours
# apart crossed with the labels "a" and "b"; values are unseeded random numbers.
dft2 = pd.DataFrame(np.random.randn(20, 1),
                    columns = ["A"],
                    index = pd.MultiIndex.from_product(
                        [pd.date_range("20130101", periods = 10, freq = "12H"),
                        ["a", "b"]]
                    ))
print(dft2)
print()
# A date string on the first (datetime) level selects every row of that day.
print(dft2.loc["2013-01-05"])
print()
# NOTE: `idx` held a DatetimeIndex in the resample section; it is rebound here.
idx = pd.IndexSlice
# Move the label level to the front and re-sort; `dft2` is rebound to the
# swapped frame.
dft2 = dft2.swaplevel(0, 1).sort_index()
print(dft2)
print()
# IndexSlice: every first-level label, and the day 2013-01-05 on the second
# (datetime) level.
print(dft2.loc[idx[:, "2013-01-05"], :])

                              A
2013-01-01 00:00:00 a -0.173187
                    b -0.581849
2013-01-01 12:00:00 a -1.775048
                    b -1.200907
2013-01-02 00:00:00 a  0.450410
                    b  0.101411
2013-01-02 12:00:00 a -0.270521
                    b -0.722293
2013-01-03 00:00:00 a -1.949081
                    b  0.248722
2013-01-03 12:00:00 a -0.515341
                    b  1.046882
2013-01-04 00:00:00 a  1.593608
                    b  2.740623
2013-01-04 12:00:00 a -0.133888
                    b -0.240564
2013-01-05 00:00:00 a  0.032216
                    b -1.294595
2013-01-05 12:00:00 a  0.749883
                    b -0.978499

                              A
2013-01-05 00:00:00 a  0.032216
                    b -1.294595
2013-01-05 12:00:00 a  0.749883
                    b -0.978499

                              A
a 2013-01-01 00:00:00 -0.173187
  2013-01-01 12:00:00 -1.775048
  2013-01-02 00:00:00  0.450410
  2013-01-02 12:00:00 -0.270521
  2013