In [3]:
# Load (or reload) the autoreload extension and use mode 2, which re-imports
# every module before a cell executes, so edits to local .py files are picked up.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's default theme to every matplotlib figure drawn afterwards.
sns.set()

# Time Series

### Using Python's built-in modules

In [6]:
from datetime import datetime, timedelta, time
# Current local date and time as a naive datetime (no timezone attached).
now = datetime.now()
now

datetime.datetime(2020, 6, 1, 0, 22, 30, 987909)

In [7]:
# The calendar components are exposed as plain integer attributes.
now.year, now.month, now.day

(2020, 6, 1)

In [8]:
# Subtracting one datetime from another yields a timedelta.
delta = (
    datetime(year=2011, month=1, day=7)
    - datetime(year=2008, month=6, day=24, hour=8, minute=15)
)
delta

datetime.timedelta(days=926, seconds=56700)

In [9]:
# Whole-day part of the difference.
delta.days

926

In [10]:
# Sub-day remainder in seconds (0..86399), NOT the total duration;
# use delta.total_seconds() for the full span.
delta.seconds

56700

In [11]:
# Reference date (midnight) used by the timedelta arithmetic that follows.
start = datetime(year=2011, month=1, day=7)

In [12]:
# timedelta's first positional argument is days; spell it out for clarity.
start + timedelta(days=12)

datetime.datetime(2011, 1, 19, 0, 0)

In [13]:
# timedeltas can be scaled by an integer before being subtracted from a datetime.
start - 2 * timedelta(days=12)

datetime.datetime(2010, 12, 14, 0, 0)

In [14]:
# Format a datetime as text: %y = two-digit year, %m = month, %d = day.
date = datetime(year=2012, month=3, day=31)
date.strftime('%y-%m-%d')

'12-03-31'

### Time Series Basics

In [16]:
# Six irregularly spaced days in January 2011 form the index.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
# One uniform random value per date; no seed is set, so values differ between runs.
df = pd.Series(data=np.random.rand(len(dates)), index=dates)
df

2011-01-02    0.786911
2011-01-05    0.177761
2011-01-07    0.495151
2011-01-08    0.157026
2011-01-10    0.774370
2011-01-12    0.276671
dtype: float64

In [17]:
# The list of datetime objects was converted to a DatetimeIndex by pandas.
df.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [18]:
# Arithmetic aligns on the index: dates absent from the every-other-row
# selection come out as NaN.
df + df.iloc[::2]

2011-01-02    1.573823
2011-01-05         NaN
2011-01-07    0.990303
2011-01-08         NaN
2011-01-10    1.548741
2011-01-12         NaN
dtype: float64

In [19]:
# Individual elements of a DatetimeIndex are pandas Timestamp objects.
stamp = df.index[0]
stamp

Timestamp('2011-01-02 00:00:00')

### Indexing, Selecting, Subsetting

In [21]:
# Take the third index label and look up its value by label.
stamp = df.index[2]
df.loc[stamp]

0.49515127428681904

In [22]:
# 1000 consecutive daily timestamps starting 2000-01-01, each paired with a
# uniform random value.
long_date = pd.Series(
    data=np.random.rand(1000),
    index=pd.date_range(start='1/1/2000', periods=1000),
)
long_date

2000-01-01    0.941961
2000-01-02    0.519248
2000-01-03    0.803762
2000-01-04    0.323619
2000-01-05    0.393037
                ...   
2002-09-22    0.968056
2002-09-23    0.085217
2002-09-24    0.676079
2002-09-25    0.552371
2002-09-26    0.553725
Freq: D, Length: 1000, dtype: float64

In [23]:
# Partial-string indexing: a bare year selects every row that falls in that year.
long_date.loc['2001']

2001-01-01    0.212678
2001-01-02    0.043651
2001-01-03    0.398424
2001-01-04    0.492979
2001-01-05    0.833361
                ...   
2001-12-27    0.381743
2001-12-28    0.427366
2001-12-29    0.802942
2001-12-30    0.867574
2001-12-31    0.360173
Freq: D, Length: 365, dtype: float64

In [24]:
long_date['1/6/2011':'1/11/2011']

Series([], Freq: D, dtype: float64)

In [141]:
# Seed the global RNG so the random frame below is reproducible.
np.random.seed(7)
columns = ['california', 'texas', 'new york', 'florida']
dates = pd.date_range(start='1/1/2000', periods=100)

# One row per day, one column of standard-normal draws per state.
long_df = pd.DataFrame(
    data=np.random.randn(len(dates), len(columns)),
    index=dates,
    columns=columns,
)
long_df

Unnamed: 0,california,texas,new york,florida
2000-01-01,1.690526,-0.465937,0.032820,0.407516
2000-01-02,-0.788923,0.002066,-0.000890,-1.754724
2000-01-03,1.017658,0.600499,-0.625429,-0.171548
2000-01-04,0.505299,-0.261356,-0.242749,-1.453241
2000-01-05,0.554580,0.123881,0.274460,-1.526525
...,...,...,...,...
2000-04-05,0.926302,-0.049002,-0.308604,-0.947348
2000-04-06,-3.082505,-1.518000,-0.069594,-0.175262
2000-04-07,0.403560,-0.126687,0.159358,0.787799
2000-04-08,-0.885045,-0.287158,-1.607213,0.972893


In [174]:
# Label-based slice from the start of the frame through May 2001.
# NOTE(review): long_df holds only 100 daily rows starting 2000-01-01, so this
# upper bound lies past the last row and the whole frame is returned -- confirm
# whether an earlier bound was intended.
long_df.loc[:'05-2001']

Unnamed: 0,california,texas,new york,florida
2000-01-01,1.690526,-0.465937,0.032820,0.407516
2000-01-02,-0.788923,0.002066,-0.000890,-1.754724
2000-01-03,1.017658,0.600499,-0.625429,-0.171548
2000-01-04,0.505299,-0.261356,-0.242749,-1.453241
2000-01-05,0.554580,0.123881,0.274460,-1.526525
...,...,...,...,...
2000-04-05,0.926302,-0.049002,-0.308604,-0.947348
2000-04-06,-3.082505,-1.518000,-0.069594,-0.175262
2000-04-07,0.403560,-0.126687,0.159358,0.787799
2000-04-08,-0.885045,-0.287158,-1.607213,0.972893


### Duplicate indices

In [178]:
# Build an index in which the label 2000-01-02 appears three times.
dates = pd.DatetimeIndex(['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000'])
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [184]:
# False here because 2000-01-02 occurs more than once in the index.
dup_ts.index.is_unique

False

In [186]:
# A label that occurs exactly once yields a scalar.
dup_ts['1/3/2000']

4

In [187]:
# A duplicated label yields a Series containing every matching row.
dup_ts['1/2/2000']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int64

In [189]:
# Group on the index itself (level=0) so rows sharing a timestamp are
# aggregated together, giving one result per distinct date.
grouped = dup_ts.groupby(level=0)
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int64

In [194]:
# Number of rows per distinct timestamp; values above 1 mark duplicated dates.
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64