In [5]:
from datetime import datetime
import numpy as np
import pandas as pd

In [6]:

# Six irregularly spaced days in January 2011 that serve as the index.
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
         datetime(2011, 1, 7), datetime(2011, 1, 8),
         datetime(2011, 1, 10), datetime(2011, 1, 12)]
# Draw the values from a seeded generator so that a fresh kernel reproduces
# the same series; an unseeded np.random.randn gives new numbers on every run.
ts = pd.Series(np.random.default_rng(seed=0).standard_normal(6), index=dates)
ts

2011-01-02    0.399293
2011-01-05   -0.048381
2011-01-07   -0.196381
2011-01-08    1.273113
2011-01-10   -0.752740
2011-01-12   -0.699578
dtype: float64

In [7]:
# Show the index pandas built from the list of datetime objects passed as
# `index=dates`.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [8]:
# Add the series to every second element of itself (positional step of 2).
# Arithmetic aligns on the date index, so dates absent from the slice
# produce NaN in the result.
ts.add(ts.iloc[::2])

2011-01-02    0.798586
2011-01-05         NaN
2011-01-07   -0.392762
2011-01-08         NaN
2011-01-10   -1.505480
2011-01-12         NaN
dtype: float64

In [9]:
# Show the dtype used to store the index values.
ts.index.dtype

dtype('<M8[ns]')

In [10]:

# Pull out the first entry of the index on its own.
ts.index[0]

Timestamp('2011-01-02 00:00:00')

# Indexing, Selection, Subsetting

In [11]:
# Take the third index entry and use it as a label to look up its value.
stamp = ts.index[2]
ts.loc[stamp]

-0.19638120140634094

In [12]:
# A string that can be interpreted as a date is accepted as a label.
ts.loc['1/10/2011']

-0.7527397927990152

In [13]:
# The same date written in compact YYYYMMDD form selects the same entry.
ts.loc['20110110']

-0.7527397927990152

In [14]:
# 1,000 consecutive daily observations starting 2000-01-01.
# The values come from a seeded generator so the series is identical on
# every Restart & Run All (the unseeded np.random.randn was not).
longer_ts = pd.Series(np.random.default_rng(seed=1).standard_normal(1000),
                      index=pd.date_range('1/1/2000', periods=1000))

In [15]:
# Display the series; pandas abbreviates the middle of long output.
longer_ts

2000-01-01   -0.694806
2000-01-02    2.040719
2000-01-03   -1.218191
2000-01-04   -0.200658
2000-01-05   -0.381461
                ...   
2002-09-22    0.246239
2002-09-23    0.830209
2002-09-24    1.335649
2002-09-25    0.085287
2002-09-26    0.616189
Freq: D, Length: 1000, dtype: float64

In [16]:
# Passing only a year and month selects every observation in that month.
longer_ts.loc['2001-01']

2001-01-01   -1.653630
2001-01-02    1.085521
2001-01-03   -0.121959
2001-01-04    1.885161
2001-01-05    0.576755
2001-01-06   -1.611150
2001-01-07   -1.916065
2001-01-08   -0.910247
2001-01-09   -1.197614
2001-01-10   -1.519612
2001-01-11   -0.879196
2001-01-12   -1.631748
2001-01-13   -0.516753
2001-01-14    0.351092
2001-01-15    1.859510
2001-01-16   -1.224722
2001-01-17   -0.636187
2001-01-18   -1.331043
2001-01-19   -1.471820
2001-01-20   -1.563393
2001-01-21    0.775048
2001-01-22   -1.855444
2001-01-23    1.058685
2001-01-24   -2.044780
2001-01-25   -0.942257
2001-01-26    2.894842
2001-01-27    0.179979
2001-01-28    1.711295
2001-01-29   -0.019379
2001-01-30   -0.789779
2001-01-31   -1.610618
Freq: D, dtype: float64

In [17]:
# Slice between two datetime labels (both endpoints included), keeping
# every second observation.
longer_ts.loc[datetime(2001, 1, 7):datetime(2001, 1, 31):2]

2001-01-07   -1.916065
2001-01-09   -1.197614
2001-01-11   -0.879196
2001-01-13   -0.516753
2001-01-15    1.859510
2001-01-17   -0.636187
2001-01-19   -1.471820
2001-01-21    0.775048
2001-01-23    1.058685
2001-01-25   -0.942257
2001-01-27    0.179979
2001-01-29   -0.019379
2001-01-31   -1.610618
Freq: 2D, dtype: float64

In [18]:

# Keep only the observations up to 2011-01-09; later rows are dropped.
ts.truncate(after='1/9/2011')

2011-01-02    0.399293
2011-01-05   -0.048381
2011-01-07   -0.196381
2011-01-08    1.273113
dtype: float64

In [19]:
# 100 weekly timestamps anchored on Wednesdays. 2000-01-01 is not a
# Wednesday, so the range begins at the first Wednesday on or after it.
dates = pd.date_range(start='1/1/2000', periods=100, freq='W-WED')

In [20]:
# Display the generated weekly index.
dates

DatetimeIndex(['2000-01-05', '2000-01-12', '2000-01-19', '2000-01-26',
               '2000-02-02', '2000-02-09', '2000-02-16', '2000-02-23',
               '2000-03-01', '2000-03-08', '2000-03-15', '2000-03-22',
               '2000-03-29', '2000-04-05', '2000-04-12', '2000-04-19',
               '2000-04-26', '2000-05-03', '2000-05-10', '2000-05-17',
               '2000-05-24', '2000-05-31', '2000-06-07', '2000-06-14',
               '2000-06-21', '2000-06-28', '2000-07-05', '2000-07-12',
               '2000-07-19', '2000-07-26', '2000-08-02', '2000-08-09',
               '2000-08-16', '2000-08-23', '2000-08-30', '2000-09-06',
               '2000-09-13', '2000-09-20', '2000-09-27', '2000-10-04',
               '2000-10-11', '2000-10-18', '2000-10-25', '2000-11-01',
               '2000-11-08', '2000-11-15', '2000-11-22', '2000-11-29',
               '2000-12-06', '2000-12-13', '2000-12-20', '2000-12-27',
               '2001-01-03', '2001-01-10', '2001-01-17', '2001-01-24',
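               '2001-01-31', '2001-02-07', '2001-02-14', '2001-02-21',
               '2001-02-28', '2001-03-07', '2001-03-14', '2001-03-21',
               '2001-03-28', '2001-04-04', '2001-04-11', '2001-04-18',
               '2001-04-25', '2001-05-02', '2001-05-09', '2001-05-16',
               '2001-05-23', '2001-05-30', '2001-06-06', '2001-06-13',
               '2001-06-20', '2001-06-27', '2001-07-04', '2001-07-11',
               '2001-07-18', '2001-07-25', '2001-08-01', '2001-08-08',
               '2001-08-15', '2001-08-22', '2001-08-29', '2001-09-05',
               '2001-09-12', '2001-09-19', '2001-09-26', '2001-10-03',
               '2001-10-10', '2001-10-17', '2001-10-24', '2001-10-31',
               '2001-11-07', '2001-11-14', '2001-11-21', '2001-11-28'],
              dtype='datetime64[ns]', freq='W-WED')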

In [21]:
# 100 x 4 frame of standard-normal values, one row per weekly date and one
# column per state. A seeded generator keeps the values identical across
# kernel restarts (the unseeded np.random.randn did not).
long_df = pd.DataFrame(np.random.default_rng(seed=2).standard_normal((100, 4)),
                       index=dates,
                       columns=['Colorado', 'Texas', 'New York', 'Ohio'])

In [22]:
# Display the frame; pandas abbreviates the middle rows of long output.
long_df

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.000416,0.733781,-0.110181,0.263645
2000-01-12,-0.174775,0.079907,-0.029149,-2.121991
2000-01-19,0.697711,0.121743,-0.681553,-0.790961
2000-01-26,0.280017,-0.983001,0.087499,-1.447122
2000-02-02,0.059947,0.053962,-0.382752,-1.250173
...,...,...,...,...
2001-10-31,0.210693,0.401971,-0.034762,2.457950
2001-11-07,2.236855,-0.289472,-0.499135,-0.102749
2001-11-14,0.324781,-0.276458,0.588136,0.633396
2001-11-21,0.173394,-0.866048,1.447697,1.259701


# Time Series with Duplicate Indices

In [23]:
# Build an index in which 2000-01-02 appears three times, so the duplicate
# handling shown in the following cells has something to work on.
dates = pd.DatetimeIndex(
    data=['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000', '1/3/2000'],
)
dates

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02', '2000-01-02',
               '2000-01-03'],
              dtype='datetime64[ns]', freq=None)

In [24]:
# Single-column frame holding 0..4, indexed by the dates that contain
# repeated entries.
dup_ts = pd.DataFrame(data=np.arange(5), index=dates)
dup_ts

Unnamed: 0,0
2000-01-01,0
2000-01-02,1
2000-01-02,2
2000-01-02,3
2000-01-03,4


In [25]:
# Check whether every index label occurs exactly once.
dup_ts.index.is_unique

False

In [26]:
# A date that occurs once in the index comes back as a single row.
dup_ts.loc['1/3/2000']

0    4
Name: 2000-01-03 00:00:00, dtype: int32

In [27]:
# A date that occurs several times in the index comes back as a frame
# containing every matching row.
dup_ts.loc['1/2/2000']

Unnamed: 0,0
2000-01-02,1
2000-01-02,2
2000-01-02,3


In [28]:
# Group rows by the first (and only) index level, i.e. by date, so repeated
# dates can be aggregated together.
grouped = dup_ts.groupby(level=0)

In [29]:
# Average the values that share the same date.
grouped.mean()

Unnamed: 0,0
2000-01-01,0
2000-01-02,2
2000-01-03,4


In [30]:
# Count how many rows fall on each date.
grouped.count()

Unnamed: 0,0
2000-01-01,1
2000-01-02,3
2000-01-03,1
