# Time Series

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Look up the weekday names of two fixed calendar dates.
#
# Only the `date` class is imported here. A plain `import datetime` would bind
# the name `datetime` to the *module*, which collides with the
# `from datetime import datetime` used further down: re-running this cell
# afterwards would make every later `datetime(...)` call raise TypeError.
from datetime import date

# Weekday names ordered to match date.weekday() (Monday == 0 ... Sunday == 6)
weekDays = ("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday")

# Find out what day of the week Christmas 2019 falls on
thisXMas    = date(2019,12,25)
thisXMasDay = thisXMas.weekday()
thisXMasDayAsString = weekDays[thisXMasDay]

print("This year's Christmas is on a {}".format(thisXMasDayAsString))

# Find out what day of the week the following New Year's Day (2020) falls on
nextNewYear     = date(2020,1,1)
nextNewYearDay  = nextNewYear.weekday()
nextNewYearDayAsString = weekDays[nextNewYearDay]

print("Next new year is on a {}".format(nextNewYearDayAsString))

This year's Christmas is on a Wednesday
Next new year is on a Wednesday


## Time Series Basics

In [2]:
from datetime import datetime

# Seed NumPy's global generator so the random values below are repeatable.
np.random.seed(12345)

# Six irregularly spaced days in January 2011 serve as the index labels.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
ts = pd.Series(data=np.random.randn(len(dates)), index=dates)
ts

2011-01-02   -0.204708
2011-01-05    0.478943
2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
2011-01-12    1.393406
dtype: float64

In [3]:
# Indexing a Series by datetime objects makes pandas store the labels in a DatetimeIndex.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [4]:
# Show the full series again as a reference for the slicing examples that follow.
ts

2011-01-02   -0.204708
2011-01-05    0.478943
2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
2011-01-12    1.393406
dtype: float64

In [6]:
# Take every second observation (a positional step of 2)
ts.iloc[::2]

2011-01-02   -0.204708
2011-01-07   -0.519439
2011-01-10    1.965781
dtype: float64

In [7]:
# Arithmetic aligns on the date labels; dates absent from the shorter operand yield NaN.
ts + ts.iloc[::2]

2011-01-02   -0.409415
2011-01-05         NaN
2011-01-07   -1.038877
2011-01-08         NaN
2011-01-10    3.931561
2011-01-12         NaN
dtype: float64

In [7]:
# The index stores its labels as NumPy datetime64 values at nanosecond resolution.
ts.index.dtype

dtype('<M8[ns]')

In [8]:
 ts.index[0]  # a single label taken from a DatetimeIndex is a pandas Timestamp

Timestamp('2011-01-02 00:00:00')

In [9]:
# Keep the first index label in a variable; it displays as a pandas Timestamp.
stamp = ts.index[0]
stamp

Timestamp('2011-01-02 00:00:00')

### Indexing, Selection, Subsetting
可以用以下幾種方式，依照索引標籤（日期）取出對應的值：

In [10]:
# Look up a value using the Timestamp label found at the third index position.
stamp = ts.index[2]
ts.loc[stamp]

-0.5194387150567381

In [11]:
# As a convenience, a string that can be interpreted as a date also works as a label.
ts.loc['1/10/2011']

1.9657805725027142

In [12]:
# The compact YYYYMMDD spelling resolves to the same label (10 January 2011).
ts.loc['20110110']

1.9657805725027142

In [13]:
# A longer daily series: 100 consecutive days starting on 1 January 2019.
longer_ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start='1/1/2019', periods=100),
)
# Preview only the first 40 observations.
longer_ts.head(40)


2019-01-01    0.092908
2019-01-02    0.281746
2019-01-03    0.769023
2019-01-04    1.246435
2019-01-05    1.007189
2019-01-06   -1.296221
2019-01-07    0.274992
2019-01-08    0.228913
2019-01-09    1.352917
2019-01-10    0.886429
2019-01-11   -2.001637
2019-01-12   -0.371843
2019-01-13    1.669025
2019-01-14   -0.438570
2019-01-15   -0.539741
2019-01-16    0.476985
2019-01-17    3.248944
2019-01-18   -1.021228
2019-01-19   -0.577087
2019-01-20    0.124121
2019-01-21    0.302614
2019-01-22    0.523772
2019-01-23    0.000940
2019-01-24    1.343810
2019-01-25   -0.713544
2019-01-26   -0.831154
2019-01-27   -2.370232
2019-01-28   -1.860761
2019-01-29   -0.860757
2019-01-30    0.560145
2019-01-31   -1.265934
2019-02-01    0.119827
2019-02-02   -1.063512
2019-02-03    0.332883
2019-02-04   -2.359419
2019-02-05   -0.199543
2019-02-06   -1.541996
2019-02-07   -0.970736
2019-02-08   -1.307030
2019-02-09    0.286350
Freq: D, dtype: float64

In [14]:
# Select every observation from January 2019 by passing only the year and month.
longer_ts.loc['2019-01']

2019-01-01    0.092908
2019-01-02    0.281746
2019-01-03    0.769023
2019-01-04    1.246435
2019-01-05    1.007189
2019-01-06   -1.296221
2019-01-07    0.274992
2019-01-08    0.228913
2019-01-09    1.352917
2019-01-10    0.886429
2019-01-11   -2.001637
2019-01-12   -0.371843
2019-01-13    1.669025
2019-01-14   -0.438570
2019-01-15   -0.539741
2019-01-16    0.476985
2019-01-17    3.248944
2019-01-18   -1.021228
2019-01-19   -0.577087
2019-01-20    0.124121
2019-01-21    0.302614
2019-01-22    0.523772
2019-01-23    0.000940
2019-01-24    1.343810
2019-01-25   -0.713544
2019-01-26   -0.831154
2019-01-27   -2.370232
2019-01-28   -1.860761
2019-01-29   -0.860757
2019-01-30    0.560145
2019-01-31   -1.265934
Freq: D, dtype: float64

In [15]:
# Passing only a year selects every observation that falls within that year.
longer_ts.loc['2019']

2019-01-01    0.092908
2019-01-02    0.281746
2019-01-03    0.769023
2019-01-04    1.246435
2019-01-05    1.007189
2019-01-06   -1.296221
2019-01-07    0.274992
2019-01-08    0.228913
2019-01-09    1.352917
2019-01-10    0.886429
2019-01-11   -2.001637
2019-01-12   -0.371843
2019-01-13    1.669025
2019-01-14   -0.438570
2019-01-15   -0.539741
2019-01-16    0.476985
2019-01-17    3.248944
2019-01-18   -1.021228
2019-01-19   -0.577087
2019-01-20    0.124121
2019-01-21    0.302614
2019-01-22    0.523772
2019-01-23    0.000940
2019-01-24    1.343810
2019-01-25   -0.713544
2019-01-26   -0.831154
2019-01-27   -2.370232
2019-01-28   -1.860761
2019-01-29   -0.860757
2019-01-30    0.560145
                ...   
2019-03-12   -0.622274
2019-03-13   -0.921169
2019-03-14   -0.726213
2019-03-15    0.222896
2019-03-16    0.051316
2019-03-17   -1.157719
2019-03-18    0.816707
2019-03-19    0.433610
2019-03-20    1.010737
2019-03-21    1.824875
2019-03-22   -0.997518
2019-03-23    0.850591
2019-03-24 

In [16]:
# Everything from a given point in time onward (the start label is included).
ts.loc[datetime(2011, 1, 7):]

2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
2011-01-12    1.393406
dtype: float64

In [17]:
# Show the full series again to compare against the range selections below.
ts

2011-01-02   -0.204708
2011-01-05    0.478943
2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
2011-01-12    1.393406
dtype: float64

In [18]:
# The slice bounds need not exist in the index; rows between the two dates are returned.
ts.loc['1/6/2011':'1/11/2011']

2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
dtype: float64

In [31]:
# Cut off every entry whose index label comes after 1/7/2011 (that date itself is kept).
ts.truncate(after='1/7/2011')

2011-01-02   -0.204708
2011-01-05    0.478943
2011-01-07   -0.519439
dtype: float64

In [32]:
# date_range: generate dates over a requested range -- here 100 weekly
# timestamps anchored on Thursdays (freq='W-THU').
dates = pd.date_range(start='1/1/2019', periods=100, freq='W-THU')

# One column of random values per location, indexed by the weekly dates.
long_df = pd.DataFrame(
    data=np.random.randn(len(dates), 4),
    index=dates,
    columns=['Taiwan', 'Japan', 'New York', 'Hong Kong'],
)
long_df.head(10)

Unnamed: 0,Taiwan,Japan,New York,Hong Kong
2019-01-03,0.153881,-0.274084,-1.784926,0.981007
2019-01-10,-0.873717,-1.015634,-0.411244,1.465621
2019-01-17,-1.006219,-0.902148,0.752769,-0.490509
2019-01-24,-0.524672,-0.699196,0.352361,0.068103
2019-01-31,-0.930342,0.8454,0.016472,0.844963
2019-02-07,1.850834,0.022074,-1.369179,0.887204
2019-02-14,0.014331,-0.074155,-0.048565,1.235021
2019-02-21,-0.433295,1.391035,0.820211,-0.247423
2019-02-28,0.302271,0.54398,-0.942369,-1.266383
2019-03-07,0.93725,-0.720102,-1.593952,-0.375498


In [21]:
# A month-year string given to .loc selects every row dated in February 2019.
long_df.loc['2-2019']

Unnamed: 0,Taiwan,Japan,New York,Hong Kong
2019-02-07,0.107657,-0.606545,-0.417064,-0.017007
2019-02-14,-1.224145,-1.80084,1.634736,0.989008
2019-02-21,0.45794,0.555154,1.30672,-0.440554
2019-02-28,-0.30135,0.498791,-0.823991,1.320566


### Time Series with Duplicate Indices

In [22]:
# Build an index in which 2 January 2000 appears three times.
dates = pd.DatetimeIndex(
    ['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000']
)
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [23]:
# Check whether every index label is unique
dup_ts.index.is_unique

False

In [24]:
# not duplicated: the label occurs once, so a single scalar comes back
dup_ts.loc['1/3/2000']


4

In [25]:
# duplicated: the label occurs several times, so a Series of all matches comes back
dup_ts.loc['1/2/2000']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int64

In [26]:
# Group rows that share the same index label (level 0 is the date index),
# then average the values within each group.
grouped = dup_ts.groupby(level=0)
grouped.mean()


2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int64

In [27]:
# Number of observations recorded under each distinct date label.
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64