## 시계열 자료 다루기

In [2]:
import numpy as np 
import pandas as pd 


### DatetimeIndex 인덱스


In [4]:
date_str = ["2018, 1, 1", "2018, 1, 4", "2018, 1, 5", "2018, 1, 6"]
idx = pd.to_datetime(date_str)
idx


DatetimeIndex(['2018-01-01', '2018-01-04', '2018-01-05', '2018-01-06'], dtype='datetime64[ns]', freq=None)

In [5]:
np.random.seed(2021)
s = pd.Series(np.random.randn(4), index=pd.to_datetime(date_str))
s

2018-01-01    1.488609
2018-01-04    0.676011
2018-01-05   -0.418451
2018-01-06   -0.806521
dtype: float64

In [6]:
pd.date_range("2021-3-1", "2021-3-31")

DatetimeIndex(['2021-03-01', '2021-03-02', '2021-03-03', '2021-03-04',
               '2021-03-05', '2021-03-06', '2021-03-07', '2021-03-08',
               '2021-03-09', '2021-03-10', '2021-03-11', '2021-03-12',
               '2021-03-13', '2021-03-14', '2021-03-15', '2021-03-16',
               '2021-03-17', '2021-03-18', '2021-03-19', '2021-03-20',
               '2021-03-21', '2021-03-22', '2021-03-23', '2021-03-24',
               '2021-03-25', '2021-03-26', '2021-03-27', '2021-03-28',
               '2021-03-29', '2021-03-30', '2021-03-31'],
              dtype='datetime64[ns]', freq='D')

In [7]:
pd.date_range("2021-3-1", periods=30)

DatetimeIndex(['2021-03-01', '2021-03-02', '2021-03-03', '2021-03-04',
               '2021-03-05', '2021-03-06', '2021-03-07', '2021-03-08',
               '2021-03-09', '2021-03-10', '2021-03-11', '2021-03-12',
               '2021-03-13', '2021-03-14', '2021-03-15', '2021-03-16',
               '2021-03-17', '2021-03-18', '2021-03-19', '2021-03-20',
               '2021-03-21', '2021-03-22', '2021-03-23', '2021-03-24',
               '2021-03-25', '2021-03-26', '2021-03-27', '2021-03-28',
               '2021-03-29', '2021-03-30'],
              dtype='datetime64[ns]', freq='D')

In [8]:
pd.date_range("2021-3-1", "2021-3-31", freq='B')

DatetimeIndex(['2021-03-01', '2021-03-02', '2021-03-03', '2021-03-04',
               '2021-03-05', '2021-03-08', '2021-03-09', '2021-03-10',
               '2021-03-11', '2021-03-12', '2021-03-15', '2021-03-16',
               '2021-03-17', '2021-03-18', '2021-03-19', '2021-03-22',
               '2021-03-23', '2021-03-24', '2021-03-25', '2021-03-26',
               '2021-03-29', '2021-03-30', '2021-03-31'],
              dtype='datetime64[ns]', freq='B')

In [10]:
pd.date_range('2021-1-1', '2021-12-31', freq='W')

DatetimeIndex(['2021-01-03', '2021-01-10', '2021-01-17', '2021-01-24',
               '2021-01-31', '2021-02-07', '2021-02-14', '2021-02-21',
               '2021-02-28', '2021-03-07', '2021-03-14', '2021-03-21',
               '2021-03-28', '2021-04-04', '2021-04-11', '2021-04-18',
               '2021-04-25', '2021-05-02', '2021-05-09', '2021-05-16',
               '2021-05-23', '2021-05-30', '2021-06-06', '2021-06-13',
               '2021-06-20', '2021-06-27', '2021-07-04', '2021-07-11',
               '2021-07-18', '2021-07-25', '2021-08-01', '2021-08-08',
               '2021-08-15', '2021-08-22', '2021-08-29', '2021-09-05',
               '2021-09-12', '2021-09-19', '2021-09-26', '2021-10-03',
               '2021-10-10', '2021-10-17', '2021-10-24', '2021-10-31',
               '2021-11-07', '2021-11-14', '2021-11-21', '2021-11-28',
               '2021-12-05', '2021-12-12', '2021-12-19', '2021-12-26'],
              dtype='datetime64[ns]', freq='W-SUN')

### shift 연산

In [11]:
np.random.seed(0)
ts = pd.Series(np.random.randn(4), index=pd.date_range(
    "2021-1-1", periods=4, freq="M"))
ts

2021-01-31    1.764052
2021-02-28    0.400157
2021-03-31    0.978738
2021-04-30    2.240893
Freq: M, dtype: float64

In [12]:
ts.shift(1)

2021-01-31         NaN
2021-02-28    1.764052
2021-03-31    0.400157
2021-04-30    0.978738
Freq: M, dtype: float64

In [13]:
ts.shift(-1)

2021-01-31    0.400157
2021-02-28    0.978738
2021-03-31    2.240893
2021-04-30         NaN
Freq: M, dtype: float64

### re-sample

In [15]:
ts = pd.Series(np.random.randn(100), 
    index=pd.date_range("2018-1-1", periods=100, freq="D"))

ts.tail()

2018-04-06    0.401989
2018-04-07    1.883151
2018-04-08   -1.347759
2018-04-09   -1.270485
2018-04-10    0.969397
Freq: D, dtype: float64

In [16]:
ts.resample('W').mean()

2018-01-07    0.305776
2018-01-14    0.629064
2018-01-21   -0.006910
2018-01-28    0.277065
2018-02-04   -0.144972
2018-02-11   -0.496299
2018-02-18   -0.474473
2018-02-25   -0.201222
2018-03-04   -0.775142
2018-03-11    0.052868
2018-03-18   -0.450379
2018-03-25    0.601892
2018-04-01    0.334893
2018-04-08    0.509605
2018-04-15   -0.150544
Freq: W-SUN, dtype: float64

### dt 접근자

In [20]:
s = pd.Series(pd.date_range("2020-12-25", periods=10, freq="D"))
s

0   2020-12-25
1   2020-12-26
2   2020-12-27
3   2020-12-28
4   2020-12-29
5   2020-12-30
6   2020-12-31
7   2021-01-01
8   2021-01-02
9   2021-01-03
dtype: datetime64[ns]

In [21]:
s.dt.year

0    2020
1    2020
2    2020
3    2020
4    2020
5    2020
6    2020
7    2021
8    2021
9    2021
dtype: int64

In [22]:
s.dt.month

0    12
1    12
2    12
3    12
4    12
5    12
6    12
7     1
8     1
9     1
dtype: int64

In [23]:
s.dt.week

  s.dt.week


0    52
1    52
2    52
3    53
4    53
5    53
6    53
7    53
8    53
9    53
dtype: int64

In [24]:
ts.resample('M').mean()

2018-01-31    0.168338
2018-02-28   -0.280265
2018-03-31   -0.005235
2018-04-30    0.362251
Freq: M, dtype: float64