In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; the CSV files
# read by later cells live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1.다른 자료형을 시계열 객체로 변환

### 1-1.문자열 -> Timestamp

In [2]:
import pandas as pd

# Load the stock price CSV from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/github/딥러닝을 위한 파이썬/Pandas/data/05000266/part5/stock-data.csv')

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()   # at this point the Date column is still dtype object (strings)
print()

# Convert the Date column: object -> datetime64
df['Date'] = pd.to_datetime(df['Date'])

# Check the dtypes again after the conversion
df.info()
print()

# Use Date as the row index
df = df.set_index('Date')
print(f'DataFrame:\n{df.head(10)}')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    20 non-null     object
 1   Close   20 non-null     int64 
 2   Start   20 non-null     int64 
 3   High    20 non-null     int64 
 4   Low     20 non-null     int64 
 5   Volume  20 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 1.1+ KB
None 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    20 non-null     datetime64[ns]
 1   Close   20 non-null     int64         
 2   Start   20 non-null     int64         
 3   High    20 non-null     int64         
 4   Low     20 non-null     int64         
 5   Volume  20 non-null     int64         
dtypes: datetime64[ns](1), int64(5)
memory usage: 1.1 KB
None 

DataFrame:
            Close  Start

### 1-2.Timestamp -> Period

In [3]:
import pandas as pd

dates = ['2019-06-12', '2020-06-12', '2021-06-12']

# List of date strings -> DatetimeIndex (datetime64)
ts_dates = pd.to_datetime(dates)
print(f'Timestamp:\n{ts_dates}\n')

# DatetimeIndex -> PeriodIndex at day / month / year resolution
pr_day = ts_dates.to_period(freq='D')
print(f'Period(Day):\n{pr_day}')
pr_month = ts_dates.to_period(freq='M')
print(f'Period(Month):\n{pr_month}')
# 'Y' is the yearly period alias. The older spelling 'A' is deprecated since
# pandas 2.2, while 'Y' is accepted by both old and new versions and yields
# the same year-ending-in-December periods.
pr_year = ts_dates.to_period(freq='Y')
print(f'Period(Year):\n{pr_year}')

Timestamp:
DatetimeIndex(['2019-06-12', '2020-06-12', '2021-06-12'], dtype='datetime64[ns]', freq=None)

Period(Day):
PeriodIndex(['2019-06-12', '2020-06-12', '2021-06-12'], dtype='period[D]', freq='D')
Period(Month):
PeriodIndex(['2019-06', '2020-06', '2021-06'], dtype='period[M]', freq='M')
Period(Year):
PeriodIndex(['2019', '2020', '2021'], dtype='period[A-DEC]', freq='A-DEC')


## 2.시계열 데이터 만들기

### 2-1.Timestamp 배열

In [4]:
import pandas as pd

# Six month-start timestamps, localized to Seoul time.
ts_ms = pd.date_range(start='2021-01-01',   # first date
                      end=None,             # last date (not used; periods is given instead)
                      periods=6,            # number of Timestamps to generate
                      freq='MS',            # spacing: month start
                      tz='Asia/Seoul')      # time zone
print(f'ts_ms:\n{ts_ms}\n')

# Month-end spacing. The string alias for this is 'M' before pandas 2.2 and
# 'ME' from 2.2 on ('M' is deprecated there), so no single string works on
# every version. The offset object is version-independent and produces the
# same dates.
ts_m = pd.date_range('2021-01-01', periods=6, freq=pd.offsets.MonthEnd(), tz='Asia/Seoul')
print(f'ts_m:\n{ts_m}\n')

# Every second month end (the equivalent of the '2M' / '2ME' alias).
ts_2m = pd.date_range('2021-01-01', periods=6, freq=pd.offsets.MonthEnd(2), tz='Asia/Seoul')
print(f'ts_2m:\n{ts_2m}')

ts_ms:
DatetimeIndex(['2021-01-01 00:00:00+09:00', '2021-02-01 00:00:00+09:00',
               '2021-03-01 00:00:00+09:00', '2021-04-01 00:00:00+09:00',
               '2021-05-01 00:00:00+09:00', '2021-06-01 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='MS')

ts_m:
DatetimeIndex(['2021-01-31 00:00:00+09:00', '2021-02-28 00:00:00+09:00',
               '2021-03-31 00:00:00+09:00', '2021-04-30 00:00:00+09:00',
               '2021-05-31 00:00:00+09:00', '2021-06-30 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='M')

ts_2m:
DatetimeIndex(['2021-01-31 00:00:00+09:00', '2021-03-31 00:00:00+09:00',
               '2021-05-31 00:00:00+09:00', '2021-07-31 00:00:00+09:00',
               '2021-09-30 00:00:00+09:00', '2021-11-30 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='2M')


### 2-2.Period 배열

In [5]:
import pandas as pd

# Three consecutive monthly periods starting at 2021-01.
pr_m = pd.period_range(start='2021-01-01', # first period
                       end=None,           # last period (not used; periods is given instead)
                       periods=3,          # number of Periods to generate
                       freq='M')           # length of each period: one month
print(f'pr_m:\n{pr_m}\n')

# Hourly periods. The lowercase alias 'h' is used because the uppercase 'H'
# is deprecated since pandas 2.2; 'h' is accepted by older versions as well.
pr_h = pd.period_range('2021-01-01', periods=3, freq='h')
print(f'pr_h:\n{pr_h}\n')

# Two-hour periods.
pr_2h = pd.period_range('2021-01-01', periods=3, freq='2h')
print(f'pr_2h:\n{pr_2h}')

pr_m:
PeriodIndex(['2021-01', '2021-02', '2021-03'], dtype='period[M]', freq='M')

pr_h:
PeriodIndex(['2021-01-01 00:00', '2021-01-01 01:00', '2021-01-01 02:00'], dtype='period[H]', freq='H')

pr_2h:
PeriodIndex(['2021-01-01 00:00', '2021-01-01 02:00', '2021-01-01 04:00'], dtype='period[2H]', freq='2H')


## 3.시계열 데이터 활용

### 3-1.날짜 데이터 분리

In [6]:
import pandas as pd

# Load the stock price CSV from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/github/딥러닝을 위한 파이썬/Pandas/data/05000266/part5/stock-data.csv')

# Convert the Date column: object -> datetime64
df['Date'] = pd.to_datetime(df['Date'])
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() would add a stray "None" line to the output.
df.info()
print()

# Split the date into integer component columns via the .dt accessor
df['Year'] = df['Date'].dt.year    # year
df['Month'] = df['Date'].dt.month  # month
df['Day'] = df['Date'].dt.day      # day of month
print(f'DataFrame:\n{df.head()}\n')

# Period columns at yearly and monthly resolution. 'Y' replaces the yearly
# alias 'A', which is deprecated since pandas 2.2; both give the same periods.
df['Date_yr'] = df['Date'].dt.to_period(freq='Y')  # year only
df['Date_m'] = df['Date'].dt.to_period(freq='M')   # year and month
print(f'DataFrame:\n{df.head()}')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    20 non-null     datetime64[ns]
 1   Close   20 non-null     int64         
 2   Start   20 non-null     int64         
 3   High    20 non-null     int64         
 4   Low     20 non-null     int64         
 5   Volume  20 non-null     int64         
dtypes: datetime64[ns](1), int64(5)
memory usage: 1.1 KB
None 

DataFrame:
        Date  Close  Start   High    Low  Volume  Year  Month  Day
0 2018-07-02  10100  10850  10900  10000  137977  2018      7    2
1 2018-06-29  10700  10550  10900   9990  170253  2018      6   29
2 2018-06-28  10400  10900  10950  10150  155769  2018      6   28
3 2018-06-27  10900  10800  11050  10500  133548  2018      6   27
4 2018-06-26  10800  10900  11000  10700   63039  2018      6   26

DataFrame:
        Date  Close  Start   High    Low  ...  Year  Mont

### 3-2.날짜 인덱스 활용

In [7]:
import pandas as pd

# Load the stock price CSV from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/github/딥러닝을 위한 파이썬/Pandas/data/05000266/part5/stock-data.csv')

# Convert Date: object -> datetime64, then use it as the row index
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')
print(f'df.index:\n{df.index}\n')

# Partial-string indexing on the DatetimeIndex
print(f"2018:\n{df.loc['2018'].head()}\n") # all rows from 2018

print(f"2018-06:\n{df.loc['2018-06'].head()}\n") # all rows from June 2018

print(f"2018-06,'Start':'Low' :\n{df.loc['2018-06', 'Start':'Low'].head()}\n") # plus a column-range slice

print(f"2018-06-26:\n{df.loc['2018-06-26'].head()}\n") # a single day

# Date-range slice. The index shown above is in descending date order, so the
# later date comes first. The label now states the actual end of the range
# (it previously repeated 2018-06-25).
print(f"2018-06-25 ~ 2018-06-20:\n{df.loc['2018-06-25':'2018-06-20'].head()}\n")

# Elapsed time between a fixed reference date and each row's date
today = pd.to_datetime('2021-06-12')
df['time-delta'] = today - df.index
# Re-index by the elapsed time so rows can be sliced with timedelta strings
df = df.reset_index().set_index('time-delta')
print(f"1076days ~ 1082days:\n{df.loc['1076days':'1082days']}")

df.index:
DatetimeIndex(['2018-07-02', '2018-06-29', '2018-06-28', '2018-06-27',
               '2018-06-26', '2018-06-25', '2018-06-22', '2018-06-21',
               '2018-06-20', '2018-06-19', '2018-06-18', '2018-06-15',
               '2018-06-14', '2018-06-12', '2018-06-11', '2018-06-08',
               '2018-06-07', '2018-06-05', '2018-06-04', '2018-06-01'],
              dtype='datetime64[ns]', name='Date', freq=None)

2018:
            Close  Start   High    Low  Volume
Date                                          
2018-07-02  10100  10850  10900  10000  137977
2018-06-29  10700  10550  10900   9990  170253
2018-06-28  10400  10900  10950  10150  155769
2018-06-27  10900  10800  11050  10500  133548
2018-06-26  10800  10900  11000  10700   63039

2018-06:
            Close  Start   High    Low  Volume
Date                                          
2018-06-29  10700  10550  10900   9990  170253
2018-06-28  10400  10900  10950  10150  155769
2018-06-27  10900  10800  11050  10500