## TimeStamp 객체 생성과 인덱스 지정

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("./data/stock-data.csv")
print(df.head())
print(df.info())

         Date  Close  Start   High    Low  Volume
0  2018-07-02  10100  10850  10900  10000  137977
1  2018-06-29  10700  10550  10900   9990  170253
2  2018-06-28  10400  10900  10950  10150  155769
3  2018-06-27  10900  10800  11050  10500  133548
4  2018-06-26  10800  10900  11000  10700   63039
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
Date      20 non-null object
Close     20 non-null int64
Start     20 non-null int64
High      20 non-null int64
Low       20 non-null int64
Volume    20 non-null int64
dtypes: int64(5), object(1)
memory usage: 1.1+ KB
None


In [4]:
#문자열 데이터를 pandas의  Timestamp로 변환해서 새로운 열로 추가 
df['new_Date'] = pd.to_datetime(df['Date'])  
print(df.head())
print(df.info())

         Date  Close  Start   High    Low  Volume   new_Date
0  2018-07-02  10100  10850  10900  10000  137977 2018-07-02
1  2018-06-29  10700  10550  10900   9990  170253 2018-06-29
2  2018-06-28  10400  10900  10950  10150  155769 2018-06-28
3  2018-06-27  10900  10800  11050  10500  133548 2018-06-27
4  2018-06-26  10800  10900  11000  10700   63039 2018-06-26
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
Date        20 non-null object
Close       20 non-null int64
Start       20 non-null int64
High        20 non-null int64
Low         20 non-null int64
Volume      20 non-null int64
new_Date    20 non-null datetime64[ns]
dtypes: datetime64[ns](1), int64(5), object(1)
memory usage: 1.2+ KB
None


In [5]:
print(df.type(df['new_Date']))
print(df.type(df['new_Date'][0])) 

AttributeError: 'DataFrame' object has no attribute 'type'

In [6]:
df.set_index('new_Date', inplace=True)
df.drop('Date', axis=1, inplace=True)
print(df.head())
print(df.info())

            Close  Start   High    Low  Volume
new_Date                                      
2018-07-02  10100  10850  10900  10000  137977
2018-06-29  10700  10550  10900   9990  170253
2018-06-28  10400  10900  10950  10150  155769
2018-06-27  10900  10800  11050  10500  133548
2018-06-26  10800  10900  11000  10700   63039
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 20 entries, 2018-07-02 to 2018-06-01
Data columns (total 5 columns):
Close     20 non-null int64
Start     20 non-null int64
High      20 non-null int64
Low       20 non-null int64
Volume    20 non-null int64
dtypes: int64(5)
memory usage: 960.0 bytes
None


## Timestamp를 Period로 변환

In [7]:
dates = ['2019-01-01', '2020-03-01', '2021-06-01']

In [8]:
ts_dates= pd.to_datetime(dates)  #Timestamp로 변환
print(ts_dates)

DatetimeIndex(['2019-01-01', '2020-03-01', '2021-06-01'], dtype='datetime64[ns]', freq=None)


In [9]:
pr_day = ts_dates.to_period(freq='D') #Timestamp를 Period로 변환
print(pr_day)

PeriodIndex(['2019-01-01', '2020-03-01', '2021-06-01'], dtype='period[D]', freq='D')


In [10]:
pr_month = ts_dates.to_period(freq='M')
print(pr_month)

PeriodIndex(['2019-01', '2020-03', '2021-06'], dtype='period[M]', freq='M')


In [11]:
pr_year = ts_dates.to_period(freq='A')
print(pr_year)

PeriodIndex(['2019', '2020', '2021'], dtype='period[A-DEC]', freq='A-DEC')


## 배열 형태의 시계열 데이터 생성

In [14]:
ts_ms = pd.date_range(start='2019-01-01',
                   end=None,
                   periods=6,
                   freq='MS' ,   #월의 시작일 시간 간격
                   tz = 'Asia/Seoul')
print(ts_ms)

DatetimeIndex(['2019-01-01 00:00:00+09:00', '2019-02-01 00:00:00+09:00',
               '2019-03-01 00:00:00+09:00', '2019-04-01 00:00:00+09:00',
               '2019-05-01 00:00:00+09:00', '2019-06-01 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='MS')


In [13]:
ts_me = pd.date_range(start='2019-01-01',
                   end=None,
                   periods=6,
                   freq='M' ,   #월의 마지막날 시간 간격
                   tz = 'Asia/Seoul')
print(ts_me)

DatetimeIndex(['2019-01-31 00:00:00+09:00', '2019-02-28 00:00:00+09:00',
               '2019-03-31 00:00:00+09:00', '2019-04-30 00:00:00+09:00',
               '2019-05-31 00:00:00+09:00', '2019-06-30 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='M')


In [15]:
ts_3m = pd.date_range(start='2019-01-01',
                   end=None,
                   periods=6,
                   freq='3M' ,   #3개월 시간 간격
                   tz = 'Asia/Seoul')
print(ts_3m)

DatetimeIndex(['2019-01-31 00:00:00+09:00', '2019-04-30 00:00:00+09:00',
               '2019-07-31 00:00:00+09:00', '2019-10-31 00:00:00+09:00',
               '2020-01-31 00:00:00+09:00', '2020-04-30 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='3M')


In [16]:
pr_m = pd.period_range(start='2019-01-01',
                   end=None,
                   periods=3,
                   freq='M')   #기간의 길이는 (M:월)
print(pr_m)

PeriodIndex(['2019-01', '2019-02', '2019-03'], dtype='period[M]', freq='M')


In [17]:
pr_h = pd.period_range(start='2019-01-01',
                   end=None,
                   periods=3,
                   freq='H')   #기간의 길이는 (H: 시간)
print(pr_h)

PeriodIndex(['2019-01-01 00:00', '2019-01-01 01:00', '2019-01-01 02:00'], dtype='period[H]', freq='H')


In [18]:
pr_5h = pd.period_range(start='2019-01-01',
                   end=None,
                   periods=3,
                   freq='5H')   #기간의 길이는 (5H: 5시간)
print(pr_5h)

PeriodIndex(['2019-01-01 00:00', '2019-01-01 05:00', '2019-01-01 10:00'], dtype='period[5H]', freq='5H')


---

In [20]:
df = pd.read_csv("./data/stock-data.csv")
print(df.head())
print(df.info())

         Date  Close  Start   High    Low  Volume
0  2018-07-02  10100  10850  10900  10000  137977
1  2018-06-29  10700  10550  10900   9990  170253
2  2018-06-28  10400  10900  10950  10150  155769
3  2018-06-27  10900  10800  11050  10500  133548
4  2018-06-26  10800  10900  11000  10700   63039
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
Date      20 non-null object
Close     20 non-null int64
Start     20 non-null int64
High      20 non-null int64
Low       20 non-null int64
Volume    20 non-null int64
dtypes: int64(5), object(1)
memory usage: 1.1+ KB
None


In [21]:
#문자열 데이터를 pandas의  Timestamp로 변환해서 새로운 열로 추가 
df['new_Date'] = pd.to_datetime(df['Date'])  
print(df.head())
print(df.info())

         Date  Close  Start   High    Low  Volume   new_Date
0  2018-07-02  10100  10850  10900  10000  137977 2018-07-02
1  2018-06-29  10700  10550  10900   9990  170253 2018-06-29
2  2018-06-28  10400  10900  10950  10150  155769 2018-06-28
3  2018-06-27  10900  10800  11050  10500  133548 2018-06-27
4  2018-06-26  10800  10900  11000  10700   63039 2018-06-26
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
Date        20 non-null object
Close       20 non-null int64
Start       20 non-null int64
High        20 non-null int64
Low         20 non-null int64
Volume      20 non-null int64
new_Date    20 non-null datetime64[ns]
dtypes: datetime64[ns](1), int64(5), object(1)
memory usage: 1.2+ KB
None


In [22]:
#년, 월, 일  값 속성으로 접근
df['Year'] = df['new_Date'].dt.year
df['Month'] = df['new_Date'].dt.month
df['Day'] = df['new_Date'].dt.day
print(df.head())

         Date  Close  Start   High    Low  Volume   new_Date  Year  Month  Day
0  2018-07-02  10100  10850  10900  10000  137977 2018-07-02  2018      7    2
1  2018-06-29  10700  10550  10900   9990  170253 2018-06-29  2018      6   29
2  2018-06-28  10400  10900  10950  10150  155769 2018-06-28  2018      6   28
3  2018-06-27  10900  10800  11050  10500  133548 2018-06-27  2018      6   27
4  2018-06-26  10800  10900  11000  10700   63039 2018-06-26  2018      6   26


In [23]:
df['Date_yr'] = df['new_Date'].dt.to_period(freq='A')
df['Date_m'] = df['new_Date'].dt.to_period(freq='M')
print(df.head())

         Date  Close  Start   High    Low  Volume   new_Date  Year  Month  \
0  2018-07-02  10100  10850  10900  10000  137977 2018-07-02  2018      7   
1  2018-06-29  10700  10550  10900   9990  170253 2018-06-29  2018      6   
2  2018-06-28  10400  10900  10950  10150  155769 2018-06-28  2018      6   
3  2018-06-27  10900  10800  11050  10500  133548 2018-06-27  2018      6   
4  2018-06-26  10800  10900  11000  10700   63039 2018-06-26  2018      6   

   Day Date_yr   Date_m  
0    2    2018  2018-07  
1   29    2018  2018-06  
2   28    2018  2018-06  
3   27    2018  2018-06  
4   26    2018  2018-06  


In [24]:
df.set_index( 'Date_m' , inplace=True)
print(df.head())

               Date  Close  Start   High    Low  Volume   new_Date  Year  \
Date_m                                                                     
2018-07  2018-07-02  10100  10850  10900  10000  137977 2018-07-02  2018   
2018-06  2018-06-29  10700  10550  10900   9990  170253 2018-06-29  2018   
2018-06  2018-06-28  10400  10900  10950  10150  155769 2018-06-28  2018   
2018-06  2018-06-27  10900  10800  11050  10500  133548 2018-06-27  2018   
2018-06  2018-06-26  10800  10900  11000  10700   63039 2018-06-26  2018   

         Month  Day Date_yr  
Date_m                       
2018-07      7    2    2018  
2018-06      6   29    2018  
2018-06      6   28    2018  
2018-06      6   27    2018  
2018-06      6   26    2018  


---

In [25]:
df = pd.read_csv("./data/stock-data.csv")
print(df.head())
print(df.info())

         Date  Close  Start   High    Low  Volume
0  2018-07-02  10100  10850  10900  10000  137977
1  2018-06-29  10700  10550  10900   9990  170253
2  2018-06-28  10400  10900  10950  10150  155769
3  2018-06-27  10900  10800  11050  10500  133548
4  2018-06-26  10800  10900  11000  10700   63039
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
Date      20 non-null object
Close     20 non-null int64
Start     20 non-null int64
High      20 non-null int64
Low       20 non-null int64
Volume    20 non-null int64
dtypes: int64(5), object(1)
memory usage: 1.1+ KB
None


In [26]:
#문자열 데이터를 pandas의  Timestamp로 변환해서 새로운 열로 추가 
df['new_Date'] = pd.to_datetime(df['Date'])  
df.set_index( 'new_Date' , inplace=True)
print(df.head())
print(df.index)

                  Date  Close  Start   High    Low  Volume
new_Date                                                  
2018-07-02  2018-07-02  10100  10850  10900  10000  137977
2018-06-29  2018-06-29  10700  10550  10900   9990  170253
2018-06-28  2018-06-28  10400  10900  10950  10150  155769
2018-06-27  2018-06-27  10900  10800  11050  10500  133548
2018-06-26  2018-06-26  10800  10900  11000  10700   63039
DatetimeIndex(['2018-07-02', '2018-06-29', '2018-06-28', '2018-06-27',
               '2018-06-26', '2018-06-25', '2018-06-22', '2018-06-21',
               '2018-06-20', '2018-06-19', '2018-06-18', '2018-06-15',
               '2018-06-14', '2018-06-12', '2018-06-11', '2018-06-08',
               '2018-06-07', '2018-06-05', '2018-06-04', '2018-06-01'],
              dtype='datetime64[ns]', name='new_Date', freq=None)


In [27]:
df_y = df['2018']
print(df_y)

                  Date  Close  Start   High    Low  Volume
new_Date                                                  
2018-07-02  2018-07-02  10100  10850  10900  10000  137977
2018-06-29  2018-06-29  10700  10550  10900   9990  170253
2018-06-28  2018-06-28  10400  10900  10950  10150  155769
2018-06-27  2018-06-27  10900  10800  11050  10500  133548
2018-06-26  2018-06-26  10800  10900  11000  10700   63039
2018-06-25  2018-06-25  11150  11400  11450  11000   55519
2018-06-22  2018-06-22  11300  11250  11450  10750  134805
2018-06-21  2018-06-21  11200  11350  11750  11200  133002
2018-06-20  2018-06-20  11550  11200  11600  10900  308596
2018-06-19  2018-06-19  11300  11850  11950  11300  180656
2018-06-18  2018-06-18  12000  13400  13400  12000  309787
2018-06-15  2018-06-15  13400  13600  13600  12900  201376
2018-06-14  2018-06-14  13450  13200  13700  13150  347451
2018-06-12  2018-06-12  13200  12200  13300  12050  558148
2018-06-11  2018-06-11  11950  12000  12250  11950   622

In [28]:
df_ym = df.loc['2018-07']
print(df_ym)

                  Date  Close  Start   High    Low  Volume
new_Date                                                  
2018-07-02  2018-07-02  10100  10850  10900  10000  137977


In [29]:
df_ym_cols = df.loc['2018-07', 'Start':'High']
print(df_ym_cols)

            Start   High
new_Date                
2018-07-02  10850  10900


In [30]:
df_ymd_range =df.loc['2018-06-25': '2018-06-20']
print(df_ymd_range)

                  Date  Close  Start   High    Low  Volume
new_Date                                                  
2018-06-25  2018-06-25  11150  11400  11450  11000   55519
2018-06-22  2018-06-22  11300  11250  11450  10750  134805
2018-06-21  2018-06-21  11200  11350  11750  11200  133002
2018-06-20  2018-06-20  11550  11200  11600  10900  308596
