In [2]:
import pandas as pd

# Load the stock CSV, derive a pandas Timestamp column 'new_Date' from the
# string 'Date' column, promote it to the row index, and sort ascending.
# The original string 'Date' column is kept alongside the new index.
df = (
    pd.read_csv('./data/stock-data.csv')
    .assign(new_Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('new_Date')
    .sort_index()
)

# Preview the first rows, emit a blank gap, then show the DatetimeIndex.
print(df.head())
print('\n')
print(df.index)

                  Date  Close  Start   High    Low  Volume
new_Date                                                  
2018-06-01  2018-06-01  11900  11800  12100  11750   32062
2018-06-04  2018-06-04  11900  11900  12200  11700   25171
2018-06-05  2018-06-05  12150  11800  12250  11800   42485
2018-06-07  2018-06-07  11950  12200  12300  11900   49088
2018-06-08  2018-06-08  11950  11950  12200  11800   59258


DatetimeIndex(['2018-06-01', '2018-06-04', '2018-06-05', '2018-06-07',
               '2018-06-08', '2018-06-11', '2018-06-12', '2018-06-14',
               '2018-06-15', '2018-06-18', '2018-06-19', '2018-06-20',
               '2018-06-21', '2018-06-22', '2018-06-25', '2018-06-26',
               '2018-06-27', '2018-06-28', '2018-06-29', '2018-07-02'],
              dtype='datetime64[ns]', name='new_Date', freq=None)


In [3]:
# Partial string indexing 1: a full date string used as a .loc label
# returns the matching row (displayed below as a Series)
df.loc['2018-06-27']

Date      2018-06-27
Close          10900
Start          10800
High           11050
Low            10500
Volume        133548
Name: 2018-06-27 00:00:00, dtype: object

In [4]:
# Partial string indexing 2: a year-month string selects every row whose
# index falls in that month (here only 2018-07-02 is in July 2018)
df.loc['2018-07']

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-07-02,2018-07-02,10100,10850,10900,10000,137977


In [6]:
# Partial string indexing 3: slice between two date strings; both endpoint
# dates appear in the result
df.loc['2018-06-27':'2018-07-02']

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-06-27,2018-06-27,10900,10800,11050,10500,133548
2018-06-28,2018-06-28,10400,10900,10950,10150,155769
2018-06-29,2018-06-29,10700,10550,10900,9990,170253
2018-07-02,2018-07-02,10100,10850,10900,10000,137977


In [7]:
# Partial string indexing 4: a boolean mask comparing the DatetimeIndex with
# a date string keeps only the rows strictly before 2018-06-05
df[df.index < '2018-06-05']

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-06-01,2018-06-01,11900,11800,12100,11750,32062
2018-06-04,2018-06-04,11900,11900,12200,11700,25171


In [8]:
# Indexing with time types 1: pd.Timestamp objects as the slice endpoints
# instead of date strings
df.loc[pd.Timestamp(2018, 6, 27):pd.Timestamp(2018, 7, 2)]

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-06-27,2018-06-27,10900,10800,11050,10500,133548
2018-06-28,2018-06-28,10400,10900,10950,10150,155769
2018-06-29,2018-06-29,10700,10550,10900,9990,170253
2018-07-02,2018-07-02,10100,10850,10900,10000,137977


In [9]:
# Indexing with time types 2: the endpoints carry a time of day. The
# 2018-06-27 row (index value 00:00:00) precedes the 10:30:00 start, so it
# is left out of the result.
df.loc[pd.Timestamp(2018, 6, 27, 10, 30, 0):pd.Timestamp(2018, 7, 2, 23, 59, 59)]

Unnamed: 0_level_0,Date,Close,Start,High,Low,Volume
new_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-06-28,2018-06-28,10400,10900,10950,10150,155769
2018-06-29,2018-06-29,10700,10550,10900,9990,170253
2018-07-02,2018-07-02,10100,10850,10900,10000,137977


In [10]:
# Absolute date/time differences 1: the same duration can be built from a
# string or from keyword arguments. Each pair below prints identically.
print(
    pd.Timedelta('1 days'),
    pd.Timedelta(days=1),
    pd.Timedelta('1 days 1 hours 1 minutes, 1 seconds'),
    pd.Timedelta(days=1, hours=1, minutes=1, seconds=1),
    sep='\n',
)

1 days 00:00:00
1 days 00:00:00
1 days 01:01:01
1 days 01:01:01


In [11]:
# Absolute date/time differences 2: convert a list of duration strings into
# a TimedeltaIndex in one call
pd.to_timedelta(['1 days', '3 hours'])

TimedeltaIndex(['1 days 00:00:00', '0 days 03:00:00'], dtype='timedelta64[ns]', freq=None)

In [13]:
# Absolute date/time differences 3: arithmetic between a DatetimeIndex and
# Timestamp / Timedelta values
a = df.index  # the frame's DatetimeIndex
print(a)

# Subtracting the index from a single Timestamp gives the gap from each
# date to 2018-07-03 as a TimedeltaIndex
b = pd.Timestamp('2018-07-03') - a
print(b)

# Adding a Timedelta moves every date in the index forward by one day
c = a + pd.Timedelta(days=1)
print(c)

# Earliest and latest of the shifted dates
print(c.min())
print(c.max())

DatetimeIndex(['2018-06-01', '2018-06-04', '2018-06-05', '2018-06-07',
               '2018-06-08', '2018-06-11', '2018-06-12', '2018-06-14',
               '2018-06-15', '2018-06-18', '2018-06-19', '2018-06-20',
               '2018-06-21', '2018-06-22', '2018-06-25', '2018-06-26',
               '2018-06-27', '2018-06-28', '2018-06-29', '2018-07-02'],
              dtype='datetime64[ns]', name='new_Date', freq=None)
TimedeltaIndex(['32 days', '29 days', '28 days', '26 days', '25 days',
                '22 days', '21 days', '19 days', '18 days', '15 days',
                '14 days', '13 days', '12 days', '11 days',  '8 days',
                 '7 days',  '6 days',  '5 days',  '4 days',  '1 days'],
               dtype='timedelta64[ns]', name='new_Date', freq=None)
DatetimeIndex(['2018-06-02', '2018-06-05', '2018-06-06', '2018-06-08',
               '2018-06-09', '2018-06-12', '2018-06-13', '2018-06-15',
               '2018-06-16', '2018-06-19', '2018-06-20', '2018-06-21',
            