## 第10章　日時型

In [2]:
import numpy as np
import pandas as pd
import datetime

In [3]:
reserve_tb = pd.read_csv('reserve.csv')

print(reserve_tb.dtypes)
print(reserve_tb.shape)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object
(4030, 9)


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


### 10-1　日時型、日付型への変換

日時型や日付型は実際に使用する機会が多いが、時刻型はあまり使われることがない。

まずは、datetime64[ns]型への変換をしてみる。

In [4]:
reserve_tb['reserve'] = pd.to_datetime(reserve_tb['reserve_datetime'], format='%Y-%m-%d %H:%M:%S')
reserve_tb['checkin'] = pd.to_datetime(reserve_tb['checkin_date'] + reserve_tb['checkin_time'], format='%Y-%m-%d%H:%M:%S')

print(reserve_tb.dtypes)
reserve_tb.head()

reserve_id                  object
hotel_id                    object
customer_id                 object
reserve_datetime            object
checkin_date                object
checkin_time                object
checkout_date               object
people_num                   int64
total_price                  int64
reserve             datetime64[ns]
checkin             datetime64[ns]
dtype: object


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price,reserve,checkin
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200,2016-03-06 13:09:42,2016-03-26 10:00:00
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600,2016-07-16 23:39:55,2016-07-20 11:30:00
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600,2016-09-24 10:03:17,2016-10-19 09:00:00
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400,2017-03-08 03:20:10,2017-03-29 11:00:00
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100,2017-09-05 19:50:37,2017-09-22 10:30:00


#### dt.date

datetime64[ns]型から日付情報を取得する。その際は、.dt.dateを利用すれば良い。

In [5]:
reserve_tb = pd.read_csv('reserve.csv')

print(reserve_tb.dtypes)
print(reserve_tb.shape)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object
(4030, 9)


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [6]:
reserve_tb['reserve_datetime'] = pd.to_datetime(reserve_tb['reserve_datetime'], format='%Y-%m-%d %H:%M:%S').dt.date
reserve_tb['checkin_date'] = pd.to_datetime(reserve_tb['checkin_date'], format='%Y-%m-%d').dt.date

print(reserve_tb.dtypes)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05,2017-09-22,10:30:00,2017-09-23,3,68100


#### dt.time

同様に、datetime64[ns]型から時刻情報を取得する。その際は、.dt.timeを利用すれば良い。

In [7]:
reserve_tb = pd.read_csv('reserve.csv')

print(reserve_tb.dtypes)
print(reserve_tb.shape)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object
(4030, 9)


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [8]:
reserve_tb['reserve_datetime'] = pd.to_datetime(reserve_tb['reserve_datetime'], format='%Y-%m-%d %H:%M:%S').dt.time
reserve_tb['checkin_time'] = pd.to_datetime(reserve_tb['checkin_time'], format='%H:%M:%S').dt.time

print(reserve_tb.dtypes)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


### 10-2　年／月／日／時刻／分／秒／曜日への変換

In [9]:
reserve_tb = pd.read_csv('reserve.csv')

print(reserve_tb.dtypes)
print(reserve_tb.shape)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object
(4030, 9)


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [10]:
reserve_tb['reserve_datetime'] = pd.to_datetime(reserve_tb['reserve_datetime'], format='%Y-%m-%d %H:%M:%S')

print(reserve_tb.dtypes)
reserve_tb.head()

reserve_id                  object
hotel_id                    object
customer_id                 object
reserve_datetime    datetime64[ns]
checkin_date                object
checkin_time                object
checkout_date               object
people_num                   int64
total_price                  int64
dtype: object


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [11]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.year).head()

Unnamed: 0,reserve_datetime
0,2016
1,2016
2,2016
3,2017
4,2017


In [12]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.month).head()

Unnamed: 0,reserve_datetime
0,3
1,7
2,9
3,3
4,9


In [13]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.day).head()

Unnamed: 0,reserve_datetime
0,6
1,16
2,24
3,8
4,5


In [14]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.dayofweek).head()

Unnamed: 0,reserve_datetime
0,6
1,5
2,5
3,2
4,1


日曜日は0、月曜日は1...となっている。

In [15]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.hour).head()

Unnamed: 0,reserve_datetime
0,13
1,23
2,10
3,3
4,19


In [16]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.minute).head()

Unnamed: 0,reserve_datetime
0,9
1,39
2,3
3,20
4,50


In [17]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.second).head()

Unnamed: 0,reserve_datetime
0,42
1,55
2,17
3,10
4,37


In [18]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.strftime('%Y-%m-%d %H:%M:%S'))

Unnamed: 0,reserve_datetime
0,2016-03-06 13:09:42
1,2016-07-16 23:39:55
2,2016-09-24 10:03:17
3,2017-03-08 03:20:10
4,2017-09-05 19:50:37
...,...
4025,2017-06-27 23:00:02
4026,2017-09-29 05:24:57
4027,2018-03-14 05:01:45
4028,2016-04-16 15:20:17


strftime：指定したフォーマットの文字列に日時型を変換する関数

### 10-3　日時差への変換

In [19]:
reserve_tb = pd.read_csv('reserve.csv')

print(reserve_tb.dtypes)
print(reserve_tb.shape)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object
(4030, 9)


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [20]:
reserve_tb['reserve_datetime'] = pd.to_datetime(reserve_tb['reserve_datetime'], format='%Y-%m-%d %H:%M:%S')
reserve_tb['checkin_datetime'] = pd.to_datetime(reserve_tb['checkin_date']
                                                + reserve_tb['checkin_time'], format='%Y-%m-%d%H:%M:%S')

print(reserve_tb.dtypes)
reserve_tb.head()

reserve_id                  object
hotel_id                    object
customer_id                 object
reserve_datetime    datetime64[ns]
checkin_date                object
checkin_time                object
checkout_date               object
people_num                   int64
total_price                  int64
checkin_datetime    datetime64[ns]
dtype: object


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price,checkin_datetime
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200,2016-03-26 10:00:00
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600,2016-07-20 11:30:00
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600,2016-10-19 09:00:00
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400,2017-03-29 11:00:00
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100,2017-09-22 10:30:00


#### 年の差分を計算

In [21]:
delta_year = reserve_tb['reserve_datetime'].dt.year - reserve_tb['checkin_datetime'].dt.year

pd.DataFrame(delta_year).head()

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0


#### 月の差分を計算

年を月に変換するために、12を掛けている。

In [22]:
delta_month = (reserve_tb['reserve_datetime'].dt.year * 12 + reserve_tb['reserve_datetime'].dt.month)\
- (reserve_tb['checkin_datetime'].dt.year * 12 + reserve_tb['checkin_datetime'].dt.month)

pd.DataFrame(delta_month).head()

Unnamed: 0,0
0,0
1,0
2,-1
3,0
4,0


#### 日単位で差分を計算

In [23]:
delta_day = (reserve_tb['reserve_datetime'] - reserve_tb['checkin_datetime']).astype('timedelta64[D]')

pd.DataFrame(delta_day).head()

Unnamed: 0,0
0,-20.0
1,-4.0
2,-25.0
3,-22.0
4,-17.0


#### 時単位で差分を計算

In [24]:
delta_hour = (reserve_tb['reserve_datetime'] - reserve_tb['checkin_datetime']).astype('timedelta64[h]')

pd.DataFrame(delta_hour).head()

Unnamed: 0,0
0,-477.0
1,-84.0
2,-599.0
3,-512.0
4,-399.0


#### 分単位で差分を計算

In [25]:
delta_minutes = (reserve_tb['reserve_datetime'] - reserve_tb['checkin_datetime']).astype('timedelta64[m]')

pd.DataFrame(delta_minutes).head()

Unnamed: 0,0
0,-28611.0
1,-5031.0
2,-35937.0
3,-30700.0
4,-23920.0


#### 秒単位で差分を計算

In [26]:
delta_second = (reserve_tb['reserve_datetime'] - reserve_tb['checkin_datetime']).astype('timedelta64[s]')

pd.DataFrame(delta_second).head()

Unnamed: 0,0
0,-1716618.0
1,-301805.0
2,-2156203.0
3,-1841990.0
4,-1435163.0


### 10-4　日時型の増減

In [27]:
reserve_tb = pd.read_csv('reserve.csv')

print(reserve_tb.dtypes)
print(reserve_tb.shape)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object
(4030, 9)


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [28]:
reserve_tb['reserve_datetime'] = pd.to_datetime(reserve_tb['reserve_datetime'], format='%Y-%m-%d %H:%M:%S')

reserve_tb['reserve_date'] = reserve_tb['reserve_datetime'].dt.date

print(reserve_tb.dtypes)
reserve_tb.head()

reserve_id                  object
hotel_id                    object
customer_id                 object
reserve_datetime    datetime64[ns]
checkin_date                object
checkin_time                object
checkout_date               object
people_num                   int64
total_price                  int64
reserve_date                object
dtype: object


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price,reserve_date
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200,2016-03-06
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600,2016-07-16
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600,2016-09-24
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400,2017-03-08
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100,2017-09-05


In [29]:
pd.DataFrame(reserve_tb['reserve_datetime'] + datetime.timedelta(days=1)).head()

Unnamed: 0,reserve_datetime
0,2016-03-07 13:09:42
1,2016-07-17 23:39:55
2,2016-09-25 10:03:17
3,2017-03-09 03:20:10
4,2017-09-06 19:50:37


In [30]:
pd.DataFrame(reserve_tb['reserve_date'] + datetime.timedelta(days=1)).head()

Unnamed: 0,reserve_date
0,2016-03-07
1,2016-07-17
2,2016-09-25
3,2017-03-09
4,2017-09-06


In [31]:
pd.DataFrame(reserve_tb['reserve_datetime'] + datetime.timedelta(hours=1)).head()

Unnamed: 0,reserve_datetime
0,2016-03-06 14:09:42
1,2016-07-17 00:39:55
2,2016-09-24 11:03:17
3,2017-03-08 04:20:10
4,2017-09-05 20:50:37


In [32]:
pd.DataFrame(reserve_tb['reserve_datetime'] + datetime.timedelta(minutes=1)).head()

Unnamed: 0,reserve_datetime
0,2016-03-06 13:10:42
1,2016-07-16 23:40:55
2,2016-09-24 10:04:17
3,2017-03-08 03:21:10
4,2017-09-05 19:51:37


In [33]:
pd.DataFrame(reserve_tb['reserve_datetime'] + datetime.timedelta(seconds=1)).head()

Unnamed: 0,reserve_datetime
0,2016-03-06 13:09:43
1,2016-07-16 23:39:56
2,2016-09-24 10:03:18
3,2017-03-08 03:20:11
4,2017-09-05 19:50:38


timedelta関数はtimedelta型を生成する関数で、それぞれの日時単位における増減を求めることができる。

### 10-5　季節への変換

In [34]:
reserve_tb = pd.read_csv('reserve.csv')

print(reserve_tb.dtypes)
print(reserve_tb.shape)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object
(4030, 9)


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [35]:
reserve_tb['reserve_datetime'] = pd.to_datetime(reserve_tb['reserve_datetime'], format='%Y-%m-%d %H:%M:%S')

print(reserve_tb.dtypes)
reserve_tb.head()

reserve_id                  object
hotel_id                    object
customer_id                 object
reserve_datetime    datetime64[ns]
checkin_date                object
checkin_time                object
checkout_date               object
people_num                   int64
total_price                  int64
dtype: object


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [36]:
def to_season(month_num):
    season = 'winter'
    
    if 3 <= month_num <= 5:
        season = 'spring'
        
    elif 6 <= month_num <= 8:
        season = 'summer'
        
    elif 9 <= month_num <= 11:
        season = 'antumn'
        
    return season

In [37]:
reserve_tb['reserve_season'] = pd.Categorical(reserve_tb['reserve_datetime'].dt.month.apply(to_season))

print(reserve_tb.dtypes)
reserve_tb.head()

reserve_id                  object
hotel_id                    object
customer_id                 object
reserve_datetime    datetime64[ns]
checkin_date                object
checkin_time                object
checkout_date               object
people_num                   int64
total_price                  int64
reserve_season            category
dtype: object


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price,reserve_season
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200,spring
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600,summer
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600,antumn
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400,spring
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100,antumn


In [38]:
pd.DataFrame(reserve_tb['reserve_datetime'].dt.month).head()

Unnamed: 0,reserve_datetime
0,3
1,7
2,9
3,3
4,9


### 10-6　時間帯への変換

10-5 季節への変換と全く同じ方法で前処理できる。

### 10-7　平日／休日への変換

In [39]:
reserve_tb = pd.read_csv('reserve.csv')

print(reserve_tb.dtypes)
print(reserve_tb.shape)
reserve_tb.head()

reserve_id          object
hotel_id            object
customer_id         object
reserve_datetime    object
checkin_date        object
checkin_time        object
checkout_date       object
people_num           int64
total_price          int64
dtype: object
(4030, 9)


Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [42]:
holiday_mst = pd.read_csv('holiday_mst.csv')

print(holiday_mst.dtypes)
print(holiday_mst.shape)
holiday_mst.head()

target_day                object
holidayday_flg              bool
nextday_is_holiday_flg      bool
dtype: object
(1186, 3)


Unnamed: 0,target_day,holidayday_flg,nextday_is_holiday_flg
0,2016-01-01,True,True
1,2016-01-02,True,True
2,2016-01-03,True,False
3,2016-01-04,False,False
4,2016-01-05,False,False


In [44]:
pd.merge(reserve_tb, holiday_mst, left_on='checkin_date', right_on='target_day').head()

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price,target_day,holidayday_flg,nextday_is_holiday_flg
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200,2016-03-26,True,True
1,r1269,h_138,c_309,2016-03-14 13:57:45,2016-03-26,11:30:00,2016-03-29,4,115200,2016-03-26,True,True
2,r2192,h_267,c_547,2016-03-21 09:23:13,2016-03-26,11:00:00,2016-03-27,2,19600,2016-03-26,True,True
3,r2288,h_144,c_574,2016-03-05 23:44:17,2016-03-26,12:30:00,2016-03-28,3,60000,2016-03-26,True,True
4,r2987,h_230,c_754,2016-03-21 07:00:01,2016-03-26,10:00:00,2016-03-27,2,34800,2016-03-26,True,True


left_onで左側に来るDataFrameのキーを、right_onで右側に来るDataFrameのキーを指定している。