In [1]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
import preprocessing

# --- Display configuration -------------------------------------------------
pd.set_option('display.max_rows', 60)
pd.set_option('display.max_columns', 60)
# Silences pandas' chained-assignment warning for the whole session.
# NOTE(review): the helper modules under `preprocessing` may rely on this — confirm before removing.
pd.set_option('mode.chained_assignment', None)

# Font used by matplotlib for all text (same effect as plt.rc('font', family=...)).
plt.rcParams['font.family'] = 'malgun gothic'

# --- Raw inputs --------------------------------------------------------------
# Three of the files are read as cp949; the other two use the read_csv default encoding.
smart_card_data = pd.read_csv('./data/smart_card_data.csv')
bus_sttn = pd.read_csv('./data/bus_sttn.csv')
route_curve = pd.read_csv('./data/route_curve.csv', encoding='cp949')
getrouteinfoall = pd.read_csv(
    './data/getRouteInfoAll.csv',
    encoding='cp949',
    index_col=0,
)
getstationbyrouteall = pd.read_csv(
    './data/getStationByRouteAll.csv',
    encoding='cp949',
    index_col=0,
)

# --- Column subsets handed to the merge helpers ------------------------------
# getrouteinfoall: route code and route number only
getrouteinfoall_df = getrouteinfoall[['ROUTE_CD', 'ROUTE_NO']]

# bus_sttn: station id and its two position columns only
bus_sttn_df = bus_sttn[['bus_sttn_id', 'posx', 'posy']]

In [2]:
from preprocessing import data_preprocessing

# NOTE(review): this rebinds `smart_card_data`, so re-running the cell feeds the
# already-preprocessed frame back into smart_card_preprocessing — run it once per fresh kernel.
smart_card_data = smart_card_data.pipe(data_preprocessing.smart_card_preprocessing)
smart_card_data.iloc[:3]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE,TRANSF_CNT,BUS_ROUTE_ID,USR_TYPE,GETON_BUS_STTN_ID,GETOFF_BUS_STTN_ID,YSR_CNT,GETON_PAY,GETOFF_PAY,GETON_DATETIME,GETOFF_DATETIME,WEEKDAY,DAY
0,541463722,680,0,30300052,1,8002432,8001526,1,1250,0,2022-04-01 08:17:06,2022-04-01 08:30:41,4,1
1,540104832,675,0,30300083,1,8001279,8001538,1,1250,0,2022-04-01 09:25:42,2022-04-01 09:37:29,4,1
2,608695670,673,1,30300003,1,8001777,8007023,1,0,0,2022-04-01 19:12:53,2022-04-01 19:21:42,4,1


In [3]:
def day_preprocessing(exdata, day):
    """Return the rows of one day of the month, with the boarding hour added.

    Parameters
    ----------
    exdata : pandas.DataFrame
        Frame containing at least the columns 'DAY', 'WEEKDAY' and
        'GETON_DATETIME'. 'GETON_DATETIME' must be datetime-typed because the
        `.dt` accessor is used on it.
    day : int
        Value compared for equality against the 'DAY' column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows where ``DAY == day``, without the
        'WEEKDAY' and 'DAY' columns and with an extra trailing 'GETON_HOUR'
        column (hour component of 'GETON_DATETIME'). `exdata` is not modified.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    day_rows = exdata.loc[exdata['DAY'] == day]
    # Build the result with non-mutating calls instead of `inplace=True` and
    # column assignment on a filtered slice, so the function no longer depends
    # on the chained-assignment warning being switched off.
    return (
        day_rows
        .drop(columns=['WEEKDAY', 'DAY'])
        .assign(GETON_HOUR=day_rows['GETON_DATETIME'].dt.hour)
    )

# 월요일

In [4]:
# One frame per DAY value 4, 11, 18 and 25 (the cells under the Monday header).
df_0404, df_0411, df_0418, df_0425 = (
    day_preprocessing(smart_card_data, day_of_month)
    for day_of_month in (4, 11, 18, 25)
)

## 출근 시간대 (7시, 8시, 9시)

In [5]:
from preprocessing import transf_data_preprocessing

# Apply the same (7, 8, 9) time filter to each Monday frame.
# NOTE(review): presumably filtering_time keeps rows whose GETON_HOUR is one of the given hours — confirm in the module.
df_0404_go_to, df_0411_go_to, df_0418_go_to, df_0425_go_to = (
    transf_data_preprocessing.filtering_time(day_frame, 7, 8, 9)
    for day_frame in (df_0404, df_0411, df_0418, df_0425)
)

### 환승횟수 2회

In [6]:
# transf_2_preprocessing returns a pair per day; both halves are kept under the
# `transf_is_2_*` / `transf_is_not_2_*` names.
(
    (transf_is_2_morning_0404, transf_is_not_2_morning_0404),
    (transf_is_2_morning_0411, transf_is_not_2_morning_0411),
    (transf_is_2_morning_0418, transf_is_not_2_morning_0418),
    (transf_is_2_morning_0425, transf_is_not_2_morning_0425),
) = map(
    transf_data_preprocessing.transf_2_preprocessing,
    (df_0404_go_to, df_0411_go_to, df_0418_go_to, df_0425_go_to),
)

In [7]:
# Run transf_2_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_2_morning_0404_merge,
    transf_is_2_morning_0411_merge,
    transf_is_2_morning_0418_merge,
    transf_is_2_morning_0425_merge,
) = (
    transf_data_preprocessing.transf_2_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_2_morning_0404,
        transf_is_2_morning_0411,
        transf_is_2_morning_0418,
        transf_is_2_morning_0425,
    )
)

In [8]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_2_morning_monday = pd.concat(
    [
        transf_is_2_morning_0404_merge,
        transf_is_2_morning_0411_merge,
        transf_is_2_morning_0418_merge,
        transf_is_2_morning_0425_merge,
    ]
)
transf_is_2_morning_monday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE_1,ROUTE_NO_1,TRANSF_CNT,GETON_BUS_STTN_ID_1,GETON_PAY_1,GETON_DATETIME_1,GETON_BUS_X_1,GETON_BUS_Y_1,GETOFF_BUS_STTN_ID_1,GETOFF_PAY_1,GETOFF_DATETIME_1,GETOFF_BUS_X_1,GETOFF_BUS_Y_1,TR_MEANS_TYPE_2,ROUTE_NO_2,GETON_BUS_STTN_ID_2,GETON_PAY_2,GETON_DATETIME_2,GETON_BUS_X_2,GETON_BUS_Y_2,GETOFF_BUS_STTN_ID_2,GETOFF_PAY_2,GETOFF_DATETIME_2,GETOFF_BUS_X_2,GETOFF_BUS_Y_2,TR_MEANS_TYPE_3,ROUTE_NO_3,GETON_BUS_STTN_ID_3,GETON_PAY_3,GETON_DATETIME_3,GETOFF_BUS_STTN_ID_3,GETOFF_PAY_3,GETOFF_DATETIME_3,GETOFF_BUS_X_3,GETOFF_BUS_Y_3
0,93643729,676,705,0,8002537,1250,2022-04-04 07:23:39,36.40083,127.40498,8002889,0,2022-04-04 07:48:44,36.448162,127.410385,674,2,8002131,0,2022-04-04 07:58:19,36.450222,127.42443,8002133,0,2022-04-04 08:03:33,36.449863,127.428955,676,704,8002133,0,2022-04-04 08:11:01,8007262,0,2022-04-04 08:17:16,36.44777,127.439354
1,531197789,675,611,0,8002307,1250,2022-04-04 07:38:42,36.32833,127.453545,8001417,0,2022-04-04 07:47:57,36.33195,127.43239,674,2,8001418,0,2022-04-04 07:54:01,36.33254,127.43213,8002134,0,2022-04-04 08:39:36,36.449253,127.42927,676,704,8002133,0,2022-04-04 08:49:47,8007262,0,2022-04-04 08:54:05,36.44777,127.439354


### 환승횟수 1회

In [9]:
# transf_1_preprocessing returns a pair per day; both halves are kept.
(
    (transf_is_1_morning_0404, transf_is_not_1_morning_0404),
    (transf_is_1_morning_0411, transf_is_not_1_morning_0411),
    (transf_is_1_morning_0418, transf_is_not_1_morning_0418),
    (transf_is_1_morning_0425, transf_is_not_1_morning_0425),
) = map(
    transf_data_preprocessing.transf_1_preprocessing,
    (df_0404_go_to, df_0411_go_to, df_0418_go_to, df_0425_go_to),
)

In [10]:
# Run transf_1_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_1_morning_0404_merge,
    transf_is_1_morning_0411_merge,
    transf_is_1_morning_0418_merge,
    transf_is_1_morning_0425_merge,
) = (
    transf_data_preprocessing.transf_1_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_1_morning_0404,
        transf_is_1_morning_0411,
        transf_is_1_morning_0418,
        transf_is_1_morning_0425,
    )
)

In [15]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_1_morning_monday = pd.concat(
    [
        transf_is_1_morning_0404_merge,
        transf_is_1_morning_0411_merge,
        transf_is_1_morning_0418_merge,
        transf_is_1_morning_0425_merge,
    ]
)
transf_is_1_morning_monday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE_1,ROUTE_NO_1,TRANSF_CNT,GETON_BUS_STTN_ID_1,GETON_PAY_1,GETON_DATETIME_1,GETON_BUS_X_1,GETON_BUS_Y_1,GETOFF_BUS_STTN_ID_1,GETOFF_PAY_1,GETOFF_DATETIME_1,GETOFF_BUS_X_1,GETOFF_BUS_Y_1,TR_MEANS_TYPE_2,ROUTE_NO_2,GETON_BUS_STTN_ID_2,GETON_PAY_2,GETON_DATETIME_2,GETON_BUS_X_2,GETON_BUS_Y_2,GETOFF_BUS_STTN_ID_2,GETOFF_PAY_2,GETOFF_DATETIME_2,GETOFF_BUS_X_2,GETOFF_BUS_Y_2
0,90026692,675,512,0,8002903,1250,2022-04-04 07:58:49,36.35071,127.41927,8001876,0,2022-04-04 08:06:30,36.336464,127.43009,676,201,8001876,0,2022-04-04 08:13:45,36.336464,127.43009,8002605,0,2022-04-04 08:24:16,36.3243,127.418655
1,755890682,675,616,0,8002361,1250,2022-04-04 08:29:22,36.36763,127.41576,8001876,0,2022-04-04 08:44:00,36.336464,127.43009,675,613,8001876,0,2022-04-04 08:45:52,36.336464,127.43009,8002605,0,2022-04-04 08:58:33,36.3243,127.418655


### 환승횟수 0회

In [12]:
# transf_0_preprocessing returns a pair per day; both halves are kept.
(
    (transf_is_0_morning_0404, transf_is_not_0_morning_0404),
    (transf_is_0_morning_0411, transf_is_not_0_morning_0411),
    (transf_is_0_morning_0418, transf_is_not_0_morning_0418),
    (transf_is_0_morning_0425, transf_is_not_0_morning_0425),
) = map(
    transf_data_preprocessing.transf_0_preprocessing,
    (df_0404_go_to, df_0411_go_to, df_0418_go_to, df_0425_go_to),
)

In [13]:
# Run transf_0_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_0_morning_0404_merge,
    transf_is_0_morning_0411_merge,
    transf_is_0_morning_0418_merge,
    transf_is_0_morning_0425_merge,
) = (
    transf_data_preprocessing.transf_0_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_0_morning_0404,
        transf_is_0_morning_0411,
        transf_is_0_morning_0418,
        transf_is_0_morning_0425,
    )
)

In [16]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_0_morning_monday = pd.concat(
    [
        transf_is_0_morning_0404_merge,
        transf_is_0_morning_0411_merge,
        transf_is_0_morning_0418_merge,
        transf_is_0_morning_0425_merge,
    ]
)
transf_is_0_morning_monday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE,ROUTE_NO,TRANSF_CNT,GETON_BUS_STTN_ID,GETON_PAY,GETON_DATETIME,GETON_BUS_X,GETON_BUS_Y,GETOFF_BUS_STTN_ID,GETOFF_PAY,GETOFF_DATETIME,GETOFF_BUS_X,GETOFF_BUS_Y
0,767600446,677,1002,0,8007344,1250,2022-04-04 09:19:36,36.47576,127.281784,8001677,300,2022-04-04 09:32:42,36.392002,127.31382
1,750411291,677,1002,0,8007344,1250,2022-04-04 07:16:53,36.47576,127.281784,8001677,300,2022-04-04 07:32:55,36.392002,127.31382


## 퇴근 시간대 (16시, 17시, 18시)

In [17]:
from preprocessing import transf_data_preprocessing

# Apply the same (16, 17, 18) time filter to each Monday frame.
# NOTE(review): presumably filtering_time keeps rows whose GETON_HOUR is one of the given hours — confirm in the module.
df_0404_leave_to, df_0411_leave_to, df_0418_leave_to, df_0425_leave_to = (
    transf_data_preprocessing.filtering_time(day_frame, 16, 17, 18)
    for day_frame in (df_0404, df_0411, df_0418, df_0425)
)

### 환승횟수 2회

In [18]:
# transf_2_preprocessing returns a pair per day; both halves are kept.
(
    (transf_is_2_night_0404, transf_is_not_2_night_0404),
    (transf_is_2_night_0411, transf_is_not_2_night_0411),
    (transf_is_2_night_0418, transf_is_not_2_night_0418),
    (transf_is_2_night_0425, transf_is_not_2_night_0425),
) = map(
    transf_data_preprocessing.transf_2_preprocessing,
    (df_0404_leave_to, df_0411_leave_to, df_0418_leave_to, df_0425_leave_to),
)

In [19]:
# Run transf_2_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_2_night_0404_merge,
    transf_is_2_night_0411_merge,
    transf_is_2_night_0418_merge,
    transf_is_2_night_0425_merge,
) = (
    transf_data_preprocessing.transf_2_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_2_night_0404,
        transf_is_2_night_0411,
        transf_is_2_night_0418,
        transf_is_2_night_0425,
    )
)

In [20]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_2_night_monday = pd.concat(
    [
        transf_is_2_night_0404_merge,
        transf_is_2_night_0411_merge,
        transf_is_2_night_0418_merge,
        transf_is_2_night_0425_merge,
    ]
)
transf_is_2_night_monday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE_1,ROUTE_NO_1,TRANSF_CNT,GETON_BUS_STTN_ID_1,GETON_PAY_1,GETON_DATETIME_1,GETON_BUS_X_1,GETON_BUS_Y_1,GETOFF_BUS_STTN_ID_1,GETOFF_PAY_1,GETOFF_DATETIME_1,GETOFF_BUS_X_1,GETOFF_BUS_Y_1,TR_MEANS_TYPE_2,ROUTE_NO_2,GETON_BUS_STTN_ID_2,GETON_PAY_2,GETON_DATETIME_2,GETON_BUS_X_2,GETON_BUS_Y_2,GETOFF_BUS_STTN_ID_2,GETOFF_PAY_2,GETOFF_DATETIME_2,GETOFF_BUS_X_2,GETOFF_BUS_Y_2,TR_MEANS_TYPE_3,ROUTE_NO_3,GETON_BUS_STTN_ID_3,GETON_PAY_3,GETON_DATETIME_3,GETOFF_BUS_STTN_ID_3,GETOFF_PAY_3,GETOFF_DATETIME_3,GETOFF_BUS_X_3,GETOFF_BUS_Y_3
0,644674991,676,102,0,8002345,1250,2022-04-04 16:01:22,36.33444,127.44585,8001348,0,2022-04-04 16:03:20,36.32906,127.441734,674,1,8001348,0,2022-04-04 16:07:24,36.32906,127.441734,8002119,0,2022-04-04 16:35:33,36.3117,127.37664,675,115,8002119,0,2022-04-04 16:45:14,8070011,0,2022-04-04 16:55:22,36.31854,127.34803
1,400151515,675,916,0,8002761,1250,2022-04-04 16:00:27,36.341293,127.39681,8002276,0,2022-04-04 16:03:44,36.337902,127.392494,675,211,8002276,0,2022-04-04 16:05:01,36.337902,127.392494,8002119,0,2022-04-04 16:20:21,36.3117,127.37664,681,21,8002119,0,2022-04-04 16:40:56,8002438,0,2022-04-04 16:53:45,36.297485,127.355156


### 환승횟수 1회

In [21]:
# transf_1_preprocessing returns a pair per day; both halves are kept.
(
    (transf_is_1_night_0404, transf_is_not_1_night_0404),
    (transf_is_1_night_0411, transf_is_not_1_night_0411),
    (transf_is_1_night_0418, transf_is_not_1_night_0418),
    (transf_is_1_night_0425, transf_is_not_1_night_0425),
) = map(
    transf_data_preprocessing.transf_1_preprocessing,
    (df_0404_leave_to, df_0411_leave_to, df_0418_leave_to, df_0425_leave_to),
)

In [22]:
# Run transf_1_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_1_night_0404_merge,
    transf_is_1_night_0411_merge,
    transf_is_1_night_0418_merge,
    transf_is_1_night_0425_merge,
) = (
    transf_data_preprocessing.transf_1_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_1_night_0404,
        transf_is_1_night_0411,
        transf_is_1_night_0418,
        transf_is_1_night_0425,
    )
)

In [23]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_1_night_monday = pd.concat(
    [
        transf_is_1_night_0404_merge,
        transf_is_1_night_0411_merge,
        transf_is_1_night_0418_merge,
        transf_is_1_night_0425_merge,
    ]
)
transf_is_1_night_monday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE_1,ROUTE_NO_1,TRANSF_CNT,GETON_BUS_STTN_ID_1,GETON_PAY_1,GETON_DATETIME_1,GETON_BUS_X_1,GETON_BUS_Y_1,GETOFF_BUS_STTN_ID_1,GETOFF_PAY_1,GETOFF_DATETIME_1,GETOFF_BUS_X_1,GETOFF_BUS_Y_1,TR_MEANS_TYPE_2,ROUTE_NO_2,GETON_BUS_STTN_ID_2,GETON_PAY_2,GETON_DATETIME_2,GETON_BUS_X_2,GETON_BUS_Y_2,GETOFF_BUS_STTN_ID_2,GETOFF_PAY_2,GETOFF_DATETIME_2,GETOFF_BUS_X_2,GETOFF_BUS_Y_2
0,90026785,675,314,0,8002707,1250,2022-04-04 16:17:35,36.30422,127.37948,8001653,0,2022-04-04 16:22:58,36.3103,127.390205,675,513,8001653,0,2022-04-04 16:41:00,36.3103,127.390205,8002457,0,2022-04-04 17:10:56,36.301357,127.45215
1,786002827,676,606,0,8001102,1250,2022-04-04 16:21:04,36.382385,127.37842,8001876,0,2022-04-04 16:55:14,36.336464,127.43009,679,501,8001876,0,2022-04-04 16:58:50,36.336464,127.43009,8002457,0,2022-04-04 17:18:51,36.301357,127.45215


### 환승횟수 0회

In [24]:
# transf_0_preprocessing returns a pair per day; both halves are kept.
(
    (transf_is_0_night_0404, transf_is_not_0_night_0404),
    (transf_is_0_night_0411, transf_is_not_0_night_0411),
    (transf_is_0_night_0418, transf_is_not_0_night_0418),
    (transf_is_0_night_0425, transf_is_not_0_night_0425),
) = map(
    transf_data_preprocessing.transf_0_preprocessing,
    (df_0404_leave_to, df_0411_leave_to, df_0418_leave_to, df_0425_leave_to),
)

In [25]:
# Run transf_0_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_0_night_0404_merge,
    transf_is_0_night_0411_merge,
    transf_is_0_night_0418_merge,
    transf_is_0_night_0425_merge,
) = (
    transf_data_preprocessing.transf_0_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_0_night_0404,
        transf_is_0_night_0411,
        transf_is_0_night_0418,
        transf_is_0_night_0425,
    )
)

In [26]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_0_night_monday = pd.concat(
    [
        transf_is_0_night_0404_merge,
        transf_is_0_night_0411_merge,
        transf_is_0_night_0418_merge,
        transf_is_0_night_0425_merge,
    ]
)
transf_is_0_night_monday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE,ROUTE_NO,TRANSF_CNT,GETON_BUS_STTN_ID,GETON_PAY,GETON_DATETIME,GETON_BUS_X,GETON_BUS_Y,GETOFF_BUS_STTN_ID,GETOFF_PAY,GETOFF_DATETIME,GETOFF_BUS_X,GETOFF_BUS_Y
0,661771805,676,705,0,8001922,1250,2022-04-04 16:36:36,36.36827,127.37935,8002702,0,2022-04-04 16:53:03,36.35111,127.39719
1,708856323,676,705,0,8001922,1250,2022-04-04 18:06:14,36.36827,127.37935,8002702,0,2022-04-04 18:31:11,36.35111,127.39719


# 화요일

In [27]:
# One frame per DAY value 5, 12, 19 and 26 (the cells under the Tuesday header).
df_0405, df_0412, df_0419, df_0426 = (
    day_preprocessing(smart_card_data, day_of_month)
    for day_of_month in (5, 12, 19, 26)
)

## 출근 시간대 (7시, 8시, 9시)

In [28]:
from preprocessing import transf_data_preprocessing

# Apply the same (7, 8, 9) time filter to each Tuesday frame.
# NOTE(review): presumably filtering_time keeps rows whose GETON_HOUR is one of the given hours — confirm in the module.
df_0405_go_to, df_0412_go_to, df_0419_go_to, df_0426_go_to = (
    transf_data_preprocessing.filtering_time(day_frame, 7, 8, 9)
    for day_frame in (df_0405, df_0412, df_0419, df_0426)
)

### 환승횟수 2회

In [29]:
# transf_2_preprocessing returns a pair per day; both halves are kept.
(
    (transf_is_2_morning_0405, transf_is_not_2_morning_0405),
    (transf_is_2_morning_0412, transf_is_not_2_morning_0412),
    (transf_is_2_morning_0419, transf_is_not_2_morning_0419),
    (transf_is_2_morning_0426, transf_is_not_2_morning_0426),
) = map(
    transf_data_preprocessing.transf_2_preprocessing,
    (df_0405_go_to, df_0412_go_to, df_0419_go_to, df_0426_go_to),
)

In [30]:
# Run transf_2_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_2_morning_0405_merge,
    transf_is_2_morning_0412_merge,
    transf_is_2_morning_0419_merge,
    transf_is_2_morning_0426_merge,
) = (
    transf_data_preprocessing.transf_2_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_2_morning_0405,
        transf_is_2_morning_0412,
        transf_is_2_morning_0419,
        transf_is_2_morning_0426,
    )
)

In [31]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_2_morning_tuesday = pd.concat(
    [
        transf_is_2_morning_0405_merge,
        transf_is_2_morning_0412_merge,
        transf_is_2_morning_0419_merge,
        transf_is_2_morning_0426_merge,
    ]
)
transf_is_2_morning_tuesday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE_1,ROUTE_NO_1,TRANSF_CNT,GETON_BUS_STTN_ID_1,GETON_PAY_1,GETON_DATETIME_1,GETON_BUS_X_1,GETON_BUS_Y_1,GETOFF_BUS_STTN_ID_1,GETOFF_PAY_1,GETOFF_DATETIME_1,GETOFF_BUS_X_1,GETOFF_BUS_Y_1,TR_MEANS_TYPE_2,ROUTE_NO_2,GETON_BUS_STTN_ID_2,GETON_PAY_2,GETON_DATETIME_2,GETON_BUS_X_2,GETON_BUS_Y_2,GETOFF_BUS_STTN_ID_2,GETOFF_PAY_2,GETOFF_DATETIME_2,GETOFF_BUS_X_2,GETOFF_BUS_Y_2,TR_MEANS_TYPE_3,ROUTE_NO_3,GETON_BUS_STTN_ID_3,GETON_PAY_3,GETON_DATETIME_3,GETOFF_BUS_STTN_ID_3,GETOFF_PAY_3,GETOFF_DATETIME_3,GETOFF_BUS_X_3,GETOFF_BUS_Y_3
0,90027797,676,203,0,8001079,1250,2022-04-05 07:08:43,36.30686,127.34085,8002814,0,2022-04-05 07:33:16,36.35306,127.379524,676,301,8002814,0,2022-04-05 07:36:37,36.35306,127.379524,8001923,0,2022-04-05 07:42:55,36.36857,127.37971,676,705,8001923,0,2022-04-05 07:49:13,8002537,0,2022-04-05 08:06:34,36.40083,127.40498
1,632569347,676,203,0,8001089,1250,2022-04-05 08:57:19,36.3473,127.37599,8002453,0,2022-04-05 09:00:26,36.3506,127.37813,676,301,8002814,0,2022-04-05 09:04:09,36.35306,127.379524,8001923,0,2022-04-05 09:11:10,36.36857,127.37971,676,705,8001923,0,2022-04-05 09:18:50,8002537,0,2022-04-05 09:30:51,36.40083,127.40498


### 환승횟수 1회

In [34]:
# transf_1_preprocessing returns a pair per day; both halves are kept.
(
    (transf_is_1_morning_0405, transf_is_not_1_morning_0405),
    (transf_is_1_morning_0412, transf_is_not_1_morning_0412),
    (transf_is_1_morning_0419, transf_is_not_1_morning_0419),
    (transf_is_1_morning_0426, transf_is_not_1_morning_0426),
) = map(
    transf_data_preprocessing.transf_1_preprocessing,
    (df_0405_go_to, df_0412_go_to, df_0419_go_to, df_0426_go_to),
)

In [35]:
# Run transf_1_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_1_morning_0405_merge,
    transf_is_1_morning_0412_merge,
    transf_is_1_morning_0419_merge,
    transf_is_1_morning_0426_merge,
) = (
    transf_data_preprocessing.transf_1_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_1_morning_0405,
        transf_is_1_morning_0412,
        transf_is_1_morning_0419,
        transf_is_1_morning_0426,
    )
)

In [36]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_1_morning_tuesday = pd.concat(
    [
        transf_is_1_morning_0405_merge,
        transf_is_1_morning_0412_merge,
        transf_is_1_morning_0419_merge,
        transf_is_1_morning_0426_merge,
    ]
)
transf_is_1_morning_tuesday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE_1,ROUTE_NO_1,TRANSF_CNT,GETON_BUS_STTN_ID_1,GETON_PAY_1,GETON_DATETIME_1,GETON_BUS_X_1,GETON_BUS_Y_1,GETOFF_BUS_STTN_ID_1,GETOFF_PAY_1,GETOFF_DATETIME_1,GETOFF_BUS_X_1,GETOFF_BUS_Y_1,TR_MEANS_TYPE_2,ROUTE_NO_2,GETON_BUS_STTN_ID_2,GETON_PAY_2,GETON_DATETIME_2,GETON_BUS_X_2,GETON_BUS_Y_2,GETOFF_BUS_STTN_ID_2,GETOFF_PAY_2,GETOFF_DATETIME_2,GETOFF_BUS_X_2,GETOFF_BUS_Y_2
0,88299523,676,704,0,8002963,1250,2022-04-05 09:25:39,36.385365,127.35298,8001196,0,2022-04-05 09:34:36,36.36625,127.33871,675,119,8001282,0,2022-04-05 09:57:21,36.36545,127.335724,8005905,0,2022-04-05 09:59:20,36.369205,127.327194
1,753277195,676,704,0,8001438,1250,2022-04-05 08:05:06,36.315563,127.315384,8002412,0,2022-04-05 08:25:44,36.354378,127.342026,675,119,8002412,0,2022-04-05 08:30:52,36.354378,127.342026,8005905,0,2022-04-05 08:44:54,36.369205,127.327194


### 환승횟수 0회

In [39]:
# transf_0_preprocessing returns a pair per day; both halves are kept.
(
    (transf_is_0_morning_0405, transf_is_not_0_morning_0405),
    (transf_is_0_morning_0412, transf_is_not_0_morning_0412),
    (transf_is_0_morning_0419, transf_is_not_0_morning_0419),
    (transf_is_0_morning_0426, transf_is_not_0_morning_0426),
) = map(
    transf_data_preprocessing.transf_0_preprocessing,
    (df_0405_go_to, df_0412_go_to, df_0419_go_to, df_0426_go_to),
)

In [40]:
# Run transf_0_merge on each day's frame together with the route and station lookup frames.
(
    transf_is_0_morning_0405_merge,
    transf_is_0_morning_0412_merge,
    transf_is_0_morning_0419_merge,
    transf_is_0_morning_0426_merge,
) = (
    transf_data_preprocessing.transf_0_merge(daily_trips, getrouteinfoall_df, bus_sttn_df)
    for daily_trips in (
        transf_is_0_morning_0405,
        transf_is_0_morning_0412,
        transf_is_0_morning_0419,
        transf_is_0_morning_0426,
    )
)

In [41]:
# Stack the four daily frames; each frame's own row labels are kept (default ignore_index=False).
transf_is_0_morning_tuesday = pd.concat(
    [
        transf_is_0_morning_0405_merge,
        transf_is_0_morning_0412_merge,
        transf_is_0_morning_0419_merge,
        transf_is_0_morning_0426_merge,
    ]
)
transf_is_0_morning_tuesday.iloc[:2]

Unnamed: 0,CARD_NO,TR_MEANS_TYPE,ROUTE_NO,TRANSF_CNT,GETON_BUS_STTN_ID,GETON_PAY,GETON_DATETIME,GETON_BUS_X,GETON_BUS_Y,GETOFF_BUS_STTN_ID,GETOFF_PAY,GETOFF_DATETIME,GETOFF_BUS_X,GETOFF_BUS_Y
0,742373150,675,314,0,8002705,1250,2022-04-05 08:32:16,36.30011,127.37878,8001927,0,2022-04-05 08:49:00,36.321434,127.409584
1,725892839,675,314,0,8002705,1250,2022-04-05 08:08:17,36.30011,127.37878,8001927,0,2022-04-05 08:27:29,36.321434,127.409584


In [42]:
# IDs of the ten most frequent GETOFF_BUS_STTN_ID values (value_counts sorts by count, descending).
(
    transf_is_0_morning_tuesday['GETOFF_BUS_STTN_ID']
    .value_counts()
    .iloc[:10]
    .index
)

Index([8002453, 8002412, 8001097, 8002814, 8001927, 8002969, 8001420, 8002279,
       8001934, 8001412],
      dtype='int32', name='GETOFF_BUS_STTN_ID')

In [43]:
# The ten most frequent GETOFF_BUS_STTN_ID values together with their row counts.
(
    transf_is_0_morning_tuesday['GETOFF_BUS_STTN_ID']
    .value_counts()
    .iloc[:10]
)

GETOFF_BUS_STTN_ID
8002453    3546
8002412    1937
8001097    1909
8002814    1762
8001927    1609
8002969    1536
8001420    1454
8002279    1387
8001934    1384
8001412    1362
Name: count, dtype: int64

## 퇴근 시간대 (16시, 17시, 18시)

In [None]:
from preprocessing import transf_data_preprocessing

df_0404_leave_to = transf_data_preprocessing.filtering_time(df_0404, 16, 17, 18)
df_0411_leave_to = transf_data_preprocessing.filtering_time(df_0411, 16, 17, 18)
df_0418_leave_to = transf_data_preprocessing.filtering_time(df_0418, 16, 17, 18)
df_0425_leave_to = transf_data_preprocessing.filtering_time(df_0425, 16, 17, 18)

### 환승횟수 2회

In [None]:
transf_is_2_night_0404, transf_is_not_2_night_0404 = transf_data_preprocessing.transf_2_preprocessing(df_0404_leave_to)
transf_is_2_night_0411, transf_is_not_2_night_0411 = transf_data_preprocessing.transf_2_preprocessing(df_0411_leave_to)
transf_is_2_night_0418, transf_is_not_2_night_0418 = transf_data_preprocessing.transf_2_preprocessing(df_0418_leave_to)
transf_is_2_night_0425, transf_is_not_2_night_0425 = transf_data_preprocessing.transf_2_preprocessing(df_0425_leave_to)

In [None]:
transf_is_2_night_0404_merge = transf_data_preprocessing.transf_2_merge(transf_is_2_night_0404, getrouteinfoall_df, bus_sttn_df)
transf_is_2_night_0411_merge = transf_data_preprocessing.transf_2_merge(transf_is_2_night_0411, getrouteinfoall_df, bus_sttn_df)
transf_is_2_night_0418_merge = transf_data_preprocessing.transf_2_merge(transf_is_2_night_0418, getrouteinfoall_df, bus_sttn_df)
transf_is_2_night_0425_merge = transf_data_preprocessing.transf_2_merge(transf_is_2_night_0425, getrouteinfoall_df, bus_sttn_df)

In [None]:
transf_is_2_night_monday = pd.concat([transf_is_2_night_0404_merge,transf_is_2_night_0411_merge,transf_is_2_night_0418_merge,transf_is_2_night_0425_merge])
transf_is_2_night_monday.head(2)

Unnamed: 0,CARD_NO,TR_MEANS_TYPE_1,ROUTE_NO_1,TRANSF_CNT,GETON_BUS_STTN_ID_1,GETON_PAY_1,GETON_DATETIME_1,GETON_BUS_X_1,GETON_BUS_Y_1,GETOFF_BUS_STTN_ID_1,GETOFF_PAY_1,GETOFF_DATETIME_1,GETOFF_BUS_X_1,GETOFF_BUS_Y_1,TR_MEANS_TYPE_2,ROUTE_NO_2,GETON_BUS_STTN_ID_2,GETON_PAY_2,GETON_DATETIME_2,GETON_BUS_X_2,GETON_BUS_Y_2,GETOFF_BUS_STTN_ID_2,GETOFF_PAY_2,GETOFF_DATETIME_2,GETOFF_BUS_X_2,GETOFF_BUS_Y_2,TR_MEANS_TYPE_3,ROUTE_NO_3,GETON_BUS_STTN_ID_3,GETON_PAY_3,GETON_DATETIME_3,GETOFF_BUS_STTN_ID_3,GETOFF_PAY_3,GETOFF_DATETIME_3,GETOFF_BUS_X_3,GETOFF_BUS_Y_3
0,644674991,676,102,0,8002345,1250,2022-04-04 16:01:22,36.33444,127.44585,8001348,0,2022-04-04 16:03:20,36.32906,127.441734,674,1,8001348,0,2022-04-04 16:07:24,36.32906,127.441734,8002119,0,2022-04-04 16:35:33,36.3117,127.37664,675,115,8002119,0,2022-04-04 16:45:14,8070011,0,2022-04-04 16:55:22,36.31854,127.34803
1,400151515,675,916,0,8002761,1250,2022-04-04 16:00:27,36.341293,127.39681,8002276,0,2022-04-04 16:03:44,36.337902,127.392494,675,211,8002276,0,2022-04-04 16:05:01,36.337902,127.392494,8002119,0,2022-04-04 16:20:21,36.3117,127.37664,681,21,8002119,0,2022-04-04 16:40:56,8002438,0,2022-04-04 16:53:45,36.297485,127.355156


### 환승횟수 1회

In [None]:
transf_is_1_night_0404, transf_is_not_1_night_0404 = transf_data_preprocessing.transf_1_preprocessing(df_0404_leave_to)
transf_is_1_night_0411, transf_is_not_1_night_0411 = transf_data_preprocessing.transf_1_preprocessing(df_0411_leave_to)
transf_is_1_night_0418, transf_is_not_1_night_0418 = transf_data_preprocessing.transf_1_preprocessing(df_0418_leave_to)
transf_is_1_night_0425, transf_is_not_1_night_0425 = transf_data_preprocessing.transf_1_preprocessing(df_0425_leave_to)

In [None]:
transf_is_1_night_0404_merge = transf_data_preprocessing.transf_1_merge(transf_is_1_night_0404, getrouteinfoall_df, bus_sttn_df)
transf_is_1_night_0411_merge = transf_data_preprocessing.transf_1_merge(transf_is_1_night_0411, getrouteinfoall_df, bus_sttn_df)
transf_is_1_night_0418_merge = transf_data_preprocessing.transf_1_merge(transf_is_1_night_0418, getrouteinfoall_df, bus_sttn_df)
transf_is_1_night_0425_merge = transf_data_preprocessing.transf_1_merge(transf_is_1_night_0425, getrouteinfoall_df, bus_sttn_df)

In [None]:
transf_is_1_night_monday = pd.concat([transf_is_1_night_0404_merge,transf_is_1_night_0411_merge,transf_is_1_night_0418_merge,transf_is_1_night_0425_merge])
transf_is_1_night_monday.head(2)

Unnamed: 0,CARD_NO,TR_MEANS_TYPE_1,ROUTE_NO_1,TRANSF_CNT,GETON_BUS_STTN_ID_1,GETON_PAY_1,GETON_DATETIME_1,GETON_BUS_X_1,GETON_BUS_Y_1,GETOFF_BUS_STTN_ID_1,GETOFF_PAY_1,GETOFF_DATETIME_1,GETOFF_BUS_X_1,GETOFF_BUS_Y_1,TR_MEANS_TYPE_2,ROUTE_NO_2,GETON_BUS_STTN_ID_2,GETON_PAY_2,GETON_DATETIME_2,GETON_BUS_X_2,GETON_BUS_Y_2,GETOFF_BUS_STTN_ID_2,GETOFF_PAY_2,GETOFF_DATETIME_2,GETOFF_BUS_X_2,GETOFF_BUS_Y_2
0,90026785,675,314,0,8002707,1250,2022-04-04 16:17:35,36.30422,127.37948,8001653,0,2022-04-04 16:22:58,36.3103,127.390205,675,513,8001653,0,2022-04-04 16:41:00,36.3103,127.390205,8002457,0,2022-04-04 17:10:56,36.301357,127.45215
1,786002827,676,606,0,8001102,1250,2022-04-04 16:21:04,36.382385,127.37842,8001876,0,2022-04-04 16:55:14,36.336464,127.43009,679,501,8001876,0,2022-04-04 16:58:50,36.336464,127.43009,8002457,0,2022-04-04 17:18:51,36.301357,127.45215


### 환승횟수 0회

In [None]:
transf_is_0_night_0404, transf_is_not_0_night_0404 = transf_data_preprocessing.transf_0_preprocessing(df_0404_leave_to)
transf_is_0_night_0411, transf_is_not_0_night_0411 = transf_data_preprocessing.transf_0_preprocessing(df_0411_leave_to)
transf_is_0_night_0418, transf_is_not_0_night_0418 = transf_data_preprocessing.transf_0_preprocessing(df_0418_leave_to)
transf_is_0_night_0425, transf_is_not_0_night_0425 = transf_data_preprocessing.transf_0_preprocessing(df_0425_leave_to)

In [None]:
transf_is_0_night_0404_merge = transf_data_preprocessing.transf_0_merge(transf_is_0_night_0404, getrouteinfoall_df, bus_sttn_df)
transf_is_0_night_0411_merge = transf_data_preprocessing.transf_0_merge(transf_is_0_night_0411, getrouteinfoall_df, bus_sttn_df)
transf_is_0_night_0418_merge = transf_data_preprocessing.transf_0_merge(transf_is_0_night_0418, getrouteinfoall_df, bus_sttn_df)
transf_is_0_night_0425_merge = transf_data_preprocessing.transf_0_merge(transf_is_0_night_0425, getrouteinfoall_df, bus_sttn_df)

In [None]:
transf_is_0_night_monday = pd.concat([transf_is_0_night_0404_merge,transf_is_0_night_0411_merge,transf_is_0_night_0418_merge,transf_is_0_night_0425_merge])
transf_is_0_night_monday.head(2)

Unnamed: 0,CARD_NO,TR_MEANS_TYPE,ROUTE_NO,TRANSF_CNT,GETON_BUS_STTN_ID,GETON_PAY,GETON_DATETIME,GETON_BUS_X,GETON_BUS_Y,GETOFF_BUS_STTN_ID,GETOFF_PAY,GETOFF_DATETIME,GETOFF_BUS_X,GETOFF_BUS_Y
0,661771805,676,705,0,8001922,1250,2022-04-04 16:36:36,36.36827,127.37935,8002702,0,2022-04-04 16:53:03,36.35111,127.39719
1,708856323,676,705,0,8001922,1250,2022-04-04 18:06:14,36.36827,127.37935,8002702,0,2022-04-04 18:31:11,36.35111,127.39719


# 수요일

In [31]:
# One frame per DAY value 6, 13, 20 and 27 (the cells under the Wednesday header).
df_0406, df_0413, df_0420, df_0427 = (
    day_preprocessing(smart_card_data, day_of_month)
    for day_of_month in (6, 13, 20, 27)
)

# 목요일

In [32]:
# One frame per DAY value 7, 14, 21 and 28 (the cells under the Thursday header).
df_0407, df_0414, df_0421, df_0428 = (
    day_preprocessing(smart_card_data, day_of_month)
    for day_of_month in (7, 14, 21, 28)
)

# 금요일

In [33]:
# One frame per DAY value 1, 8, 15, 22 and 29 (the cells under the Friday header; five days here).
df_0401, df_0408, df_0415, df_0422, df_0429 = (
    day_preprocessing(smart_card_data, day_of_month)
    for day_of_month in (1, 8, 15, 22, 29)
)