In [1]:
import pandas as pd
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)
pd.set_option('mode.chained_assignment',  None)

plt.rc('font',family='malgun gothic')

smart_card = pd.read_csv('./data/SMART_CARD.csv', header = None)

getrouteinfoall = pd.read_csv('./data/getRouteInfoAll.csv', encoding = 'cp949', index_col = 0)
getstationbyrouteall = pd.read_csv('./data/getStationByRouteAll.csv', encoding = 'cp949', index_col = 0)
bus_sttn = pd.read_csv('./data/bus_sttn.csv')

### getrouteinfoall
getrouteinfoall_df = getrouteinfoall[['ROUTE_CD','ROUTE_NO']] 

### gestationbyrouteall
getstationbyrouteall_df = getstationbyrouteall[['ROUTE_CD','BUSSTOP_NM','BUSSTOP_SEQ','TOTAL_DIST','BUS_NODE_ID']]

### bus_sttn
bus_sttn_df = bus_sttn[['BUS_STTN_ID', 'POSX', 'POSY']]

In [2]:
def smart_card_preprocessing(exdata): # 스마트카드 데이터 전처리
    exdata.columns = ['SERIAL_NO', 'CARD_NO', 'GETON_YMD', 'GETON_TM', 'BUS_ROUTE_ID', 'BUS_CO_ID', 'BUS_CAR_ID', 'GETON_BUS_STTN_ID', 'GETOFF_BUS_STTN_ID',
                      'DRVR_ID', 'TRANSACTION_ID', 'TR_MEANS_TYPE', 'USR_TYPE', 'DRVN_START_YMD', 'DRVN_START_TM', 'GETOFF_YMD', 'GETOFF_TM', 'GETON_CALC_YMD',
                      'GETOFF_CALC_YMD', 'GETON_PAY', 'GETOFF_PAY', 'TRANSF_CNT', 'USR_CNT']
    
    exdata = exdata[exdata['GETOFF_BUS_STTN_ID']!='~      '] # 하차 태그 없는 데이터 제거
    exdata['GETON_TM'] = exdata['GETON_TM'].apply(lambda x: str(x).zfill(6)) # GETON_TM 컬럼 날짜형식으로 변환하기 위해 자릿수 맞춰주기
    
    # datetime 형식으로 변환 및 새로운 column 생성
    exdata['GETON_DATETIME'] = pd.to_datetime(exdata['GETON_YMD'].astype(str) + exdata['GETON_TM'].astype(str), format = '%Y%m%d%H%M%S')
    exdata['GETOFF_DATETIME'] = pd.to_datetime(exdata['GETOFF_YMD'].astype(str) + exdata['GETOFF_TM'].astype(str), format = '%Y%m%d%H%M%S')
    
    exdata['DAY'] = exdata['GETON_DATETIME'].dt.day
    exdata['WEEKDAY'] = exdata['GETON_DATETIME'].dt.weekday  

    # 급행/지선/간선만 필터링
    exdata = exdata[exdata['TR_MEANS_TYPE'].isin([674,675,676])]

    # 평일만 추출
    weekday_values = [0, 1, 2, 3, 4]
    exdata = exdata[exdata['WEEKDAY'].isin(weekday_values)]

    # 6/6일 제거
    exdata = exdata[exdata['DAY'] != 6]
    
    # 비가 온 날 제거
    exdata = exdata[~exdata['DAY'].isin([26,29])]
    
    # int형으로 변환
    exdata['GETOFF_BUS_STTN_ID'] = exdata['GETOFF_BUS_STTN_ID'].astype(int)
    
    # 여러명이 탄 경우 제거
    exdata = exdata[exdata['USR_CNT']==1]
    
    # 사용한 행 제거
    exdata.drop(labels = ['GETON_YMD','GETON_TM','GETOFF_YMD','GETOFF_TM'], axis = 1, inplace = True)

    # 필요없는 행 제거
    exdata.drop(labels = ['SERIAL_NO', 'DRVR_ID','GETON_PAY','GETOFF_PAY','USR_TYPE','USR_CNT','GETON_CALC_YMD', 'GETOFF_CALC_YMD', 'TRANSACTION_ID'], axis = 1, inplace = True)
    
    exdata = exdata[['CARD_NO','BUS_ROUTE_ID', 'TR_MEANS_TYPE', 'GETON_BUS_STTN_ID', 'GETON_DATETIME', 'GETOFF_BUS_STTN_ID', 'GETOFF_DATETIME','DAY']]
    return exdata

In [3]:
smart_card_data = smart_card_preprocessing(smart_card)

In [4]:
df = smart_card_data.copy()
df.head()

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,TR_MEANS_TYPE,GETON_BUS_STTN_ID,GETON_DATETIME,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,DAY
0,677731909,30300100,674,8001029,2023-06-01 05:19:23,8002629,2023-06-01 05:45:44,1
1,657743613,30300100,674,8001029,2023-06-01 05:19:26,8001413,2023-06-01 05:47:12,1
2,312433278,30300100,674,8001029,2023-06-01 05:20:09,8001927,2023-06-01 05:39:38,1
4,769071298,30300100,674,8001029,2023-06-01 05:20:36,8001934,2023-06-01 05:36:55,1
7,697543930,30300100,674,8002351,2023-06-01 05:27:31,8002118,2023-06-01 05:40:55,1


In [5]:
df.shape

(5862099, 8)

In [6]:
# 급행, 지선/간선
BUS_EXPRESS = df[df['TR_MEANS_TYPE'].isin([674])]
CITY_BUS = df[~df['TR_MEANS_TYPE'].isin([674])]

In [7]:
def going_up(getstationbyrouteall,route_cd):
    exdata = getstationbyrouteall[getstationbyrouteall['ROUTE_CD']==route_cd].reset_index()
    index_condition = exdata[exdata['BUSSTOP_TP'] == '2'].index.max()
    exdata = exdata.iloc[:index_condition]
    return exdata

def going_down(getstationbyrouteall,route_cd):
    exdata = getstationbyrouteall[getstationbyrouteall['ROUTE_CD']==route_cd].reset_index()
    index_condition = exdata[exdata['BUSSTOP_TP'] == '2'].index.max()
    exdata = exdata.iloc[index_condition:]
    return exdata

In [8]:
# 거리 계산을 위한 함수 정의
def calculate_distance(row):
    return row['GETOFF_GEOMETRY'].distance(row['GETON_GEOMETRY']) * 111.32  # Approximate factor for km

In [311]:
# import pandas as pd
# import seaborn as sns
# import matplotlib.pyplot as plt

# # 사용량을 카운트하여 데이터프레임 생성
# usage_counts = CITY_BUS['BUS_ROUTE_ID'].value_counts().reset_index()
# usage_counts.columns = ['BUS_ROUTE_ID', 'Usage']

# # 평균 계산
# mean_usage = usage_counts['Usage'].mean()

# # 시각화
# plt.figure(figsize=(10, 6))
# sns.barplot(data=usage_counts, x='BUS_ROUTE_ID', y='Usage', palette='viridis')
# plt.axhline(mean_usage, color='red', linestyle='dashed', label='Mean Usage')
# plt.title('Bus Route Usage Counts')
# plt.xlabel('Bus Route ID')
# plt.ylabel('Usage Count')
# plt.xticks(rotation=-90)
# plt.legend()
# plt.tight_layout()
# plt.show()

In [9]:
# 잘못탄 경우 제거
CITY_BUS = CITY_BUS[CITY_BUS['GETON_BUS_STTN_ID'] != CITY_BUS['GETOFF_BUS_STTN_ID']]

In [10]:
CITY_BUS_TOP = CITY_BUS[CITY_BUS['BUS_ROUTE_ID'].map(CITY_BUS['BUS_ROUTE_ID'].value_counts()) > CITY_BUS['BUS_ROUTE_ID'].value_counts().mean()] # 사용량이 평균값 이상인 노선만 필터링
CITY_BUS_TOP.head(2)

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,TR_MEANS_TYPE,GETON_BUS_STTN_ID,GETON_DATETIME,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,DAY
35,929954937,30300043,676,8001243,2023-06-01 05:30:22,8002717,2023-06-01 05:59:28,1
38,943538994,30300043,676,8001243,2023-06-01 05:30:25,8005590,2023-06-01 06:16:11,1


In [11]:
CITY_BUS_ROUTE_NO = CITY_BUS_TOP.merge(getrouteinfoall_df, left_on = 'BUS_ROUTE_ID', right_on = 'ROUTE_CD')
CITY_BUS_ROUTE_NO = CITY_BUS_ROUTE_NO[['CARD_NO', 'BUS_ROUTE_ID', 'ROUTE_NO', 'GETON_BUS_STTN_ID', 'GETON_DATETIME', 'GETOFF_BUS_STTN_ID', 'GETOFF_DATETIME']]
CITY_BUS_ROUTE_NO.head(2)

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETOFF_BUS_STTN_ID,GETOFF_DATETIME
0,929954937,30300043,108,8001243,2023-06-01 05:30:22,8002717,2023-06-01 05:59:28
1,943538994,30300043,108,8001243,2023-06-01 05:30:25,8005590,2023-06-01 06:16:11


In [12]:
CITY_BUS_ROUTE_NO_COORDINATE_MERGE = CITY_BUS_ROUTE_NO.merge(bus_sttn_df, left_on = 'GETON_BUS_STTN_ID', right_on = 'BUS_STTN_ID')
CITY_BUS_ROUTE_NO_COORDINATE_MERGE.rename(columns={'POSX' : 'GETON_POSX', 'POSY' : 'GETON_POSY'}, inplace = True)
CITY_BUS_ROUTE_NO_COORDINATE_MERGE = CITY_BUS_ROUTE_NO_COORDINATE_MERGE[['CARD_NO','BUS_ROUTE_ID','ROUTE_NO','GETON_BUS_STTN_ID','GETON_DATETIME','GETON_POSX','GETON_POSY','GETOFF_BUS_STTN_ID','GETOFF_DATETIME']]
CITY_BUS_ROUTE_NO_COORDINATE_MERGE = CITY_BUS_ROUTE_NO_COORDINATE_MERGE.merge(bus_sttn_df, left_on = 'GETOFF_BUS_STTN_ID', right_on = 'BUS_STTN_ID')
CITY_BUS_ROUTE_NO_COORDINATE_MERGE.rename(columns={'POSX' : 'GETOFF_POSX', 'POSY' : 'GETOFF_POSY'}, inplace = True)
CITY_BUS_ROUTE_NO_COORDINATE_MERGE = CITY_BUS_ROUTE_NO_COORDINATE_MERGE[['CARD_NO','BUS_ROUTE_ID','ROUTE_NO','GETON_BUS_STTN_ID','GETON_DATETIME','GETON_POSX','GETON_POSY','GETOFF_BUS_STTN_ID','GETOFF_DATETIME','GETOFF_POSX','GETOFF_POSY']]
CITY_BUS_ROUTE_NO_COORDINATE_MERGE

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETON_POSX,GETON_POSY,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETOFF_POSX,GETOFF_POSY
0,929954937,30300043,108,8001243,2023-06-01 05:30:22,36.307575,127.446390,8002717,2023-06-01 05:59:28,36.349094,127.392940
1,933491920,30300043,108,8001243,2023-06-01 07:53:09,36.307575,127.446390,8002717,2023-06-01 08:31:30,36.349094,127.392940
2,939205401,30300043,108,8001243,2023-06-02 12:35:32,36.307575,127.446390,8002717,2023-06-02 13:05:50,36.349094,127.392940
3,698718151,30300043,108,8001243,2023-06-02 14:25:37,36.307575,127.446390,8002717,2023-06-02 14:58:21,36.349094,127.392940
4,929954937,30300043,108,8001243,2023-06-05 05:31:36,36.307575,127.446390,8002717,2023-06-05 05:59:36,36.349094,127.392940
...,...,...,...,...,...,...,...,...,...,...,...
3684070,550868269,30300079,615,8002549,2023-06-30 15:01:20,36.294163,127.362045,8001199,2023-06-30 15:10:34,36.306656,127.365060
3684071,550342662,30300079,615,8002549,2023-06-30 15:01:34,36.294163,127.362045,8001199,2023-06-30 15:10:25,36.306656,127.365060
3684072,550342710,30300079,615,8002549,2023-06-30 15:01:39,36.294163,127.362045,8001199,2023-06-30 15:10:28,36.306656,127.365060
3684073,549470533,30300087,703,9007448,2023-06-12 18:57:40,36.291233,127.360240,8001199,2023-06-12 19:05:16,36.306656,127.365060


In [13]:
from shapely.geometry import Point

# 지오메트리 컬럼 생성
CITY_BUS_ROUTE_NO_COORDINATE_MERGE["GETON_GEOMETRY"] = CITY_BUS_ROUTE_NO_COORDINATE_MERGE.apply(lambda row: Point(row["GETON_POSX"], row["GETON_POSY"]), axis=1)
CITY_BUS_ROUTE_NO_COORDINATE_MERGE["GETOFF_GEOMETRY"] = CITY_BUS_ROUTE_NO_COORDINATE_MERGE.apply(lambda row: Point(row["GETOFF_POSX"], row["GETOFF_POSY"]), axis=1)

In [14]:
CITY_BUS_ROUTE_NO_COORDINATE_MERGE['DIFF_TIME'] = CITY_BUS_ROUTE_NO_COORDINATE_MERGE['GETOFF_DATETIME'] - CITY_BUS_ROUTE_NO_COORDINATE_MERGE['GETON_DATETIME']
CITY_BUS_ROUTE_NO_COORDINATE_MERGE = CITY_BUS_ROUTE_NO_COORDINATE_MERGE[CITY_BUS_ROUTE_NO_COORDINATE_MERGE['DIFF_TIME'] > pd.Timedelta(seconds=30)]
CITY_BUS_ROUTE_NO_COORDINATE_MERGE['DIFF_HOURS'] = CITY_BUS_ROUTE_NO_COORDINATE_MERGE['DIFF_TIME'].dt.total_seconds() / 3600 # DIFF_TIME을 시간 단위로 변환
CITY_BUS_ROUTE_NO_COORDINATE_MERGE.head(3)

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETON_POSX,GETON_POSY,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETOFF_POSX,GETOFF_POSY,GETON_GEOMETRY,GETOFF_GEOMETRY,DIFF_TIME,DIFF_HOURS
0,929954937,30300043,108,8001243,2023-06-01 05:30:22,36.307575,127.44639,8002717,2023-06-01 05:59:28,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:29:06,0.485
1,933491920,30300043,108,8001243,2023-06-01 07:53:09,36.307575,127.44639,8002717,2023-06-01 08:31:30,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:38:21,0.639167
2,939205401,30300043,108,8001243,2023-06-02 12:35:32,36.307575,127.44639,8002717,2023-06-02 13:05:50,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:30:18,0.505


In [15]:
# 거리 계산 후 속도 계산
CITY_BUS_ROUTE_NO_COORDINATE_MERGE['DISTANCE_KM'] = CITY_BUS_ROUTE_NO_COORDINATE_MERGE.apply(calculate_distance, axis=1)
CITY_BUS_ROUTE_NO_COORDINATE_MERGE['SPEED_KMH'] = CITY_BUS_ROUTE_NO_COORDINATE_MERGE['DISTANCE_KM'] / CITY_BUS_ROUTE_NO_COORDINATE_MERGE['DIFF_HOURS']

In [16]:
# 속력 50 이상 제거
CITY_BUS_ROUTE_NO_COORDINATE_MERGE = CITY_BUS_ROUTE_NO_COORDINATE_MERGE[CITY_BUS_ROUTE_NO_COORDINATE_MERGE['SPEED_KMH']<50]
# 속력 1 이하 제거
CITY_BUS_ROUTE_NO_COORDINATE_MERGE = CITY_BUS_ROUTE_NO_COORDINATE_MERGE[CITY_BUS_ROUTE_NO_COORDINATE_MERGE['SPEED_KMH']>=1]

In [17]:
CITY_BUS_ROUTE_NO_COORDINATE_MERGE.head(2)

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETON_POSX,GETON_POSY,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETOFF_POSX,GETOFF_POSY,GETON_GEOMETRY,GETOFF_GEOMETRY,DIFF_TIME,DIFF_HOURS,DISTANCE_KM,SPEED_KMH
0,929954937,30300043,108,8001243,2023-06-01 05:30:22,36.307575,127.44639,8002717,2023-06-01 05:59:28,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:29:06,0.485,7.534259,15.534554
1,933491920,30300043,108,8001243,2023-06-01 07:53:09,36.307575,127.44639,8002717,2023-06-01 08:31:30,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:38:21,0.639167,7.534259,11.787628


In [18]:
getstationbyrouteall_df = getstationbyrouteall[['ROUTE_CD','BUS_NODE_ID','BUSSTOP_SEQ','BUSSTOP_TP']]

---

In [19]:
ROUTE_30300043 = getstationbyrouteall_df[getstationbyrouteall_df['ROUTE_CD']==30300043]
ROUTE_30300043 = ROUTE_30300043[['BUS_NODE_ID', 'BUSSTOP_SEQ','BUSSTOP_TP']]

In [20]:
ROUTE_30300043

Unnamed: 0,BUS_NODE_ID,BUSSTOP_SEQ,BUSSTOP_TP
2706,9002737,1,1
2707,8005587,2,
2708,8005589,3,
2709,8002725,4,
2710,8002984,5,
...,...,...,...
2790,8005921,85,
2791,8001296,86,
2792,8005590,87,
2793,8005588,88,


In [846]:
BUS_108 = CITY_BUS_ROUTE_NO_COORDINATE_MERGE[CITY_BUS_ROUTE_NO_COORDINATE_MERGE['ROUTE_NO']=='108'].merge(ROUTE_30300043, left_on = 'GETON_BUS_STTN_ID', right_on = 'BUS_NODE_ID')
BUS_108.rename(columns = {'BUSSTOP_SEQ' : 'GETON_BUSSTOP_SEQ'}, inplace = True)
BUS_108 = BUS_108[['CARD_NO', 'BUS_ROUTE_ID', 'ROUTE_NO', 'GETON_BUS_STTN_ID','GETON_DATETIME', 'GETON_POSX', 'GETON_POSY', 'GETON_BUSSTOP_SEQ','GETOFF_BUS_STTN_ID',
                   'GETOFF_DATETIME', 'GETOFF_POSX', 'GETOFF_POSY', 'GETON_GEOMETRY', 'GETOFF_GEOMETRY', 'DIFF_TIME', 'DIFF_HOURS', 'DISTANCE_KM','SPEED_KMH']]

BUS_108 = BUS_108[BUS_108['ROUTE_NO']=='108'].merge(ROUTE_30300043, left_on = 'GETOFF_BUS_STTN_ID', right_on = 'BUS_NODE_ID')
BUS_108.rename(columns = {'BUSSTOP_SEQ' : 'GETOFF_BUSSTOP_SEQ'}, inplace = True)
BUS_108 = BUS_108[['CARD_NO', 'BUS_ROUTE_ID', 'ROUTE_NO', 'GETON_BUS_STTN_ID', 'GETON_BUSSTOP_SEQ','GETON_DATETIME', 'GETON_POSX', 'GETON_POSY', 'GETON_GEOMETRY',
                   'GETOFF_BUS_STTN_ID', 'GETOFF_BUSSTOP_SEQ','GETOFF_DATETIME', 'GETOFF_POSX', 'GETOFF_POSY', 'GETOFF_GEOMETRY', 'DIFF_TIME', 'DIFF_HOURS', 'DISTANCE_KM','SPEED_KMH']]
BUS_108.head()

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_BUSSTOP_SEQ,GETON_DATETIME,GETON_POSX,GETON_POSY,GETON_GEOMETRY,GETOFF_BUS_STTN_ID,GETOFF_BUSSTOP_SEQ,GETOFF_DATETIME,GETOFF_POSX,GETOFF_POSY,GETOFF_GEOMETRY,DIFF_TIME,DIFF_HOURS,DISTANCE_KM,SPEED_KMH
0,929954937,30300043,108,8001243,58,2023-06-01 05:30:22,36.307575,127.44639,POINT (36.307575 127.44639),8002717,75,2023-06-01 05:59:28,36.349094,127.39294,POINT (36.349094 127.39294),0 days 00:29:06,0.485,7.534259,15.534554
1,933491920,30300043,108,8001243,58,2023-06-01 07:53:09,36.307575,127.44639,POINT (36.307575 127.44639),8002717,75,2023-06-01 08:31:30,36.349094,127.39294,POINT (36.349094 127.39294),0 days 00:38:21,0.639167,7.534259,11.787628
2,939205401,30300043,108,8001243,58,2023-06-02 12:35:32,36.307575,127.44639,POINT (36.307575 127.44639),8002717,75,2023-06-02 13:05:50,36.349094,127.39294,POINT (36.349094 127.39294),0 days 00:30:18,0.505,7.534259,14.919324
3,698718151,30300043,108,8001243,58,2023-06-02 14:25:37,36.307575,127.44639,POINT (36.307575 127.44639),8002717,75,2023-06-02 14:58:21,36.349094,127.39294,POINT (36.349094 127.39294),0 days 00:32:44,0.545556,7.534259,13.81025
4,929954937,30300043,108,8001243,58,2023-06-05 05:31:36,36.307575,127.44639,POINT (36.307575 127.44639),8002717,75,2023-06-05 05:59:36,36.349094,127.39294,POINT (36.349094 127.39294),0 days 00:28:00,0.466667,7.534259,16.14484


In [847]:
BUS_108_MEAN_SPEED = pd.DataFrame(BUS_108.pivot_table(index='GETOFF_BUSSTOP_SEQ', columns='GETON_BUSSTOP_SEQ', values='SPEED_KMH').mean())
BUS_108_MEAN_SPEED.columns = ['BUS_108_MEAN_SPEED']
BUS_108_MEAN_SPEED= BUS_108_MEAN_SPEED.T

In [848]:
new_columns_df = pd.DataFrame()

for i in range(1, 89):
    if i in BUS_108_MEAN_SPEED.columns and i+1 in BUS_108_MEAN_SPEED.columns:
        column_name = f'{i}~{i+1}'  # Create the column name
        new_columns_df[column_name] = (BUS_108_MEAN_SPEED[i] + BUS_108_MEAN_SPEED[i+1]) / 2


In [849]:
def station_route_merge(getstationbyrouteall,route_cd):
    exdata = getstationbyrouteall[getstationbyrouteall['ROUTE_CD']==route_cd].reset_index()
    index_condition = exdata[exdata['BUSSTOP_TP'] == '2'].index.max() -1
    exdata = exdata.iloc[:index_condition]
    return exdata

In [850]:
new_columns_df

Unnamed: 0,1~2,2~3,3~4,4~5,5~6,6~7,7~8,8~9,9~10,10~11,11~12,12~13,13~14,14~15,15~16,16~17,17~18,18~19,19~20,20~21,21~22,22~23,23~24,24~25,25~26,...,64~65,65~66,66~67,67~68,68~69,69~70,70~71,71~72,72~73,73~74,74~75,75~76,76~77,77~78,78~79,79~80,80~81,81~82,82~83,83~84,84~85,85~86,86~87,87~88,88~89
BUS_108_MEAN_SPEED,13.171991,14.41374,16.018354,16.120791,14.42415,14.481979,14.909997,14.833401,14.910387,14.343877,14.235059,15.268144,16.467657,17.554447,18.217114,17.289731,16.032098,16.03692,16.696338,16.833296,16.935898,16.625243,17.581895,19.435791,20.013508,...,14.065789,13.211941,14.342075,14.789866,14.537419,15.651665,16.615564,16.290097,13.893429,13.342154,14.231315,14.386985,14.296975,16.199336,15.893845,16.227051,18.33174,17.235517,18.088365,22.151345,23.32886,17.172951,14.688159,17.272953,14.94672


In [851]:
BUS_108_COORDINATE = BUS_108[['GETON_BUSSTOP_SEQ','GETON_GEOMETRY']]
BUS_108_COORDINATE = BUS_108_COORDINATE.drop_duplicates(subset = ['GETON_BUSSTOP_SEQ']).sort_values('GETON_BUSSTOP_SEQ').reset_index(drop = True)
BUS_108_COORDINATE

Unnamed: 0,GETON_BUSSTOP_SEQ,GETON_GEOMETRY
0,1,POINT (36.367615 127.35218)
1,2,POINT (36.36881 127.34573)
2,3,POINT (36.36524 127.345184)
3,4,POINT (36.361355 127.34603)
4,5,POINT (36.359398 127.3545)
...,...,...
83,85,POINT (36.359795 127.35468)
84,86,POINT (36.36134 127.34719)
85,87,POINT (36.36533 127.34543)
86,88,POINT (36.369488 127.34661)


In [852]:
BUS_108_COORDINATE = BUS_108[['GETON_BUSSTOP_SEQ','GETON_POSX','GETON_POSY']]

In [853]:
BUS_108_COORDINATE = BUS_108_COORDINATE.drop_duplicates(subset = ['GETON_BUSSTOP_SEQ']).sort_values('GETON_BUSSTOP_SEQ').reset_index(drop = True)

In [854]:
for i in range(1, 89):
    if i in BUS_108_COORDINATE.columns and i+1 in BUS_108_COORDINATE.columns:
        column_name = f'{i}~{i+1}'  # Create the column name
        
        # Create Point instances from coordinates
        point1 = Point(BUS_108_COORDINATE[i])
        point2 = Point(BUS_108_COORDINATE[i+1])
        
        # Create LineString from the points
        line = LineString([point1, point2])
        
        # Store the LineString as WKT in the new DataFrame
        new_coor_df[column_name] = [line.wkt]

In [855]:
from shapely.geometry import Point, LineString

new_coor_df = pd.DataFrame()

for i in range(1, 89):
    if i in BUS_108_COORDINATE.index and i+1 in BUS_108_COORDINATE.index:
        column_name = f'{i}~{i+1}'  # Create the column name
        
        # Create Point instances from coordinates
        point1 = Point(BUS_108_COORDINATE.loc[i, 'GETON_POSX'], BUS_108_COORDINATE.loc[i, 'GETON_POSY'])
        point2 = Point(BUS_108_COORDINATE.loc[i+1, 'GETON_POSX'], BUS_108_COORDINATE.loc[i+1, 'GETON_POSY'])
        
        # Create LineString from the points
        line = LineString([point1, point2])
        
        # Store the LineString as WKT in the new DataFrame
        new_coor_df[column_name] = [line.wkt]

In [856]:
new_coor_df

Unnamed: 0,1~2,2~3,3~4,4~5,5~6,6~7,7~8,8~9,9~10,10~11,11~12,12~13,13~14,14~15,15~16,16~17,17~18,18~19,19~20,20~21,21~22,22~23,23~24,24~25,25~26,...,62~63,63~64,64~65,65~66,66~67,67~68,68~69,69~70,70~71,71~72,72~73,73~74,74~75,75~76,76~77,77~78,78~79,79~80,80~81,81~82,82~83,83~84,84~85,85~86,86~87
0,"LINESTRING (36.36881 127.34573, 36.36524 127.3...","LINESTRING (36.36524 127.345184, 36.361355 127...","LINESTRING (36.361355 127.34603, 36.359398 127...","LINESTRING (36.359398 127.3545, 36.3585 127.36...","LINESTRING (36.3585 127.36101, 36.35971 127.3657)","LINESTRING (36.35971 127.3657, 36.35968 127.37...","LINESTRING (36.35968 127.371956, 36.359646 127...","LINESTRING (36.359646 127.376045, 36.359634 12...","LINESTRING (36.359634 127.38034, 36.359146 127...","LINESTRING (36.359146 127.39001, 36.35506 127....","LINESTRING (36.35506 127.39004, 36.35047 127.3...","LINESTRING (36.35047 127.39005, 36.348835 127....","LINESTRING (36.348835 127.392265, 36.348804 12...","LINESTRING (36.348804 127.39831, 36.347336 127...","LINESTRING (36.347336 127.401566, 36.341076 12...","LINESTRING (36.341076 127.41007, 36.338676 127...","LINESTRING (36.338676 127.41313, 36.33509 127....","LINESTRING (36.33509 127.417625, 36.330883 127...","LINESTRING (36.330883 127.419235, 36.328552 12...","LINESTRING (36.328552 127.420425, 36.324142 12...","LINESTRING (36.324142 127.42239, 36.319958 127...","LINESTRING (36.319958 127.42422, 36.318443 127...","LINESTRING (36.318443 127.42512, 36.313843 127...","LINESTRING (36.313843 127.42812, 36.313366 127...","LINESTRING (36.313366 127.430954, 36.312645 12...",...,"LINESTRING (36.318096 127.42571, 36.320004 127...","LINESTRING (36.320004 127.42453, 36.323544 127...","LINESTRING (36.323544 127.4229, 36.32508 127.4...","LINESTRING (36.32508 127.42239, 36.328785 127....","LINESTRING (36.328785 127.420654, 36.330765 12...","LINESTRING (36.330765 127.41955, 36.335632 127...","LINESTRING (36.335632 127.41735, 36.339115 127...","LINESTRING (36.339115 127.413025, 36.34149 127...","LINESTRING (36.34149 127.410034, 36.345898 127...","LINESTRING (36.345898 127.40215, 36.349113 127...","LINESTRING (36.349113 127.399124, 36.349094 12...","LINESTRING (36.349094 127.39294, 36.349995 127...","LINESTRING (36.349995 127.390335, 36.354385 12...","LINESTRING (36.354385 127.390335, 36.35865 127...","LINESTRING (36.35865 127.39041, 36.35989 127.3...","LINESTRING (36.35989 127.381134, 36.359932 127...","LINESTRING (36.359932 127.37881, 36.35993 127....","LINESTRING (36.35993 127.376175, 36.35995 127....","LINESTRING (36.35995 127.370865, 36.35995 127....","LINESTRING (36.35995 127.36659, 36.35873 127.3...","LINESTRING (36.35873 127.362434, 36.359795 127...","LINESTRING (36.359795 127.35468, 36.36134 127....","LINESTRING (36.36134 127.34719, 36.36533 127.3...","LINESTRING (36.36533 127.34543, 36.369488 127....","LINESTRING (36.369488 127.34661, 36.36739 127...."


In [860]:
BUS_108_GEO = pd.concat([new_columns_df, new_coor_df]).T
BUS_108_GEO.columns = ['BUS_108_MEAN_SPEED', 'BUS_108_GEOMETRY']
BUS_108_GEO.loc[BUS_108_GEO['BUS_108_MEAN_SPEED'] >= 20, 'speed_range'] = 'fast'
BUS_108_GEO.loc[(BUS_108_GEO['BUS_108_MEAN_SPEED'] >= 10) & (BUS_108_GEO['BUS_108_MEAN_SPEED'] < 20), 'speed_range'] = 'medium'
BUS_108_GEO.loc[BUS_108_GEO['BUS_108_MEAN_SPEED'] < 10, 'speed_range'] = 'slow'
BUS_108_GEO

Unnamed: 0,BUS_108_MEAN_SPEED,BUS_108_GEOMETRY,speed_range
1~2,13.171991,"LINESTRING (36.36881 127.34573, 36.36524 127.3...",medium
2~3,14.41374,"LINESTRING (36.36524 127.345184, 36.361355 127...",medium
3~4,16.018354,"LINESTRING (36.361355 127.34603, 36.359398 127...",medium
4~5,16.120791,"LINESTRING (36.359398 127.3545, 36.3585 127.36...",medium
5~6,14.42415,"LINESTRING (36.3585 127.36101, 36.35971 127.3657)",medium
...,...,...,...
86~87,14.688159,"LINESTRING (36.369488 127.34661, 36.36739 127....",medium
87~88,17.272953,,medium
88~89,14.94672,,medium
41~42,,"LINESTRING (36.270203 127.47669, 36.26705 127....",


In [862]:
BUS_108_GEO_80 = BUS_108_GEO[:80]

In [863]:
import folium
from shapely.wkt import loads
import pandas as pd


# Create a Folium map centered around the first LineString
m = folium.Map(location=[36.365, 127.352], tiles='cartodbdark_matter', zoom_start=11)

# Define a colormap based on speed ranges
colormap = {
    'slow': 'red',
    'medium': 'green',
    'fast': 'blue'
}

# Add LineString geometries to the map with different colors based on speed ranges
for idx, row in BUS_108_GEO[:40].iterrows():
    if row['BUS_108_GEOMETRY'] is not None:
        line = loads(row['BUS_108_GEOMETRY'])
        coords = line.coords
        color = colormap.get(row['speed_range'], 'gray')  # Use gray color if speed_range is unknown
        folium.PolyLine(locations=coords, color=color, weight=7, popup=f"Speed: {row['BUS_108_MEAN_SPEED']} km/h").add_to(m)

# Display the map
# Add legend to the map
legend_html = """
<div style="position: fixed; bottom: 50px; left: 50px; width: 100px; height: 100px; 
     border:2px solid grey; z-index:9999; font-size:14px;
     background-color:white; opacity: 0.8;">
     <p style="margin: 5px;"><span style="color: blue;">&#9679;</span> Slow</p>
     <p style="margin: 5px;"><span style="color: green;">&#9679;</span> Medium</p>
     <p style="margin: 5px;"><span style="color: red;">&#9679;</span> Fast</p>
</div>
"""
m.get_root().html.add_child(folium.Element(legend_html))

# Display the map
m


In [609]:
# for seq in range(1, 89):
#     geton_seq = seq
#     getoff_seq = seq + 1
    
#     mean_speed = BUS_108[(BUS_108['GETON_BUSSTOP_SEQ'] == geton_seq) & (BUS_108['GETOFF_BUSSTOP_SEQ'] == getoff_seq)]['SPEED_KMH'].mean()
    
#     print(f"section{geton_seq}_section{getoff_seq} : Mean Speed = {mean_speed}")


## 108번 버스 30300043

In [400]:
BUS_108 = CITY_BUS_ROUTE_NO_COORDINATE_MERGE[CITY_BUS_ROUTE_NO_COORDINATE_MERGE['BUS_ROUTE_ID']==CITY_BUS_ROUTE_NO_COORDINATE_MERGE['BUS_ROUTE_ID'].unique()[0]]
BUS_108.head(2)

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETON_POSX,GETON_POSY,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETOFF_POSX,GETOFF_POSY,GETON_GEOMETRY,GETOFF_GEOMETRY,DIFF_TIME,DIFF_HOURS,DISTANCE_KM,SPEED_KMH
0,929954937,30300043,108,8001243,2023-06-01 05:30:22,36.307575,127.44639,8002717,2023-06-01 05:59:28,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:29:06,0.485,7.534259,15.534554
1,933491920,30300043,108,8001243,2023-06-01 07:53:09,36.307575,127.44639,8002717,2023-06-01 08:31:30,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:38:21,0.639167,7.534259,11.787628


### 5시

In [410]:
BUS_108_05 = BUS_108[BUS_108['GETON_DATETIME'].dt.hour == 5]
BUS_108_05.head()

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETON_POSX,GETON_POSY,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETOFF_POSX,GETOFF_POSY,GETON_GEOMETRY,GETOFF_GEOMETRY,DIFF_TIME,DIFF_HOURS,DISTANCE_KM,SPEED_KMH
0,929954937,30300043,108,8001243,2023-06-01 05:30:22,36.307575,127.44639,8002717,2023-06-01 05:59:28,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:29:06,0.485,7.534259,15.534554
4,929954937,30300043,108,8001243,2023-06-05 05:31:36,36.307575,127.44639,8002717,2023-06-05 05:59:36,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:28:00,0.466667,7.534259,16.14484
6,929954937,30300043,108,8001243,2023-06-07 05:32:29,36.307575,127.44639,8002717,2023-06-07 05:59:31,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:27:02,0.450556,7.534259,16.722153
8,772294160,30300043,108,8001243,2023-06-08 05:32:21,36.307575,127.44639,8002717,2023-06-08 05:56:45,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:24:24,0.406667,7.534259,18.526866
9,929954937,30300043,108,8001243,2023-06-09 05:30:17,36.307575,127.44639,8002717,2023-06-09 05:57:04,36.349094,127.39294,POINT (36.307575 127.44639),POINT (36.349094 127.39294),0 days 00:26:47,0.446389,7.534259,16.87824


In [406]:
BUS_108_05[(BUS_108_05['GETON_BUS_STTN_ID'] == BUS_108_05['GETON_BUS_STTN_ID'].unique()[0]) & (BUS_108_05['GETOFF_BUS_STTN_ID'] == BUS_108_05['GETOFF_BUS_STTN_ID'].unique()[0])]['SPEED_KMH'].mean()

16.796918039858248

In [412]:
BUS_108_05_MEAN = BUS_108_05.copy()

In [413]:
BUS_108_05_MEAN['SPEED_MEAN'] = BUS_108_05[(BUS_108_05['GETON_BUS_STTN_ID'] == BUS_108_05['GETON_BUS_STTN_ID'].unique()[0]) & (BUS_108_05['GETOFF_BUS_STTN_ID'] == BUS_108_05['GETOFF_BUS_STTN_ID'].unique()[0])]['SPEED_KMH'].mean()

In [419]:
BUS_108_05_MEAN['SPEED_MEAN'] = BUS_108_05[(BUS_108_05['GETON_BUS_STTN_ID'] == BUS_108_05['GETON_BUS_STTN_ID'].unique()[0]) & (BUS_108_05['GETOFF_BUS_STTN_ID'] == BUS_108_05['GETOFF_BUS_STTN_ID'].unique()[1])]['SPEED_KMH'].mean()

In [418]:
BUS_108_05_MEAN = BUS_108_05_MEAN[['ROUTE_NO','GETON_BUS_STTN_ID','GETON_DATETIME','GETON_POSX','GETON_POSY', 'GETOFF_BUS_STTN_ID','GETOFF_DATETIME', 'GETOFF_POSX','GETOFF_POSY','SPEED_MEAN']]
BUS_108_05_MEAN = BUS_108_05_MEAN[:1]
BUS_108_05_MEAN

Unnamed: 0,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETON_POSX,GETON_POSY,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETOFF_POSX,GETOFF_POSY,SPEED_MEAN
0,108,8001243,2023-06-01 05:30:22,36.307575,127.44639,8002717,2023-06-01 05:59:28,36.349094,127.39294,16.796918


---

In [235]:
DATA_0601 = df[df['DAY']==1]
DATA_0601.head(3)

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,TR_MEANS_TYPE,GETON_BUS_STTN_ID,GETON_DATETIME,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,DAY
0,677731909,30300100,674,8001029,2023-06-01 05:19:23,8002629,2023-06-01 05:45:44,1
1,657743613,30300100,674,8001029,2023-06-01 05:19:26,8001413,2023-06-01 05:47:12,1
2,312433278,30300100,674,8001029,2023-06-01 05:20:09,8001927,2023-06-01 05:39:38,1


In [185]:
ROUTE_NO_MERGE = DATA_0601.merge(getrouteinfoall_df, left_on = 'BUS_ROUTE_ID', right_on = 'ROUTE_CD')
ROUTE_NO_MERGE = ROUTE_NO_MERGE[['CARD_NO', 'BUS_ROUTE_ID', 'ROUTE_NO', 'GETON_BUS_STTN_ID', 'GETON_DATETIME', 'GETOFF_BUS_STTN_ID', 'GETOFF_DATETIME']]
ROUTE_NO_MERGE.head()

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETOFF_BUS_STTN_ID,GETOFF_DATETIME
0,677731909,30300100,1,8001029,2023-06-01 05:19:23,8002629,2023-06-01 05:45:44
1,657743613,30300100,1,8001029,2023-06-01 05:19:26,8001413,2023-06-01 05:47:12
2,312433278,30300100,1,8001029,2023-06-01 05:20:09,8001927,2023-06-01 05:39:38
3,769071298,30300100,1,8001029,2023-06-01 05:20:36,8001934,2023-06-01 05:36:55
4,697543930,30300100,1,8002351,2023-06-01 05:27:31,8002118,2023-06-01 05:40:55


In [197]:
COORDINATE_MERGE = ROUTE_NO_MERGE.merge(bus_sttn_df, left_on = 'GETON_BUS_STTN_ID', right_on = 'BUS_STTN_ID')
COORDINATE_MERGE.rename(columns={'POSX' : 'GETON_POSX', 'POSY' : 'GETON_POSY'}, inplace = True)
COORDINATE_MERGE = COORDINATE_MERGE[['CARD_NO','BUS_ROUTE_ID','ROUTE_NO','GETON_BUS_STTN_ID','GETON_DATETIME','GETON_POSX','GETON_POSY','GETOFF_BUS_STTN_ID','GETOFF_DATETIME']]
COORDINATE_MERGE = COORDINATE_MERGE.merge(bus_sttn_df, left_on = 'GETOFF_BUS_STTN_ID', right_on = 'BUS_STTN_ID')
COORDINATE_MERGE.rename(columns={'POSX' : 'GETOFF_POSX', 'POSY' : 'GETOFF_POSY'}, inplace = True)
COORDINATE_MERGE = COORDINATE_MERGE[['CARD_NO','BUS_ROUTE_ID','ROUTE_NO','GETON_BUS_STTN_ID','GETON_DATETIME','GETON_POSX','GETON_POSY','GETOFF_BUS_STTN_ID','GETOFF_DATETIME','GETOFF_POSX','GETOFF_POSY']]
COORDINATE_MERGE

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETON_POSX,GETON_POSY,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETOFF_POSX,GETOFF_POSY
0,677731909,30300100,1,8001029,2023-06-01 05:19:23,36.304470,127.351350,8002629,2023-06-01 05:45:44,36.328180,127.424965
1,769070523,30300100,1,8001029,2023-06-01 05:54:05,36.304470,127.351350,8002629,2023-06-01 06:13:06,36.328180,127.424965
2,713242790,30300100,1,8001029,2023-06-01 06:18:08,36.304470,127.351350,8002629,2023-06-01 06:41:14,36.328180,127.424965
3,750657469,30300100,1,8001029,2023-06-01 07:03:37,36.304470,127.351350,8002629,2023-06-01 07:28:07,36.328180,127.424965
4,932770609,30300100,1,8001029,2023-06-01 07:15:20,36.304470,127.351350,8002629,2023-06-01 07:38:37,36.328180,127.424965
...,...,...,...,...,...,...,...,...,...,...,...
373875,658208517,30300007,22,8001606,2023-06-01 08:09:21,36.247124,127.343330,8070037,2023-06-01 08:27:33,36.201332,127.340490
373876,541132516,30300007,22,8005534,2023-06-01 19:34:28,36.250110,127.342060,8070037,2023-06-01 19:51:00,36.201332,127.340490
373877,785011605,30300099,73,8001210,2023-06-01 12:06:13,36.458904,127.385220,8001333,2023-06-01 12:34:28,36.478940,127.395510
373878,658208517,30300007,22,8070038,2023-06-01 17:46:05,36.201443,127.340546,8007258,2023-06-01 17:57:27,36.246340,127.346750


In [198]:
from shapely.geometry import Point
import geopandas as gpd

# 지오메트리 컬럼 생성
COORDINATE_MERGE["GETON_GEOMETRY"] = COORDINATE_MERGE.apply(lambda row: Point(row["GETON_POSX"], row["GETON_POSY"]), axis=1)
COORDINATE_MERGE["GETOFF_GEOMETRY"] = COORDINATE_MERGE.apply(lambda row: Point(row["GETOFF_POSX"], row["GETOFF_POSY"]), axis=1)

In [199]:
COORDINATE_MERGE.drop(labels = ['GETON_POSX','GETON_POSY','GETOFF_POSX','GETOFF_POSY'], axis = 1, inplace = True)
COORDINATE_MERGE

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETON_GEOMETRY,GETOFF_GEOMETRY
0,677731909,30300100,1,8001029,2023-06-01 05:19:23,8002629,2023-06-01 05:45:44,POINT (36.30447 127.35135),POINT (36.32818 127.424965)
1,769070523,30300100,1,8001029,2023-06-01 05:54:05,8002629,2023-06-01 06:13:06,POINT (36.30447 127.35135),POINT (36.32818 127.424965)
2,713242790,30300100,1,8001029,2023-06-01 06:18:08,8002629,2023-06-01 06:41:14,POINT (36.30447 127.35135),POINT (36.32818 127.424965)
3,750657469,30300100,1,8001029,2023-06-01 07:03:37,8002629,2023-06-01 07:28:07,POINT (36.30447 127.35135),POINT (36.32818 127.424965)
4,932770609,30300100,1,8001029,2023-06-01 07:15:20,8002629,2023-06-01 07:38:37,POINT (36.30447 127.35135),POINT (36.32818 127.424965)
...,...,...,...,...,...,...,...,...,...
373875,658208517,30300007,22,8001606,2023-06-01 08:09:21,8070037,2023-06-01 08:27:33,POINT (36.247124 127.34333),POINT (36.201332 127.34049)
373876,541132516,30300007,22,8005534,2023-06-01 19:34:28,8070037,2023-06-01 19:51:00,POINT (36.25011 127.34206),POINT (36.201332 127.34049)
373877,785011605,30300099,73,8001210,2023-06-01 12:06:13,8001333,2023-06-01 12:34:28,POINT (36.458904 127.38522),POINT (36.47894 127.39551)
373878,658208517,30300007,22,8070038,2023-06-01 17:46:05,8007258,2023-06-01 17:57:27,POINT (36.201443 127.340546),POINT (36.24634 127.34675)


In [225]:
BUS_102 = COORDINATE_MERGE[COORDINATE_MERGE['BUS_ROUTE_ID']==30300037]
BUS_102['DIFF_TIME'] = BUS_102['GETOFF_DATETIME'] - BUS_102['GETON_DATETIME']
BUS_102['DIFF_HOURS'] = BUS_102['DIFF_TIME'].dt.total_seconds() / 3600 # DIFF_TIME을 시간 단위로 변환
BUS_102.head(3)

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_DATETIME,GETOFF_BUS_STTN_ID,GETOFF_DATETIME,GETON_GEOMETRY,GETOFF_GEOMETRY,DIFF_TIME,DIFF_HOURS
10617,727348346,30300037,102,8002984,2023-06-01 20:14:44,8001348,2023-06-01 20:54:50,POINT (36.359398 127.3545),POINT (36.32906 127.441734),0 days 00:40:06,0.668333
10646,737178176,30300037,102,8001533,2023-06-01 12:46:22,8001348,2023-06-01 12:56:07,POINT (36.350216 127.44048),POINT (36.32906 127.441734),0 days 00:09:45,0.1625
10647,545786906,30300037,102,8001533,2023-06-01 17:10:44,8001348,2023-06-01 17:21:43,POINT (36.350216 127.44048),POINT (36.32906 127.441734),0 days 00:10:59,0.183056


In [226]:
# 거리 계산을 위한 함수 정의
def calculate_distance(row):
    return row['GETOFF_GEOMETRY'].distance(row['GETON_GEOMETRY']) * 111.32  # Approximate factor for km

# 거리 계산 후 속도 계산
BUS_102['DISTANCE_KM'] = BUS_102.apply(calculate_distance, axis=1)
BUS_102['SPEED_KMH'] = BUS_102['DISTANCE_KM'] / BUS_102['DIFF_HOURS']

BUS_102 = BUS_102[['CARD_NO','BUS_ROUTE_ID','ROUTE_NO','GETON_BUS_STTN_ID', 'GETON_GEOMETRY','GETON_DATETIME','GETOFF_BUS_STTN_ID', 'GETOFF_GEOMETRY','GETOFF_DATETIME','SPEED_KMH']]
BUS_102.head(3)

Unnamed: 0,CARD_NO,BUS_ROUTE_ID,ROUTE_NO,GETON_BUS_STTN_ID,GETON_GEOMETRY,GETON_DATETIME,GETOFF_BUS_STTN_ID,GETOFF_GEOMETRY,GETOFF_DATETIME,SPEED_KMH
10617,727348346,30300037,102,8002984,POINT (36.359398 127.3545),2023-06-01 20:14:44,8001348,POINT (36.32906 127.441734),2023-06-01 20:54:50,15.383629
10646,737178176,30300037,102,8001533,POINT (36.350216 127.44048),2023-06-01 12:46:22,8001348,POINT (36.32906 127.441734),2023-06-01 12:56:07,14.518274
10647,545786906,30300037,102,8001533,POINT (36.350216 127.44048),2023-06-01 17:10:44,8001348,POINT (36.32906 127.441734),2023-06-01 17:21:43,12.887997
