### Load Python libraries

In [24]:
import pandas as pd
import pandas_profiling
pd.options.mode.chained_assignment = None  # default='warn'
from pyproj import Proj, transform
import numpy as np
import matplotlib.pyplot as  plt
import ipywidgets as widgets
from ipywidgets import interact, interact_manual, GridspecLayout
from collections import OrderedDict
import geopandas as gpd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import warnings
warnings.filterwarnings(action='ignore')

### load dataset

In [34]:
import os

# Directory that holds every input file read by this notebook.
# Set the DIAMOND_DATA_DIR environment variable to run against another location;
# when it is unset or empty, the original hard-coded directory is used.
# os.path.join(..., '') guarantees a trailing separator, because the rest of the
# notebook builds file names with plain string concatenation (path + 'file.csv').
path = os.path.join(os.environ.get('DIAMOND_DATA_DIR') or '/home/yubin90/pyWork/diamond/', '')
def stp_cols(df):
    """Strip leading/trailing whitespace from the column labels of ``df``.

    The frame is modified in place (its ``columns`` attribute is reassigned)
    and the same object is returned so the call can be used in an assignment.

    Non-string labels (e.g. integer column names) are left untouched; the
    previous ``map(str.strip, ...)`` form raised ``TypeError`` on them.
    """
    df.columns = [
        label.strip() if isinstance(label, str) else label
        for label in df.columns
    ]
    return df

# Ridership records: read, normalise the column labels, drop the AGGR_YMDHMS column.
ride_acvm_df = stp_cols(
    pd.read_csv(path+'TB_COL_KR_TRVL_STNP_CLSF_RIDE_ACVM.csv', low_memory=False)
).drop('AGGR_YMDHMS', axis=1)

# Station code table: same clean-up, then keep one row per STN_CD.
trvl_stn_cd_df = (
    stp_cols(pd.read_csv(path+'TB_COL_KR_TRVL_STN_CD.csv', low_memory=False))
    .drop('AGGR_YMDHMS', axis=1)
    .drop_duplicates(subset='STN_CD')
)

# Train classification code table.
trvl_stlb_clsf_cd_df = stp_cols(
    pd.read_csv(path+'TB_COL_KR_TRVL_STLB_TRN_CLSF_CD.csv')
).drop('AGGR_YMDHMS', axis=1)

# Station location table (cp949-encoded file).
stn_loc_df = pd.read_csv(path+'stn_loc_no_dup.csv', encoding='cp949')
#trvl_stlb_clsf_cd_df[trvl_stlb_clsf_cd_df['STLB_TRN_CLSF_CD_NM'] == 'KTX']

# frg_bs_df = pd.read_csv(path+"TB_COL_KR_CAR_FRG_TRN_BS_INFO.csv")
# frg_bs_df = stp_cols(frg_bs_df)
# trvl_bs_df = pd.read_csv(path+"TB_COL_KR_CAR_TRVL_TRN_BS_INFO.csv", low_memory=False)
# trvl_bs_df = stp_cols(trvl_bs_df)
# ttime_df = pd.read_csv(path+'train_time_reshape_20200109.csv', encoding='cp949')

In [35]:
#### create table profile
# pr = trvl_stlb_clsf_cd_df.profile_report()
# pr.to_file('./TB_COL_KR_TRVL_STLB_TRN_CLSF_CD.html')

### code mapping

In [44]:
# Year-month key: RUN_DT as text with its last two characters removed.
ride_acvm_df['YYMM'] = ride_acvm_df['RUN_DT'].astype(str).str.slice(stop=-2)
# Lookup table: station code -> Korean station name.
trvl_stn_cd_dict = dict(zip(trvl_stn_cd_df['STN_CD'], trvl_stn_cd_df['KOR_STN_NM']))
# Attach the Korean station name for each stop-station code (unmatched codes become NaN).
ride_acvm_df['KOR_STOP_STN'] = ride_acvm_df['STOP_STN'].map(trvl_stn_cd_dict)

In [45]:
ride_acvm_df

Unnamed: 0,RUN_DT,STOP_STN,STLB_TRN_CLSF_CD,UP_DN_DV_CD,ABRD_PRNB,GOFF_PRNB,NSTP_PRNB,YYMM,KOR_STOP_STN
0,20191001,3900280,0,D,1148,3021,15849,201910,오송
1,20191001,3900280,7,D,630,1422,6902,201910,오송
2,20191001,3900280,10,D,49,127,615,201910,오송
3,20191001,3900023,7,U,19,4749,378,201910,서울
4,20191001,3900096,7,U,1332,710,4545,201910,동대구
...,...,...,...,...,...,...,...,...,...
29277,20191031,3900292,1,U,10,6,60,201910,충주
29278,20191031,3900556,1,U,0,0,41,201910,춘양
29279,20191031,3900611,1,U,5,0,36,201910,민둥산
29280,20191031,3900703,1,D,0,1,61,201910,북천


### 역 위치 (위,경도) 매핑

In [46]:
# Left-join the station location table onto the ridership rows by Korean station name.
ride_acvm_with_loc = ride_acvm_df.merge(
    stn_loc_df,
    how='left',
    left_on='KOR_STOP_STN',
    right_on='STN_NM',
)
ride_acvm_with_loc

Unnamed: 0,RUN_DT,STOP_STN,STLB_TRN_CLSF_CD,UP_DN_DV_CD,ABRD_PRNB,GOFF_PRNB,NSTP_PRNB,YYMM,KOR_STOP_STN,STN_NM,LAT,LNG
0,20191001,3900280,0,D,1148,3021,15849,201910,오송,오송,36.620098,127.327582
1,20191001,3900280,7,D,630,1422,6902,201910,오송,오송,36.620098,127.327582
2,20191001,3900280,10,D,49,127,615,201910,오송,오송,36.620098,127.327582
3,20191001,3900023,7,U,19,4749,378,201910,서울,서울,37.554073,126.970702
4,20191001,3900096,7,U,1332,710,4545,201910,동대구,동대구,35.879437,128.628784
...,...,...,...,...,...,...,...,...,...,...,...,...
29277,20191031,3900292,1,U,10,6,60,201910,충주,충주,36.975892,127.909136
29278,20191031,3900556,1,U,0,0,41,201910,춘양,춘양,36.937810,128.919938
29279,20191031,3900611,1,U,5,0,36,201910,민둥산,민둥산,37.243701,128.773657
29280,20191031,3900703,1,D,0,1,61,201910,북천,북천,35.111383,127.883501


###  공공데이터
- 한국감정원_월별 KTX역 역세권 지가지수_2014년 11월 ~ 2019년 7월
- 각 역 단위 실 주소(경부선)

In [86]:
# Land-value index table (cp949-encoded file); preview the month key and one station column.
land_value = pd.read_csv(path+'land_value_2017_2019.csv', encoding='cp949')
land_value.loc[:, ['YYMM', '오송']].head()

Unnamed: 0,YYMM,오송
0,201701,100.226
1,201702,99.968
2,201703,100.988
3,201704,101.482
4,201705,102.83


In [51]:
# Station address table (cp949-encoded file); show the first five rows.
stn_addr = pd.read_csv(path+'stn_addr_2016.csv', encoding='cp949')
stn_addr.head(5)

Unnamed: 0,역명,주소
0,가수원,대전 서구 가수원동 547-1
1,가야,부산 부산진구 백양대로 91
2,각계,충북 영동군 심천면 각계리
3,감곡,전북 정읍시 감곡면 유정리 196-1
4,강경,충남 논산시 강경읍 대흥리 32


In [52]:
# Left-join the station address table onto the located ridership rows by station name.
ride_acvm_with_loc_addr = ride_acvm_with_loc.merge(
    stn_addr,
    how='left',
    left_on='KOR_STOP_STN',
    right_on='역명',
)
ride_acvm_with_loc_addr

Unnamed: 0,RUN_DT,STOP_STN,STLB_TRN_CLSF_CD,UP_DN_DV_CD,ABRD_PRNB,GOFF_PRNB,NSTP_PRNB,YYMM,KOR_STOP_STN,STN_NM,LAT,LNG,역명,주소
0,20191001,3900280,0,D,1148,3021,15849,201910,오송,오송,36.620098,127.327582,오송,충청북도 청원군 강외면 봉산리 370-1
1,20191001,3900280,7,D,630,1422,6902,201910,오송,오송,36.620098,127.327582,오송,충청북도 청원군 강외면 봉산리 370-1
2,20191001,3900280,10,D,49,127,615,201910,오송,오송,36.620098,127.327582,오송,충청북도 청원군 강외면 봉산리 370-1
3,20191001,3900023,7,U,19,4749,378,201910,서울,서울,37.554073,126.970702,서울,서울 용산구 동자동 43-205
4,20191001,3900096,7,U,1332,710,4545,201910,동대구,동대구,35.879437,128.628784,동대구,대구 동구 동대구로 550(신암동)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29277,20191031,3900292,1,U,10,6,60,201910,충주,충주,36.975892,127.909136,충주,충북 충주시 봉방동 409
29278,20191031,3900556,1,U,0,0,41,201910,춘양,춘양,36.937810,128.919938,춘양,경북 봉화군 춘양면 운곡길 22-2
29279,20191031,3900611,1,U,5,0,36,201910,민둥산,민둥산,37.243701,128.773657,민둥산,강원도 정선군 남면 무릉4리 617-2
29280,20191031,3900703,1,D,0,1,61,201910,북천,북천,35.111383,127.883501,북천,경남 하동군 북천면 경서대로 2418-6


### 지도 설정
- 좌표계 포맷 변경
- 역 별 위경도(WGS84 -> UTM-K)

In [59]:
# NOTE(review): Proj(init=...) and pyproj.transform are the legacy pyproj API that this
# notebook imports; newer pyproj releases deprecate them in favour of Transformer.
proj_UTMK = Proj(init='epsg:5178') # UTM-K (Bessel)
proj_WGS84 = Proj(init='epsg:4326') # WGS84 longitude/latitude (GPS)

def transform_w84_to_utmk(df):
    """Project one record's WGS84 coordinates to UTM-K.

    ``df`` must support ``df['LNG']`` and ``df['LAT']`` (e.g. a DataFrame row
    passed by ``DataFrame.apply(..., axis=1)``). Returns a two-element Series
    indexed ``['LNG', 'LAT']`` holding the projected x and y values.
    """
    return pd.Series(transform(proj_WGS84, proj_UTMK, df['LNG'], df['LAT']), index=['LNG', 'LAT'])

def add_utmk_columns(df):
    """Return a copy of ``df`` with projected ``LNG_utmk`` / ``LAT_utmk`` columns.

    All rows are projected in a single vectorized ``transform`` call instead of a
    row-by-row ``apply``. Rows where ``LNG`` or ``LAT`` is missing are skipped and
    keep NaN in both new columns.
    """
    out = df.copy()
    out['LNG_utmk'] = np.nan
    out['LAT_utmk'] = np.nan
    has_loc = out[['LNG', 'LAT']].notna().all(axis=1)
    if has_loc.any():
        x_utmk, y_utmk = transform(
            proj_WGS84, proj_UTMK,
            out.loc[has_loc, 'LNG'].values,
            out.loc[has_loc, 'LAT'].values,
        )
        out.loc[has_loc, 'LNG_utmk'] = x_utmk
        out.loc[has_loc, 'LAT_utmk'] = y_utmk
    return out

# Later cells read LNG_utmk / LAT_utmk, so these columns must be created here for the
# notebook to run from a fresh kernel (this step was previously commented out).
ride_acvm_with_loc_addr = add_utmk_columns(ride_acvm_with_loc_addr)

In [60]:
# ride_acvm_with_loc_addr.to_csv('./ride_acvm_with_loc_addr.csv')

In [61]:
# Preview the first rows of the merged ridership / location / address table.
ride_acvm_with_loc_addr.head()

Unnamed: 0,RUN_DT,STOP_STN,STLB_TRN_CLSF_CD,UP_DN_DV_CD,ABRD_PRNB,GOFF_PRNB,NSTP_PRNB,YYMM,KOR_STOP_STN,STN_NM,LAT,LNG,역명,주소,LNG_utmk,LAT_utmk
0,20191001,3900280,0,D,1148,3021,15849,201910,오송,오송,36.620098,127.327582,오송,충청북도 청원군 강외면 봉산리 370-1,984774.0,1846622.0
1,20191001,3900280,7,D,630,1422,6902,201910,오송,오송,36.620098,127.327582,오송,충청북도 청원군 강외면 봉산리 370-1,984774.0,1846622.0
2,20191001,3900280,10,D,49,127,615,201910,오송,오송,36.620098,127.327582,오송,충청북도 청원군 강외면 봉산리 370-1,984774.0,1846622.0
3,20191001,3900023,7,U,19,4749,378,201910,서울,서울,37.554073,126.970702,서울,서울 용산구 동자동 43-205,953438.2,1950351.0
4,20191001,3900096,7,U,1332,710,4545,201910,동대구,동대구,35.879437,128.628784,동대구,대구 동구 동대구로 550(신암동),1102084.0,1765044.0


### 고속열차(KTX) 대상으로 함
- 승차,하차,통과인원수 0이거나 음수인 경우 제외

In [62]:
# Keep only rows with train classification code 0 (presumably the KTX class, per the
# section heading - verify against the code table). Author's note: 2533.
ride_acvm_with_loc_addr = ride_acvm_with_loc_addr.loc[
    ride_acvm_with_loc_addr['STLB_TRN_CLSF_CD'].eq(0)
]

In [69]:
# Keep only rows whose boarding, alighting and passing counts are all strictly positive.
# The section description says zero OR negative counts are excluded; the previous
# `!= 0` test let negative counts through. (Author's note for the `!= 0` filter: 1985;
# presumably a row count, and it may differ now if negative counts exist.)
ride_fin_df = ride_acvm_with_loc_addr[
    ride_acvm_with_loc_addr[['ABRD_PRNB', 'GOFF_PRNB', 'NSTP_PRNB']].gt(0).all(axis=1)
]

### 지도 및 차트 생성

In [71]:
# Option lists offered by the dropdown widgets.
widget_time = sorted(ride_fin_df['YYMM'].unique())            # available year-months, ascending
widget_stn = ride_fin_df['KOR_STOP_STN'].unique().tolist()    # station names in order of first appearance
widget_updown = ['D', 'U']                                    # values of UP_DN_DV_CD
widget_prnb_Value = ['ABRD_PRNB', 'GOFF_PRNB', 'NSTP_PRNB']   # count columns that can be charted

In [73]:
# Turn the filtered rides into a GeoDataFrame whose point geometry comes from the
# projected LNG_utmk / LAT_utmk columns.
ride_acvm_gpd = gpd.GeoDataFrame(
    ride_fin_df,
    geometry=gpd.points_from_xy(ride_fin_df['LNG_utmk'], ride_fin_df['LAT_utmk']),
)
ride_acvm_gpd.head()

Unnamed: 0,RUN_DT,STOP_STN,STLB_TRN_CLSF_CD,UP_DN_DV_CD,ABRD_PRNB,GOFF_PRNB,NSTP_PRNB,YYMM,KOR_STOP_STN,STN_NM,LAT,LNG,역명,주소,LNG_utmk,LAT_utmk,geometry
0,20191001,3900280,0,D,1148,3021,15849,201910,오송,오송,36.620098,127.327582,오송,충청북도 청원군 강외면 봉산리 370-1,984774.0,1846622.0,POINT (984774.029 1846622.323)
6,20191001,3900047,0,D,968,124,1819,201910,수원,수원,37.266045,126.999796,수원,경기도 수원시 팔달구 덕영대로 924,955839.0,1918382.0,POINT (955838.951 1918382.084)
12,20191001,3900685,0,D,1,208,148,201910,창원,창원,35.257365,128.606778,창원,경남 창원시 의창구 의창대로 67,1100874.0,1696025.0,POINT (1100874.105 1696025.006)
13,20191001,3900902,0,U,980,17,1666,201910,창원중앙,창원중앙,35.242323,128.701341,창원중앙,경남 창원시 의창구 상남로 381,1109497.0,1694457.0,POINT (1109496.880 1694456.908)
16,20191001,3900229,0,U,2269,72,3222,201910,광주송정,광주송정,35.137757,126.79095,광주송정,광주 광산구 송정2동 1003-1,935599.3,1682427.0,POINT (935599.278 1682426.726)


In [74]:
# Load the base map from a pickle file and name its single column 'geometry'.
# NOTE(review): unpickling can execute arbitrary code - only load whole_map.pkl from a trusted source.
whole_map = pd.read_pickle(path+'whole_map.pkl')
whole_map.columns = ['geometry']

# One dropdown per argument of view(); each one starts on the first available option.
#   w1 -> b_time, w2 -> b_stn, w3 -> b_dir, w4 -> want_view
w1, w2, w3, w4 = (
    widgets.Dropdown(
        options=choices,
        value=choices[0],
        description=label,
        disabled=False,
    )
    for choices, label in (
        (widget_time, '기준년월 :'),
        (widget_stn, '역명 :'),
        (widget_updown, '상하행 :'),
        (widget_prnb_Value, '기준인원 :'),
    )
)


In [92]:
def view(b_time = '', b_stn = '', b_dir = '', want_view = ''):
    """Draw the station map and two bar charts for one month/station selection.

    Parameters
    ----------
    b_time : str
        Year-month key matched against ``YYMM``. The special value ``'All'``
        returns the full ``ride_acvm_gpd`` frame without plotting.
    b_stn : str
        Station name matched against ``KOR_STOP_STN``; also used as the column
        name looked up in ``land_value``.
    b_dir : str
        Direction code matched against ``UP_DN_DV_CD``.
    want_view : str
        Name of the count column drawn in the daily bar chart.

    Returns
    -------
    The full GeoDataFrame when ``b_time == 'All'``, the message string
    "데이터가 없습니다" when no row matches the month/station, otherwise ``None``
    after building the figure.

    Panels that have nothing to draw (no rows for the chosen direction, or no
    ``land_value`` column for the station) show the same message on the axes
    instead of raising from the pandas plotting call.
    """
    no_data_msg = "데이터가 없습니다"

    def _mark_empty(ax):
        # Centered placeholder text for a panel that has no data to plot.
        ax.text(0.5, 0.5, no_data_msg, ha='center', va='center', transform=ax.transAxes)

    if b_time == 'All':
        return ride_acvm_gpd

    # Rows for the selected month and station (all directions).
    selected = ride_acvm_gpd[
        (ride_acvm_gpd['YYMM'] == b_time) & (ride_acvm_gpd['KOR_STOP_STN'] == b_stn)
    ]
    if selected.empty:
        return no_data_msg

    font = {'family' : 'nanumgothic', 'size':10}
    plt.rc('font', **font) #font option
    fig = plt.figure(figsize=(28,8), constrained_layout=False)

    # Layout: map in the left half, two stacked bar charts in the right half.
    gs = fig.add_gridspec(nrows=5, ncols=8, left=0.05, right=0.48, wspace=0.5)
    ax1 = fig.add_subplot(gs[:5, :4]) #row, col
    ax2 = fig.add_subplot(gs[:2, 4:8])  #row, col
    ax3 = fig.add_subplot(gs[3:5, 4:8])

    # Base map outline with the selected station drawn as a red star.
    whole_map.plot(color='white', edgecolor='black', legend=True, ax=ax1, linewidth=0.2)
    gpd.GeoDataFrame(selected).plot(column='KOR_STOP_STN', marker='*', color="r", markersize=40,legend=True, ax=ax1)

    ax1.axes.get_xaxis().set_visible(False)
    ax1.axes.get_yaxis().set_visible(False)
    ax1.set_title('역 위치')

    # 1. Per-run-date values of the chosen count column for the chosen direction.
    by_direction = selected[selected['UP_DN_DV_CD'] == b_dir]
    if by_direction.empty:
        _mark_empty(ax2)
    else:
        by_direction[['RUN_DT', want_view]].sort_index().plot.bar(
            x='RUN_DT', y=want_view, ax=ax2, fontsize=10, color="orange", rot=90
        )
    ax2.set_title('2019년 한달 간 기준인원 변화')

    # 2. Land-value index of the station; land_value may have no column for it.
    if b_stn in land_value.columns:
        land_value[['YYMM', b_stn]].sort_index().plot.bar(
            x='YYMM', y=b_stn, ax=ax3, fontsize=10, color="purple", rot=90
        )
        ax3.set_ylim([80, 140])
    else:
        _mark_empty(ax3)
    ax3.set_title('17~19년 해당 역 지가지수 변화')

In [93]:
# Bind the four dropdowns to view()'s arguments; interact_manual adds a button so
# view() runs only when it is clicked rather than on every dropdown change.
interact_manual(view, b_time=w1, b_stn=w2, b_dir=w3, want_view = w4)

interactive(children=(Dropdown(description='기준년월 :', options=('201910',), value='201910'), Dropdown(descriptio…

<function __main__.view(b_time='', b_stn='', b_dir='', want_view='')>