In [1]:
# Miscellaneous operating system interfaces
import os

# JSON encoder and decoder
import json

# Basic date and time types
from datetime import datetime, date, timedelta

# The fundamental package for scientific computing with Python.
import numpy as np

# Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
import pandas as pd

# Python tools for geographic data
import geopandas as gpd

# Set the absolute directory path.
BASE_PATH = os.path.abspath('')
dirs = os.path.dirname(BASE_PATH)
DATA_PATH = os.path.join(dirs, 'datasets')

## Export to csv

In [2]:
# 서울시_공원_통계
df1 = pd.read_csv(os.path.join(DATA_PATH, 'data.seoul.go.kr', '서울시_공원_통계_2017_2019.txt'),
    sep='\t',
    thousands=',', 
    encoding='utf-8',
    usecols=['기간', '자치구', '계'],
    skiprows=[1,2]
)

df1.rename(columns={
    '계': '공원수'
}, inplace=True)

df1 = df1[(df1['자치구'] != '합계') & (df1['자치구'] != '서울대공원')]
df1.index = np.arange(0, len(df1))

df1.head()

Unnamed: 0,기간,자치구,공원수
0,2017,종로구,110
1,2017,중구,70
2,2017,용산구,102
3,2017,성동구,84
4,2017,광진구,55


In [3]:
# 서울시_공원_1인당_공원면적_통계
df2 = pd.read_csv(os.path.join(DATA_PATH, 'data.seoul.go.kr', '서울시_공원_1인당_공원면적_통계_2017_2019.txt'),
    sep='\t',
    thousands=',', 
    encoding='utf-8',
    usecols=['기간', '자치구', '공원(1인당공원면적)', '공원(1인당공원면적).1'],
    skiprows=[1,2]
)

df2.rename(columns={
    '공원(1인당공원면적)': '공원면적',
    '공원(1인당공원면적).1': '1인당공원면적',
}, inplace=True)

df2 = df2[(df2['자치구'] != '서울시') & (df2['자치구'] != '서울대공원')]
df2.index = np.arange(0, len(df2))

for column in ['공원면적', '1인당공원면적']:
    df2[column] = np.where(df2[column] == '-', np.nan, df2[column])
    df2[column] = pd.to_numeric(df2[column])

df2.head()

Unnamed: 0,기간,자치구,공원면적,1인당공원면적
0,2017,종로구,11569.7,70.4
1,2017,중구,3132.0,23.3
2,2017,용산구,1794.1,7.3
3,2017,성동구,3101.4,9.9
4,2017,광진구,3359.2,9.0


In [4]:
df3 = pd.merge(df1, df2, on=['기간','자치구'])
df3.head()

Unnamed: 0,기간,자치구,공원수,공원면적,1인당공원면적
0,2017,종로구,110,11569.7,70.4
1,2017,중구,70,3132.0,23.3
2,2017,용산구,102,1794.1,7.3
3,2017,성동구,84,3101.4,9.9
4,2017,광진구,55,3359.2,9.0


In [5]:
# 서울시_공원_1인당_공원면적_통계
df4 = pd.read_csv(os.path.join(DATA_PATH, '행정구역_시군구_별__좌표_2020.csv'))
df4.head()

Unnamed: 0,시도명,시도코드,시군구명,시군구코드,시군구_x,시군구_y
0,서울특별시,11,종로구,11110,954859.306142,1953755.0
1,서울특별시,11,중구,11140,955931.02785,1951526.0
2,서울특별시,11,용산구,11170,954122.014984,1948855.0
3,서울특별시,11,성동구,11200,959571.250766,1950601.0
4,서울특별시,11,광진구,11215,963032.466631,1949996.0


In [6]:
df = pd.merge(df3, df4, left_on='자치구', right_on='시군구명', how='left')

df['시도코드'] = df['시도코드'].astype(str)
df['시군구코드'] = df['시군구코드'].astype(str)

df['SIG_CD'] = df['시군구코드']
df['SIG_KOR_NM'] = df['시군구명']

# for column in ['시도코드', '시군구코드']:
#     df[column] = np.where(df[column] == '-', np.nan, df[column])
#     df[column] = df[column].fillna(0).astype(int)

df = df[['기간', '시도명', '시도코드', '시군구명', '시군구코드', '자치구', '공원수', '공원면적', '1인당공원면적', '시군구_x', '시군구_y', 'SIG_KOR_NM', 'SIG_CD']]

# df.to_csv(os.path.join(DATA_PATH, '서울시_공원_통계_2017_2019.csv'), index=False)
df.head()

Unnamed: 0,기간,시도명,시도코드,시군구명,SIG_KOR_NM,시군구코드,SIG_CD,자치구,공원수,공원면적,1인당공원면적,시군구_x,시군구_y
0,2017,서울특별시,11,종로구,종로구,11110,11110,종로구,110,11569.7,70.4,954859.3,1953755.0
1,2017,서울특별시,11,중구,중구,11140,11140,중구,70,3132.0,23.3,955931.0,1951526.0
2,2017,부산광역시,26,중구,중구,26110,26110,중구,70,3132.0,23.3,1139440.0,1679949.0
3,2017,대구광역시,27,중구,중구,27110,27110,중구,70,3132.0,23.3,1098782.0,1763839.0
4,2017,인천광역시,28,중구,중구,28110,28110,중구,70,3132.0,23.3,916860.3,1941823.0


## Export to geojson

In [7]:
f = os.path.join(DATA_PATH, 'gisdeveloper.co.kr', 'SIG_202005', 'SIG.shp')
base = gpd.GeoDataFrame.from_file(f, encoding='euc-kr')

# Only Seoul city data is extracted.
base = base[base['SIG_CD'].str.startswith('11')]

gdf = base.merge(df, on='SIG_CD')

gdf.rename(columns={
    'SIG_KOR_NM_x': 'SIG_KOR_NM'
}, inplace=True)

gdf = gdf[['기간', '시도명', '시도코드',
       '시군구명', '시군구코드', '공원수', '공원면적', '1인당공원면적', '시군구_x', '시군구_y',
       'SIG_CD', 'SIG_ENG_NM', 'SIG_KOR_NM', 'geometry']]

# EPSG라는 산업표준 좌표계 코드는 4326이고, GPS 기본 좌표계이기도 함.
gdf = gdf.to_crs(epsg=4326)

gdf.to_file(os.path.join(DATA_PATH, '서울시_공원_통계_2017_2019.json'), driver='GeoJSON')
gdf.head(1)

Unnamed: 0,기간,시도명,시도코드,시군구명,시군구코드,공원수,공원면적,1인당공원면적,시군구_x,시군구_y,SIG_CD,SIG_ENG_NM,SIG_KOR_NM,geometry
0,2017,서울특별시,11,종로구,11110,110,11569.7,70.4,954859.306142,1953755.0,11110,Jongno-gu,종로구,"POLYGON ((127.00864 37.58047, 127.00871 37.580..."
