# 버거지수

In [1]:
import sys
import numpy as np
import pandas as pd
import time
import json
import re

# A Fast, Extensible Progress Bar for Python and CLI
from tqdm import tqdm

# Statistical data visualization using matplotlib.
import seaborn as sns

# Python Data. Leaflet.js Maps.
import folium

# matplotlib: plotting with Python.
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager

# Set the matplotlib color cycle using a seaborn palette.
sns.set_palette('pastel')

# plot outputs appear and be stored within the notebook.
%matplotlib inline

# A module for finding, managing, and using fonts across platforms.
mpl.font_manager._rebuild()
sorted([f.name for f in mpl.font_manager.fontManager.ttflist if f.name.startswith('Malgun')])

# 폰트 설정
mpl.rc('font', family='Malgun Gothic')

# 유니코드에서  음수 부호설정
mpl.rc('axes', unicode_minus=False)

# An importlib package installable from any python version
import importlib
import modules.fs as fs

## 공공데이터포털: 상가(상권)정보

In [2]:
df1 = fs.read_csv('./data/semas.or.kr/201912/상가업소정보_201912_02.csv', sep='|')
df2 = fs.read_csv('./data/semas.or.kr/201912/상가업소정보_201912_02.csv', sep='|')
df3 = fs.read_csv('./data/semas.or.kr/201912/상가업소정보_201912_02.csv', sep='|')
df4 = fs.read_csv('./data/semas.or.kr/201912/상가업소정보_201912_02.csv', sep='|')

In [3]:
# 맥도날드
mcdonald1 = df1.fillna('').query("상호명.str.contains('맥도날드')", engine='python')
mcdonald2 = df2.fillna('').query("상호명.str.contains('맥도날드')", engine='python')
mcdonald3 = df3.fillna('').query("상호명.str.contains('맥도날드')", engine='python')
mcdonald4 = df4.fillna('').query("상호명.str.contains('맥도날드')", engine='python')

mcdonald = pd.concat([mcdonald1, mcdonald2, mcdonald3, mcdonald4])
mcdonald.index = np.arange(0, len(mcdonald))

fs.to_csv(mcdonald, './data/macdonald_in_korea_2020.csv', sep='|')

mcdonald.head(1)

Unnamed: 0,상가업소번호,상호명,지점명,상권업종대분류코드,상권업종대분류명,상권업종중분류코드,상권업종중분류명,상권업종소분류코드,상권업종소분류명,표준산업분류코드,...,건물관리번호,건물명,도로명주소,구우편번호,신우편번호,동정보,층정보,호정보,경도,위도
0,20700496,맥도날드,대전한남대디티점,Q,음식,Q07,패스트푸드,Q07A04,패스트푸드,I56199,...,3023010900102300015019504,맥도날드,대전광역시 대덕구 한밭대로 1102,306826,34426,,2,1,127.421231,36.358409


In [4]:
# 버거킹
burgerking1 = df1.fillna('').query("상호명.str.contains('버거킹')", engine='python')
burgerking2 = df2.fillna('').query("상호명.str.contains('버거킹')", engine='python')
burgerking3 = df3.fillna('').query("상호명.str.contains('버거킹')", engine='python')
burgerking4 = df4.fillna('').query("상호명.str.contains('버거킹')", engine='python')

burgerking = pd.concat([burgerking1, burgerking2, burgerking3, burgerking4])
burgerking.index = np.arange(0, len(burgerking))

fs.to_csv(burgerking, './data/burgerking_in_korea_2020.csv', sep='|')

burgerking.head(1)

Unnamed: 0,상가업소번호,상호명,지점명,상권업종대분류코드,상권업종대분류명,상권업종중분류코드,상권업종중분류명,상권업종소분류코드,상권업종소분류명,표준산업분류코드,...,건물관리번호,건물명,도로명주소,구우편번호,신우편번호,동정보,층정보,호정보,경도,위도
0,22878596,에스알에스코리아버거킹,대구역롯데점,Q,음식,Q07,패스트푸드,Q07A04,패스트푸드,I56199,...,2723010200103020155000001,롯데백화점대구역,대구광역시 북구 태평로 161,702746,41581,,2,,128.596131,35.875884


In [5]:
# KFC
kfc1 = df1.fillna('').query("상호명.str.contains('KFC', case=False)", engine='python')
kfc2 = df2.fillna('').query("상호명.str.contains('KFC', case=False)", engine='python')
kfc3 = df3.fillna('').query("상호명.str.contains('KFC', case=False)", engine='python')
kfc4 = df4.fillna('').query("상호명.str.contains('KFC', case=False)", engine='python')

kfc = pd.concat([kfc1, kfc2, kfc3, kfc4])
kfc.index = np.arange(0, len(kfc))

fs.to_csv(kfc, './data/kfc_in_korea_2020.csv', sep='|')

kfc.head(1)

Unnamed: 0,상가업소번호,상호명,지점명,상권업종대분류코드,상권업종대분류명,상권업종중분류코드,상권업종중분류명,상권업종소분류코드,상권업종소분류명,표준산업분류코드,...,건물관리번호,건물명,도로명주소,구우편번호,신우편번호,동정보,층정보,호정보,경도,위도
0,23269560,KFC인천공항스카이,,Q,음식,Q07,패스트푸드,Q07A04,패스트푸드,I56199,...,2811014700128510000223132,인천국제공항역,인천광역시 중구 공항로 271,400715,22382,FB,4,,126.452336,37.447592


In [6]:
# 롯데리아
lotteria1 = df1.fillna('').query("상호명.str.contains('롯데리아')", engine='python')
lotteria2 = df2.fillna('').query("상호명.str.contains('롯데리아')", engine='python')
lotteria3 = df3.fillna('').query("상호명.str.contains('롯데리아')", engine='python')
lotteria4 = df4.fillna('').query("상호명.str.contains('롯데리아')", engine='python')

lotteria = pd.concat([lotteria1, lotteria2, lotteria3, lotteria4])
lotteria.index = np.arange(0, len(lotteria))

fs.to_csv(lotteria, './data/lotteria_in_korea_2020.csv', sep='|')

lotteria.head(1)

Unnamed: 0,상가업소번호,상호명,지점명,상권업종대분류코드,상권업종대분류명,상권업종중분류코드,상권업종중분류명,상권업종소분류코드,상권업종소분류명,표준산업분류코드,...,건물관리번호,건물명,도로명주소,구우편번호,신우편번호,동정보,층정보,호정보,경도,위도
0,19922527,롯데리아사월점,사월점,Q,음식,Q07,패스트푸드,Q07A04,패스트푸드,I56199,...,2726011800100090001000001,시지효성백년가약1단지아파트,대구광역시 수성구 달구벌대로 3280,706959,42278,,1,,128.714818,35.836723


### 패스트푸드 (맥도날드 & KFC & 버거킹 & 롯데리아)

In [7]:
fastfood = pd.concat([mcdonald, burgerking, kfc, lotteria])

fastfood = fastfood[['상호명', '시도명', '시도코드', '시군구명', '시군구코드', '행정동명', '행정동코드', '지점명', '도로명주소', '경도', '위도']]
fastfood.index = np.arange(0, len(fastfood))

fs.to_csv(fastfood, './data/fastfood_in_korea_2020.csv', sep='|')

fastfood.head(1)

Unnamed: 0,상호명,시도명,시도코드,시군구명,시군구코드,행정동명,행정동코드,지점명,도로명주소,경도,위도
0,맥도날드,대전광역시,30,대덕구,30230,중리동,3023054600,대전한남대디티점,대전광역시 대덕구 한밭대로 1102,127.421231,36.358409


## 통계청: 행정구역(시군구)별, 성별 인구수

In [8]:
population = fs.read_csv('./data/kosis.kr/population-202010.csv')
navigation = fs.read_csv('./data/juso.go.kr/navi-202010.csv')

df = pd.merge(population, navigation, on='시군구코드')
df.to_csv('./data/population_and_navi_202010.csv',
              sep=',', encoding='utf-8', index=False)

df.head()

Unnamed: 0,시도명,시도코드,시군구명,시군구코드,인구수,남자수,여자수,시군구_x,시군구_y
0,서울특별시,11,종로구,11110,151290,73746,77544,954859.306142,1953755.0
1,서울특별시,11,중구,11140,126171,61910,64261,955931.02785,1951526.0
2,서울특별시,11,용산구,11170,228670,110356,118314,954122.014984,1948855.0
3,서울특별시,11,성동구,11200,300889,147273,153616,959571.250766,1950601.0
4,서울특별시,11,광진구,11215,351350,170262,181088,963032.466631,1949996.0
