### 사용할 모듈 import하기

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

### 차트 내 한글 깨짐 방지

In [2]:
# Choose a Hangul-capable matplotlib font based on the host operating system.
import platform

os_name = platform.system()
hangul_font_by_os = {
    'Darwin': 'AppleGothic',     # macOS
    'Windows': 'Malgun Gothic',  # Windows
}
if os_name in hangul_font_by_os:
    plt.rc('font', family=hangul_font_by_os[os_name])
# On any other operating system the font family is left unchanged.

# Minus-sign setting for axis labels (kept from the original configuration).
plt.rc('axes', unicode_minus=False)

# 글씨 선명하게 출력하는 설정
%config InlineBackend.figure_format = 'retina'

### 경고창 무시하기

In [3]:
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas warnings (e.g. about modifying sliced frames) — confirm that is intended.
warnings.filterwarnings(action='ignore')

### csv파일 불러오기

In [4]:
# Load the raw dataset; the CSV file is decoded as cp949.
df = pd.read_csv('./csvdatas/mydata.csv', encoding = 'cp949')

### 데이터프레임의 기초 정보 확인하기

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 468922 entries, 0 to 468921
Data columns (total 44 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   개방자치단체코드   468922 non-null  int64  
 1   관리번호       468922 non-null  object 
 2   인허가일자      468922 non-null  int64  
 3   인허가취소일자    0 non-null       float64
 4   영업상태코드     468922 non-null  int64  
 5   영업상태명      468922 non-null  object 
 6   상세영업상태코드   468922 non-null  int64  
 7   상세영업상태명    468922 non-null  object 
 8   폐업일자       344722 non-null  float64
 9   휴업시작일자     0 non-null       float64
 10  휴업종료일자     0 non-null       float64
 11  재개업일자      0 non-null       float64
 12  전화번호       327769 non-null  object 
 13  소재지면적      434185 non-null  float64
 14  소재지우편번호    468677 non-null  float64
 15  지번주소       468685 non-null  object 
 16  도로명주소      217979 non-null  object 
 17  도로명우편번호    214449 non-null  float64
 18  사업장명       468921 non-null  object 
 19  최종수정일자     468922 non-n

### 데이터 전처리 작업하기
#### 1. 결측치 확인

In [6]:
df.isnull().sum()

개방자치단체코드          0
관리번호              0
인허가일자             0
인허가취소일자      468922
영업상태코드            0
영업상태명             0
상세영업상태코드          0
상세영업상태명           0
폐업일자         124200
휴업시작일자       468922
휴업종료일자       468922
재개업일자        468922
전화번호         141153
소재지면적         34737
소재지우편번호         245
지번주소            237
도로명주소        250943
도로명우편번호      254473
사업장명              1
최종수정일자            0
데이터갱신구분           0
데이터갱신일자           0
업태구분명            22
좌표정보(X)       24831
좌표정보(Y)       24831
위생업태명         60222
남성종사자수       205569
여성종사자수       203355
영업장주변구분명     226735
등급구분명        235572
급수시설구분명      180141
총인원          468922
본사종업원수       468824
공장사무직종업원수    468825
공장판매직종업원수    468825
공장생산직종업원수    468825
건물소유구분명      468921
보증액          468824
월세액          468824
다중이용업소여부      60206
시설총규모         60206
전통업소지정번호     467209
전통업소주된음식     468026
홈페이지         468922
dtype: int64

#### 2. 필요한 부분만 가져오기

#### 1) 폐업한 상점 데이터 가지고 오기, 필요없는 컬럼들 지우기

In [7]:
# Keep only the businesses whose detailed status is '폐업' (closed).
is_closed = df['상세영업상태명'] == '폐업'
closed = df[is_closed]

# Remove the first seven columns, then everything from position 25 onward
# (positions are re-evaluated after the first removal).
leading_columns = closed.columns[0:7]
closed = closed.drop(columns=leading_columns)
trailing_columns = closed.columns[25:43]
closed = closed.drop(columns=trailing_columns)
closed.info()
# 지번주소 has far more non-null values than 도로명주소 (see the info() output),
# so the district is extracted from 지번주소.

<class 'pandas.core.frame.DataFrame'>
Int64Index: 344728 entries, 18 to 468920
Data columns (total 25 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   상세영업상태명   344728 non-null  object 
 1   폐업일자      344722 non-null  float64
 2   휴업시작일자    0 non-null       float64
 3   휴업종료일자    0 non-null       float64
 4   재개업일자     0 non-null       float64
 5   전화번호      268752 non-null  object 
 6   소재지면적     316100 non-null  float64
 7   소재지우편번호   344573 non-null  float64
 8   지번주소      344580 non-null  object 
 9   도로명주소     95597 non-null   object 
 10  도로명우편번호   92856 non-null   float64
 11  사업장명      344728 non-null  object 
 12  최종수정일자    344728 non-null  int64  
 13  데이터갱신구분   344728 non-null  object 
 14  데이터갱신일자   344728 non-null  object 
 15  업태구분명     344718 non-null  object 
 16  좌표정보(X)   321499 non-null  float64
 17  좌표정보(Y)   321499 non-null  float64
 18  위생업태명     327913 non-null  object 
 19  남성종사자수    234639 non-null  float64
 20  여성종

In [8]:
closed.head()

Unnamed: 0,상세영업상태명,폐업일자,휴업시작일자,휴업종료일자,재개업일자,전화번호,소재지면적,소재지우편번호,지번주소,도로명주소,...,업태구분명,좌표정보(X),좌표정보(Y),위생업태명,남성종사자수,여성종사자수,영업장주변구분명,등급구분명,급수시설구분명,총인원
18,폐업,20210430.0,,,,,14.7,137891.0,서울특별시 서초구 양재동 120-10 1층,서울특별시 서초구 양재천로11길 12 1층 (양재동),...,기타,203159.96,441451.87,,,,,,,
50,폐업,20200602.0,,,,,12.0,135829.0,서울특별시 강남구 논현동 226번지,서울특별시 강남구 봉은사로33길 34 지하1층 5-1호 (논현동),...,한식,203028.770828,445334.720543,,,,,,,
52,폐업,20200602.0,,,,,110.4,137875.0,서울특별시 서초구 서초동 1576-4번지 1층 101호,서울특별시 서초구 서초중앙로 103 1층 101호 (서초동),...,기타,201159.654678,443187.071173,,,,,,,
58,폐업,20210806.0,,,,,50.0,152854.0,서울특별시 구로구 구로동 429-43 1층,서울특별시 구로구 구로동로28길 51 1층 (구로동),...,중국식,189942.919736,443201.373933,,,,,,,
60,폐업,20200602.0,,,,02 356 5380,36.98,122923.0,서울특별시 은평구 응암동 126-5번지 1층 102호,서울특별시 은평구 응암로21가길 9-11 1층 102호 (응암동),...,기타,192540.370555,454712.243026,,,,,,,


#### 2) 상호별 상세 주소 가져오기

In [9]:
# Extract the district (구) of each registered business from the 지번주소 column.

closed = closed.dropna(subset = ['지번주소'])
# Rows with a missing 지번주소 are removed.

def goo(x):
    """Return the district (구) token of a lot-number address, or None.

    The address is expected to look like '서울특별시 서초구 양재동 ...'; the
    second whitespace-separated token is returned.

    Args:
        x: Address text. Anything that is not a string is treated as
            "no address".

    Returns:
        The second token of the address when it starts with '서', otherwise
        None. None is also returned for an empty string or an address that
        has no second token.
    """
    # Non-string input (e.g. NaN) and addresses that do not start with '서'
    # are mapped to None; startswith() is also safe on an empty string.
    if not isinstance(x, str) or not x.startswith("서"):
        return None
    # split() without an argument collapses runs of whitespace, so a doubled
    # space does not produce an empty district token.
    tokens = x.split()
    if len(tokens) < 2:
        return None
    return tokens[1]

# Derive the district column by applying goo() to every 지번주소 value.
closed['지역(구)'] = closed['지번주소'].apply(goo)

closed['업태구분명'].isnull().sum()
# The 업태구분명 column also contains missing values.

10

In [10]:
closed = closed.dropna(subset = ['업태구분명'])
closed['업태구분명'].isnull().sum()
# All rows with a missing 업태구분명 have been removed.

0

In [11]:
closed = closed.dropna(subset = ['폐업일자'])
closed['폐업일자'].isnull().sum()
# Check that the missing values in the 폐업일자 column are gone.

0

In [12]:
closed
# 데이터프레임 작업 결과 확인

Unnamed: 0,상세영업상태명,폐업일자,휴업시작일자,휴업종료일자,재개업일자,전화번호,소재지면적,소재지우편번호,지번주소,도로명주소,...,좌표정보(X),좌표정보(Y),위생업태명,남성종사자수,여성종사자수,영업장주변구분명,등급구분명,급수시설구분명,총인원,지역(구)
18,폐업,20210430.0,,,,,14.70,137891.0,서울특별시 서초구 양재동 120-10 1층,서울특별시 서초구 양재천로11길 12 1층 (양재동),...,203159.960000,441451.870000,,,,,,,,서초구
50,폐업,20200602.0,,,,,12.00,135829.0,서울특별시 강남구 논현동 226번지,서울특별시 강남구 봉은사로33길 34 지하1층 5-1호 (논현동),...,203028.770828,445334.720543,,,,,,,,강남구
52,폐업,20200602.0,,,,,110.40,137875.0,서울특별시 서초구 서초동 1576-4번지 1층 101호,서울특별시 서초구 서초중앙로 103 1층 101호 (서초동),...,201159.654678,443187.071173,,,,,,,,서초구
58,폐업,20210806.0,,,,,50.00,152854.0,서울특별시 구로구 구로동 429-43 1층,서울특별시 구로구 구로동로28길 51 1층 (구로동),...,189942.919736,443201.373933,,,,,,,,구로구
60,폐업,20200602.0,,,,02 356 5380,36.98,122923.0,서울특별시 은평구 응암동 126-5번지 1층 102호,서울특별시 은평구 응암로21가길 9-11 1층 102호 (응암동),...,192540.370555,454712.243026,,,,,,,,은평구
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468906,폐업,20211005.0,,,,0234462696,149.48,137904.0,서울특별시 서초구 잠원동 28-8 1층,서울특별시 서초구 강남대로 571 1층 (잠원동),...,201713.624058,445611.458964,,,,,,,,서초구
468912,폐업,20211005.0,,,,,83.33,135928.0,서울특별시 강남구 역삼동 775-2 초원빌딩 1층 104호,서울특별시 강남구 논현로72길 16 (역삼동초원빌딩 1층 104호),...,203539.168707,443716.403617,,,,,,,,강남구
468916,폐업,20211005.0,,,,02 3540818,35.20,122881.0,서울특별시 은평구 신사동 19-161 1층,서울특별시 은평구 가좌로 328 1층 (신사동),...,191896.691065,454928.218389,,,,,,,,은평구
468918,폐업,20211005.0,,,,0209197488,66.44,136893.0,서울특별시 성북구 정릉동 227-65,서울특별시 성북구 서경로 96 (정릉동),...,201144.184887,456583.708970,,,,,,,,성북구


In [13]:
closed.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 344564 entries, 18 to 468920
Data columns (total 26 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   상세영업상태명   344564 non-null  object 
 1   폐업일자      344564 non-null  float64
 2   휴업시작일자    0 non-null       float64
 3   휴업종료일자    0 non-null       float64
 4   재개업일자     0 non-null       float64
 5   전화번호      268695 non-null  object 
 6   소재지면적     315942 non-null  float64
 7   소재지우편번호   344557 non-null  float64
 8   지번주소      344564 non-null  object 
 9   도로명주소     95440 non-null   object 
 10  도로명우편번호   92710 non-null   float64
 11  사업장명      344564 non-null  object 
 12  최종수정일자    344564 non-null  int64  
 13  데이터갱신구분   344564 non-null  object 
 14  데이터갱신일자   344564 non-null  object 
 15  업태구분명     344564 non-null  object 
 16  좌표정보(X)   321337 non-null  float64
 17  좌표정보(Y)   321337 non-null  float64
 18  위생업태명     327774 non-null  object 
 19  남성종사자수    234625 non-null  float64
 20  여성종

In [14]:
# Remove the columns at positions 18-24, which are not needed for the analysis.
unneeded_columns = closed.columns[18:25]
closed = closed.drop(columns=unneeded_columns)

In [15]:
closed.isnull().sum()
# 결측치가 없는지 재확인

상세영업상태명         0
폐업일자            0
휴업시작일자     344564
휴업종료일자     344564
재개업일자      344564
전화번호        75869
소재지면적       28622
소재지우편번호         7
지번주소            0
도로명주소      249124
도로명우편번호    251854
사업장명            0
최종수정일자          0
데이터갱신구분         0
데이터갱신일자         0
업태구분명           0
좌표정보(X)     23227
좌표정보(Y)     23227
지역(구)           0
dtype: int64

#### 3) 폐업일자가 2018.08.20 이후인 데이터만 추출하기

In [16]:
closed.head()

Unnamed: 0,상세영업상태명,폐업일자,휴업시작일자,휴업종료일자,재개업일자,전화번호,소재지면적,소재지우편번호,지번주소,도로명주소,도로명우편번호,사업장명,최종수정일자,데이터갱신구분,데이터갱신일자,업태구분명,좌표정보(X),좌표정보(Y),지역(구)
18,폐업,20210430.0,,,,,14.7,137891.0,서울특별시 서초구 양재동 120-10 1층,서울특별시 서초구 양재천로11길 12 1층 (양재동),6754.0,커피할루카,20210430172324,U,2020-12-05 00:02:00.0,기타,203159.96,441451.87,서초구
50,폐업,20200602.0,,,,,12.0,135829.0,서울특별시 강남구 논현동 226번지,서울특별시 강남구 봉은사로33길 34 지하1층 5-1호 (논현동),6107.0,김경자 3代 진국,20200602105117,U,2019-12-06 00:04:00.0,한식,203028.770828,445334.720543,강남구
52,폐업,20200602.0,,,,,110.4,137875.0,서울특별시 서초구 서초동 1576-4번지 1층 101호,서울특별시 서초구 서초중앙로 103 1층 101호 (서초동),6650.0,미스터피자 교대점,20200602164648,U,2019-12-06 00:04:00.0,기타,201159.654678,443187.071173,서초구
58,폐업,20210806.0,,,,,50.0,152854.0,서울특별시 구로구 구로동 429-43 1층,서울특별시 구로구 구로동로28길 51 1층 (구로동),8307.0,투보가요리집,20210806110231,U,2020-12-08 00:08:00.0,중국식,189942.919736,443201.373933,구로구
60,폐업,20200602.0,,,,02 356 5380,36.98,122923.0,서울특별시 은평구 응암동 126-5번지 1층 102호,서울특별시 은평구 응암로21가길 9-11 1층 102호 (응암동),3456.0,근린커피 브루잉,20200602151013,U,2019-12-06 00:04:00.0,기타,192540.370555,454712.243026,은평구


In [17]:
# Keep only closures dated 20180820 or later (the date is stored as a YYYYMMDD number).
on_or_after_start = closed['폐업일자'] >= 20180820
closed = closed[on_or_after_start]

In [18]:
# Show the distinct values of each candidate-for-removal column.
for column_name in ('휴업시작일자', '휴업종료일자', '재개업일자', '전화번호'):
    print(pd.unique(closed[column_name]))

[nan]
[nan]
[nan]
[nan '02  356 5380' '02 9622110' ... '02 3540818' '0209197488'
 '0202317945']


In [19]:
# The three 휴업/재개업 date columns contain only NaN and the phone number is not needed.
# The street address (duplicate information), the postal codes, the last-modified
# timestamp and the data-refresh columns are not needed either.
unused_columns = [
    '휴업시작일자',
    '휴업종료일자',
    '재개업일자',
    '전화번호',
    '도로명주소',
    '소재지우편번호',
    '도로명우편번호',
    '최종수정일자',
    '데이터갱신구분',
    '데이터갱신일자',
]
closed = closed.drop(columns=unused_columns)
closed

Unnamed: 0,상세영업상태명,폐업일자,소재지면적,지번주소,사업장명,업태구분명,좌표정보(X),좌표정보(Y),지역(구)
18,폐업,20210430.0,14.70,서울특별시 서초구 양재동 120-10 1층,커피할루카,기타,203159.960000,441451.870000,서초구
50,폐업,20200602.0,12.00,서울특별시 강남구 논현동 226번지,김경자 3代 진국,한식,203028.770828,445334.720543,강남구
52,폐업,20200602.0,110.40,서울특별시 서초구 서초동 1576-4번지 1층 101호,미스터피자 교대점,기타,201159.654678,443187.071173,서초구
58,폐업,20210806.0,50.00,서울특별시 구로구 구로동 429-43 1층,투보가요리집,중국식,189942.919736,443201.373933,구로구
60,폐업,20200602.0,36.98,서울특별시 은평구 응암동 126-5번지 1층 102호,근린커피 브루잉,기타,192540.370555,454712.243026,은평구
...,...,...,...,...,...,...,...,...,...
468906,폐업,20211005.0,149.48,서울특별시 서초구 잠원동 28-8 1층,미인감자탕,한식,201713.624058,445611.458964,서초구
468912,폐업,20211005.0,83.33,서울특별시 강남구 역삼동 775-2 초원빌딩 1층 104호,만복회해산물,한식,203539.168707,443716.403617,강남구
468916,폐업,20211005.0,35.20,서울특별시 은평구 신사동 19-161 1층,술,한식,191896.691065,454928.218389,은평구
468918,폐업,20211005.0,66.44,서울특별시 성북구 정릉동 227-65,미진각,중국식,201144.184887,456583.708970,성북구


In [20]:
# Order the rows by closure date, earliest first.
closed = closed.sort_values('폐업일자')
closed

Unnamed: 0,상세영업상태명,폐업일자,소재지면적,지번주소,사업장명,업태구분명,좌표정보(X),좌표정보(Y),지역(구)
382801,폐업,20180820.0,14.02,서울특별시 강남구 도곡동 953-1번지,김밥이랑 국수랑,분식,202973.006006,442568.878946,강남구
71298,폐업,20180820.0,60.13,서울특별시 광진구 구의동 246-37번지,호반,한식,207523.199449,448359.093576,광진구
363537,폐업,20180820.0,171.12,서울특별시 서초구 반포동 1333-1번지 3층 4층 동작대교 전망카페,구름카페,경양식,198418.250195,444839.584709,서초구
346038,폐업,20180820.0,23.80,서울특별시 관악구 봉천동 1604-1번지,딸랏롯빠이 누들,기타,195840.823514,441869.843556,관악구
250043,폐업,20180820.0,229.02,서울특별시 구로구 오류동 306-4번지 랑데뷰 106107호,멘도롱 제주집 천왕역점,한식,185694.406018,443029.454825,구로구
...,...,...,...,...,...,...,...,...,...
118674,폐업,22020518.0,24.19,서울특별시 성북구 동소문동5가 53-1번지,영금,정종/대포집/소주방,201240.827382,454428.978859,성북구
70859,폐업,22020819.0,,서울특별시 광진구 구의동 225-41번지,목포식당,한식,207880.590451,448911.585027,광진구
164855,폐업,30000307.0,20.16,서울특별시 은평구 갈현동 507-33번지,모이자,분식,192151.789276,456900.365983,은평구
288281,폐업,30000904.0,91.14,서울특별시 송파구 마천동 307-22번지,디셈버,분식,213322.469786,443597.963276,송파구


In [21]:
closed = closed.reset_index(drop = True)
closed
# Reset the index so the frame is easier to read.

Unnamed: 0,상세영업상태명,폐업일자,소재지면적,지번주소,사업장명,업태구분명,좌표정보(X),좌표정보(Y),지역(구)
0,폐업,20180820.0,14.02,서울특별시 강남구 도곡동 953-1번지,김밥이랑 국수랑,분식,202973.006006,442568.878946,강남구
1,폐업,20180820.0,60.13,서울특별시 광진구 구의동 246-37번지,호반,한식,207523.199449,448359.093576,광진구
2,폐업,20180820.0,171.12,서울특별시 서초구 반포동 1333-1번지 3층 4층 동작대교 전망카페,구름카페,경양식,198418.250195,444839.584709,서초구
3,폐업,20180820.0,23.80,서울특별시 관악구 봉천동 1604-1번지,딸랏롯빠이 누들,기타,195840.823514,441869.843556,관악구
4,폐업,20180820.0,229.02,서울특별시 구로구 오류동 306-4번지 랑데뷰 106107호,멘도롱 제주집 천왕역점,한식,185694.406018,443029.454825,구로구
...,...,...,...,...,...,...,...,...,...
37680,폐업,22020518.0,24.19,서울특별시 성북구 동소문동5가 53-1번지,영금,정종/대포집/소주방,201240.827382,454428.978859,성북구
37681,폐업,22020819.0,,서울특별시 광진구 구의동 225-41번지,목포식당,한식,207880.590451,448911.585027,광진구
37682,폐업,30000307.0,20.16,서울특별시 은평구 갈현동 507-33번지,모이자,분식,192151.789276,456900.365983,은평구
37683,폐업,30000904.0,91.14,서울특별시 송파구 마천동 307-22번지,디셈버,분식,213322.469786,443597.963276,송파구


In [22]:
# Remove rows whose closure date is not a valid date (anything after 20211016).
closure_date = closed['폐업일자'].astype('float64')
closed['폐업일자'] = closure_date
is_after_cutoff = closure_date > 20211016
closed = closed.drop(index=closed.index[is_after_cutoff])
# Store the date as text.
closed['폐업일자'] = closed['폐업일자'].astype('string')
closed

Unnamed: 0,상세영업상태명,폐업일자,소재지면적,지번주소,사업장명,업태구분명,좌표정보(X),좌표정보(Y),지역(구)
0,폐업,20180820.0,14.02,서울특별시 강남구 도곡동 953-1번지,김밥이랑 국수랑,분식,202973.006006,442568.878946,강남구
1,폐업,20180820.0,60.13,서울특별시 광진구 구의동 246-37번지,호반,한식,207523.199449,448359.093576,광진구
2,폐업,20180820.0,171.12,서울특별시 서초구 반포동 1333-1번지 3층 4층 동작대교 전망카페,구름카페,경양식,198418.250195,444839.584709,서초구
3,폐업,20180820.0,23.80,서울특별시 관악구 봉천동 1604-1번지,딸랏롯빠이 누들,기타,195840.823514,441869.843556,관악구
4,폐업,20180820.0,229.02,서울특별시 구로구 오류동 306-4번지 랑데뷰 106107호,멘도롱 제주집 천왕역점,한식,185694.406018,443029.454825,구로구
...,...,...,...,...,...,...,...,...,...
37671,폐업,20211005.0,29.10,서울특별시 강북구 번동 179-2,의정부참맛부대찌개,한식,203677.354658,458387.339745,강북구
37672,폐업,20211005.0,33.00,서울특별시 은평구 역촌동 43-77,김정준푸드,한식,192752.701315,455364.744276,은평구
37673,폐업,20211005.0,36.36,서울특별시 송파구 석촌동 276 림스빌딩,다우가 왕만두 찐빵,한식,209393.579691,444298.606562,송파구
37674,폐업,20211006.0,119.40,서울특별시 광진구 화양동 18-26 지하1층,놀이터7080,기타,206134.743579,449480.589211,광진구


In [23]:
# Build a closure year-month column (first six characters of the date text)
# so the data can be aggregated per month.
def _year_month(date_text):
    return date_text[:6]

closed['폐업연월'] = closed['폐업일자'].apply(_year_month)
closed

Unnamed: 0,상세영업상태명,폐업일자,소재지면적,지번주소,사업장명,업태구분명,좌표정보(X),좌표정보(Y),지역(구),폐업연월
0,폐업,20180820.0,14.02,서울특별시 강남구 도곡동 953-1번지,김밥이랑 국수랑,분식,202973.006006,442568.878946,강남구,201808
1,폐업,20180820.0,60.13,서울특별시 광진구 구의동 246-37번지,호반,한식,207523.199449,448359.093576,광진구,201808
2,폐업,20180820.0,171.12,서울특별시 서초구 반포동 1333-1번지 3층 4층 동작대교 전망카페,구름카페,경양식,198418.250195,444839.584709,서초구,201808
3,폐업,20180820.0,23.80,서울특별시 관악구 봉천동 1604-1번지,딸랏롯빠이 누들,기타,195840.823514,441869.843556,관악구,201808
4,폐업,20180820.0,229.02,서울특별시 구로구 오류동 306-4번지 랑데뷰 106107호,멘도롱 제주집 천왕역점,한식,185694.406018,443029.454825,구로구,201808
...,...,...,...,...,...,...,...,...,...,...
37671,폐업,20211005.0,29.10,서울특별시 강북구 번동 179-2,의정부참맛부대찌개,한식,203677.354658,458387.339745,강북구,202110
37672,폐업,20211005.0,33.00,서울특별시 은평구 역촌동 43-77,김정준푸드,한식,192752.701315,455364.744276,은평구,202110
37673,폐업,20211005.0,36.36,서울특별시 송파구 석촌동 276 림스빌딩,다우가 왕만두 찐빵,한식,209393.579691,444298.606562,송파구,202110
37674,폐업,20211006.0,119.40,서울특별시 광진구 화양동 18-26 지하1층,놀이터7080,기타,206134.743579,449480.589211,광진구,202110


In [24]:
def sashimi(x):
    """Map the category spelling '회집' to '횟집'; return any other value unchanged."""
    return '횟집' if x == '회집' else x

closed['업태구분명'] = closed['업태구분명'].apply(sashimi)
# Merge the two spellings 회집 / 횟집 into a single category (횟집).

In [25]:
# Convert the closure date back to a number so it can be compared numerically.
closed['폐업일자'] = closed['폐업일자'].astype('float64')

def covid19(x):
    """Label a YYYYMMDD closure date relative to the 20200220 cutoff.

    Returns "이후" (after) when x is 20200220 or later, otherwise "이전" (before).
    """
    return "이후" if x >= 20200220 else "이전"

# Tag every row as before ("이전") or after ("이후") the cutoff used by covid19().
closed['코로나전후여부'] = closed['폐업일자'].apply(covid19)
closed

Unnamed: 0,상세영업상태명,폐업일자,소재지면적,지번주소,사업장명,업태구분명,좌표정보(X),좌표정보(Y),지역(구),폐업연월,코로나전후여부
0,폐업,20180820.0,14.02,서울특별시 강남구 도곡동 953-1번지,김밥이랑 국수랑,분식,202973.006006,442568.878946,강남구,201808,이전
1,폐업,20180820.0,60.13,서울특별시 광진구 구의동 246-37번지,호반,한식,207523.199449,448359.093576,광진구,201808,이전
2,폐업,20180820.0,171.12,서울특별시 서초구 반포동 1333-1번지 3층 4층 동작대교 전망카페,구름카페,경양식,198418.250195,444839.584709,서초구,201808,이전
3,폐업,20180820.0,23.80,서울특별시 관악구 봉천동 1604-1번지,딸랏롯빠이 누들,기타,195840.823514,441869.843556,관악구,201808,이전
4,폐업,20180820.0,229.02,서울특별시 구로구 오류동 306-4번지 랑데뷰 106107호,멘도롱 제주집 천왕역점,한식,185694.406018,443029.454825,구로구,201808,이전
...,...,...,...,...,...,...,...,...,...,...,...
37671,폐업,20211005.0,29.10,서울특별시 강북구 번동 179-2,의정부참맛부대찌개,한식,203677.354658,458387.339745,강북구,202110,이후
37672,폐업,20211005.0,33.00,서울특별시 은평구 역촌동 43-77,김정준푸드,한식,192752.701315,455364.744276,은평구,202110,이후
37673,폐업,20211005.0,36.36,서울특별시 송파구 석촌동 276 림스빌딩,다우가 왕만두 찐빵,한식,209393.579691,444298.606562,송파구,202110,이후
37674,폐업,20211006.0,119.40,서울특별시 광진구 화양동 18-26 지하1층,놀이터7080,기타,206134.743579,449480.589211,광진구,202110,이후


In [26]:
# Keep only the year-month, district, before/after flag and business-category columns.
closed_filtering = closed[['폐업연월', '지역(구)', '코로나전후여부', '업태구분명']]
closed_filtering

Unnamed: 0,폐업연월,지역(구),코로나전후여부,업태구분명
0,201808,강남구,이전,분식
1,201808,광진구,이전,한식
2,201808,서초구,이전,경양식
3,201808,관악구,이전,기타
4,201808,구로구,이전,한식
...,...,...,...,...
37671,202110,강북구,이후,한식
37672,202110,은평구,이후,한식
37673,202110,송파구,이후,한식
37674,202110,광진구,이후,기타


In [27]:
# Closure counts per business category, one row per before/after period.
closed_filtered_group_categories = closed_filtering.groupby(['코로나전후여부', '업태구분명'])['지역(구)'].count().unstack()
# Closure counts per district, one row per before/after period.
closed_filtered_group_regions = closed_filtering.groupby(['코로나전후여부', '지역(구)'])['업태구분명'].count().unstack()

In [28]:
closed_filtered_group_categories

업태구분명,감성주점,경양식,기타,기타 휴게음식점,김밥(도시락),까페,냉면집,라이브카페,룸살롱,복어취급,...,중국식,출장조리,키즈카페,탕류(보신용),통닭(치킨),패밀리레스트랑,패스트푸드,한식,호프/통닭,횟집
코로나전후여부,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
이전,37.0,1497.0,2442.0,,117.0,603.0,6.0,25.0,1.0,11.0,...,535.0,14.0,8.0,18.0,436.0,11.0,165.0,7218.0,1802.0,97.0
이후,30.0,1530.0,3249.0,2.0,112.0,425.0,6.0,27.0,,5.0,...,644.0,16.0,17.0,31.0,380.0,29.0,123.0,7765.0,1835.0,97.0


In [29]:
closed_filtered_group_regions.reset_index(inplace = True)

In [30]:
closed_filtered_group_regions

지역(구),코로나전후여부,강남구,강동구,강북구,강서구,관악구,광진구,구로구,금천구,노원구,...,성동구,성북구,송파구,양천구,영등포구,용산구,은평구,종로구,중구,중랑구
0,이전,1882,644,426,800,757,600,588,393,545,...,427,515,1097,531,1168,593,719,567,546,429
1,이후,2159,654,730,1009,862,622,859,402,536,...,620,622,1157,685,812,694,630,671,653,476


In [31]:
closed_filtered_group_regions[closed_filtered_group_regions['코로나전후여부'] == '이전']


지역(구),코로나전후여부,강남구,강동구,강북구,강서구,관악구,광진구,구로구,금천구,노원구,...,성동구,성북구,송파구,양천구,영등포구,용산구,은평구,종로구,중구,중랑구
0,이전,1882,644,426,800,757,600,588,393,545,...,427,515,1097,531,1168,593,719,567,546,429


### 본격적인 분석 작업하기
#### 1) 폐업 이후 데이터만 가지고 분석

In [32]:
# Count closures per (district, year-month, before/after flag) row, with one column per business category.
closed_group = closed.groupby(['지역(구)','폐업연월','코로나전후여부','업태구분명'])['사업장명'].count().unstack().fillna(0)
closed_group
# Combinations without any closure come out of unstack() as NaN and are filled with 0.

Unnamed: 0_level_0,Unnamed: 1_level_0,업태구분명,감성주점,경양식,기타,기타 휴게음식점,김밥(도시락),까페,냉면집,라이브카페,룸살롱,복어취급,...,중국식,출장조리,키즈카페,탕류(보신용),통닭(치킨),패밀리레스트랑,패스트푸드,한식,호프/통닭,횟집
지역(구),폐업연월,코로나전후여부,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
강남구,201808,이전,0.0,12.0,6.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,13.0,0.0,0.0
강남구,201809,이전,0.0,15.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,3.0,0.0,5.0,20.0,4.0,0.0
강남구,201810,이전,1.0,20.0,10.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,4.0,34.0,4.0,0.0
강남구,201811,이전,0.0,9.0,14.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2.0,37.0,3.0,0.0
강남구,201812,이전,0.0,18.0,12.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,3.0,42.0,5.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
중랑구,202106,이후,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,5.0,0.0
중랑구,202107,이후,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,9.0,1.0,0.0
중랑구,202108,이후,0.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,0.0,10.0,3.0,0.0
중랑구,202109,이후,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,2.0,0.0,0.0,16.0,1.0,0.0


In [33]:
# Distinct districts, closure year-months and business categories found in `closed`.
seoul_regions = pd.unique(closed['지역(구)'])
y_m = pd.unique(closed['폐업연월'])
categories = pd.unique(closed['업태구분명'])

closed_group

Unnamed: 0_level_0,Unnamed: 1_level_0,업태구분명,감성주점,경양식,기타,기타 휴게음식점,김밥(도시락),까페,냉면집,라이브카페,룸살롱,복어취급,...,중국식,출장조리,키즈카페,탕류(보신용),통닭(치킨),패밀리레스트랑,패스트푸드,한식,호프/통닭,횟집
지역(구),폐업연월,코로나전후여부,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
강남구,201808,이전,0.0,12.0,6.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,13.0,0.0,0.0
강남구,201809,이전,0.0,15.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,3.0,0.0,5.0,20.0,4.0,0.0
강남구,201810,이전,1.0,20.0,10.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,4.0,34.0,4.0,0.0
강남구,201811,이전,0.0,9.0,14.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2.0,37.0,3.0,0.0
강남구,201812,이전,0.0,18.0,12.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,3.0,42.0,5.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
중랑구,202106,이후,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,5.0,0.0
중랑구,202107,이후,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,9.0,1.0,0.0
중랑구,202108,이후,0.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,0.0,10.0,3.0,0.0
중랑구,202109,이후,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,2.0,0.0,0.0,16.0,1.0,0.0


In [35]:
# Total number of closures per district (columns) for each closure year-month (rows).
monthly_counts = closed.groupby(['폐업연월', '지역(구)'])['사업장명'].count()
closed_group_region = (
    monthly_counts
    .unstack()
    .fillna(0)
    .reset_index()
    .rename(columns={'폐업연월': '연월구분'})
)

In [36]:
# Skip the first 12 rows and renumber the remaining rows from 0.
# reset_index() returns a new frame here, so the sliced frame is never
# modified in place (the original used inplace=True on a slice).
closed_group_region_calc = closed_group_region.iloc[12:].reset_index(drop=True)

In [37]:
# Keep the first 25 rows.
closed_group_region_calc = closed_group_region_calc.iloc[:25]

In [38]:
# Drop the year-month label column so only the per-district count columns remain.
# Rebinding the result instead of using inplace=True avoids mutating a frame
# that was obtained by slicing another frame.
closed_group_region_calc = closed_group_region_calc.drop(columns=['연월구분'])

In [39]:
# Load the per-district counts (decoded as cp949) and discard the year-month label column.
region_count_raw = pd.read_csv('./csvdatas/seoul_group_region_count.csv', encoding='cp949')
seoul_group_region_count = region_count_raw.drop(columns=['연월구분'])

In [40]:
divided = seoul_group_region_count / closed_group_region_calc
# Per-period delivery count / per-period number of closed stores (2019.08 - 2021.08), per the original note.
# NOTE(review): closed_group_region_calc is built with fillna(0), so the divisor can be 0 — confirm how such cells should be handled.

In [41]:
# Re-attach the year-month labels (elements 12-36 of y_m) to the ratio table.
# NOTE(review): y_m comes from pd.unique() on `closed`; this assumes `closed` is still sorted by closure date — confirm.
divided['연월구분'] = y_m[12:37]

In [42]:
# Exclude these seven districts from the ratio table.
# NOTE(review): the reason for excluding them is not shown here — confirm.
excluded_districts = ['광진구', '종로구', '강동구', '동대문구', '성동구', '송파구', '중랑구']
divided = divided.drop(columns=excluded_districts)

In [43]:
divided

Unnamed: 0,강남구,강북구,강서구,관악구,구로구,금천구,노원구,도봉구,동작구,마포구,서대문구,서초구,성북구,양천구,영등포구,용산구,은평구,중구,연월구분
0,0.285714,0.095238,6.525,160.525,1824.645161,77.291667,27.269231,1036.666667,0.0,17.590909,5.727273,0.0,48.1,150.964286,622.55814,0.0,568.296296,0.0,201908
1,69.323232,134.545455,8.888889,124.071429,818.245902,862.454545,20.115385,596.052632,0.0,16.652174,18.625,69.686275,48.333333,98.0,338.206349,0.0,210.142857,0.0,201909
2,148.505618,169.382353,38.762712,122.368421,1247.682927,956.238095,21.818182,575.478261,0.0,4.319588,31.636364,145.163265,105.461538,133.037037,474.190476,0.0,430.333333,0.0,201910
3,2.278261,289.470588,62.113208,63.367347,1362.825,1439.466667,11.357143,926.357143,0.027778,0.0,17.653846,5.477273,137.464286,98.882353,500.704545,0.033333,521.62963,0.0,201911
4,2.698113,233.5,94.684932,0.042553,1775.970588,1032.12,48.236842,480.433333,0.0,0.0,10.580645,0.792453,79.533333,86.6875,568.883721,0.0,343.615385,0.0,201912
5,1.402367,189.571429,0.583333,0.153846,3114.157895,995.2,215.666667,1291.818182,0.0,0.0,22.043478,1.103448,1.0,90.4,733.818182,0.0,546.423077,0.0,202001
6,3.125,286.0625,0.0,0.153846,2139.321429,1204.636364,216.827586,549.75,0.0,0.0,16.866667,24.409091,0.0,141.142857,825.962963,0.0,627.772727,0.0,202002
7,2.268657,225.166667,0.0,0.125,2214.925926,1619.315789,95.233333,1085.0,0.0,0.0,14.888889,13.403226,0.0,141.793103,750.333333,0.0,163.766667,0.0,202003
8,2.245455,220.173913,0.0,0.297297,1540.135135,1416.047619,89.428571,883.5,0.0,0.0,21.32,2.159091,0.0,103.645161,471.872727,0.0,501.2,0.0,202004
9,4.638554,450.4,0.0,0.222222,4219.571429,2060.214286,61.916667,1513.1,0.0625,0.0,17.32,3.882353,0.0,178.105263,988.939394,0.0,396.548387,0.0,202005


In [None]:
# plt.plot(divided['연월구분'], divided.iloc['강남구'])
divided['강남구']

In [None]:
# Number of closures per business category (rows) and district (columns); missing pairs are filled with 0.
closed_group_categories = closed.groupby(['업태구분명', '지역(구)'])['지번주소'].count().unstack().fillna(0)
closed_group_categories

#### 2) 개업중인 상점 데이터 분석

In [None]:
# Keep every business whose detailed status is not '폐업' (closed).
is_not_closed = df['상세영업상태명'] != '폐업'
opening = df[is_not_closed]

# Remove the first seven columns, then everything from position 25 onward
# (positions are re-evaluated after the first removal).
leading_columns = opening.columns[0:7]
opening = opening.drop(columns=leading_columns)
trailing_columns = opening.columns[25:43]
opening = opening.drop(columns=trailing_columns)

In [None]:
# Drop the 휴업/재개업 date, contact, street-address, postal-code and data-refresh columns.
unused_columns = [
    '휴업시작일자',
    '휴업종료일자',
    '재개업일자',
    '전화번호',
    '도로명주소',
    '소재지우편번호',
    '도로명우편번호',
    '최종수정일자',
    '데이터갱신구분',
    '데이터갱신일자',
]
opening = opening.drop(columns=unused_columns)
opening

In [None]:
# Drop the closure date and the remaining columns that are not used below.
extra_columns = [
    '폐업일자',
    '위생업태명',
    '남성종사자수',
    '여성종사자수',
    '영업장주변구분명',
    '등급구분명',
    '급수시설구분명',
    '총인원',
]
opening = opening.drop(columns=extra_columns)
opening

In [None]:
# Drop rows without a 지번주소, then derive the district column with goo().
opening = opening.dropna(subset = ['지번주소'])
opening['지역(구)'] = opening['지번주소'].apply(goo)
opening

In [None]:
opening_group = opening.groupby(['업태구분명','지역(구)'])['지번주소'].count().unstack().fillna(0)
opening_group
# Number of currently operating businesses per business category (rows) and district (columns).

In [None]:
# Business categories that appear among closed businesses but not among operating ones.
closed_categories = set(pd.unique(closed['업태구분명']))
opening_categories = set(pd.unique(opening['업태구분명']))
closed_categories.difference(opening_categories)
