# 라이브러리 로드

In [1]:
import numpy as np
import pandas as pd

pd.set_option('max_columns', 80) # 최대 컬럼 갯수 지정
pd.set_option('max_rows', 80) # 최대 로우 갯수 지정

##### 필요함수 정의

In [2]:
def load_data(path):
    df = pd.read_csv(path, encoding='CP949')
    df = df[ df['상권_구분_코드']=='A' ]  # 골목상권으로 테이블 제한
    
    return df

# 데이터 로드

In [3]:
df_2021 = load_data('./data/row_data/서울시 우리마을가게 상권분석서비스(상권-추정매출).csv')
df_2020 = load_data('./data/row_data/서울시 우리마을가게상권분석서비스(상권-추정매출)_2020.csv')
df_2019 = load_data('./data/row_data/서울시 우리마을가게 상권분석서비스(상권-추정매출)_2019.csv')
df_2018 = load_data('./data/row_data/서울시 우리마을가게 상권분석서비스(상권-추정매출)_2018.csv')
df_2017 = load_data('./data/row_data/서울시 우리마을가게 상권분석서비스(상권-추정매출)_2017.csv')

# 기준 인덱스가 될 테이블 생성
bs_area = gpd.read_file('data/geojson/상권_geometry_좌표.geojson', driver= 'GeoJSON')
bs_area= bs_area[bs_area['TRDAR_SE_C'] == 'A']

df_base = bs_area['TRDAR_CD'].sort_values( ).reset_index(drop=True).to_frame()
df_base.columns = ['상권_코드']

df_base.drop(804, axis=0, inplace=True)  # 2020년 이전은 존재하지 않는 상권코드(1000805)
df_base['상권_코드'] = df_base['상권_코드'].astype(int)
df_base.to_csv('./data/base_data/df_base.csv', index=False)

print(df_base.shape)
df_base.head()

(1009, 1)


Unnamed: 0,상권_코드
0,1000001
1,1000002
2,1000003
3,1000004
4,1000005


In [38]:
income_2017 = df_2017.groupby(['상권_코드'])['분기당_매출_건수'].sum().to_frame()
income_2018 = df_2018.groupby(['상권_코드'])['분기당_매출_건수'].sum().to_frame()
income_2019 = df_2019.groupby(['상권_코드'])['분기당_매출_건수'].sum().to_frame()
income_2020 = df_2020.groupby(['상권_코드'])['분기당_매출_건수'].sum().to_frame()
income_2021 = df_2021.groupby(['상권_코드'])['분기당_매출_건수'].sum().to_frame()

df_full = pd.merge(df_base, income_2017, left_on='상권_코드', right_index=True, )
df_full = pd.merge(df_full, income_2018, on='상권_코드', suffixes=('_2017', '_2018'))
df_full = pd.merge(df_full, income_2019, on='상권_코드', suffixes=('_2018', '_2019'))
df_full = pd.merge(df_full, income_2020, on='상권_코드', suffixes=('_2019', '_2020'))
df_full = pd.merge(df_full, income_2021, on='상권_코드', suffixes=('_2020', '_2021'))
df_full.set_index('상권_코드', inplace=True)
df_full.head()

Unnamed: 0_level_0,분기당_매출_건수_2017,분기당_매출_건수_2018,분기당_매출_건수_2019,분기당_매출_건수_2020,분기당_매출_건수
상권_코드,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1000001,1256289,1271610,1111664,995901,524965
1000002,1071093,1249259,1106194,841004,407875
1000003,1432450,1787480,1496646,1344778,601382
1000004,3201450,2529601,1521309,1105801,537648
1000005,517763,518973,396981,385697,171974


In [39]:
df_full['합계'] = df_full.sum(axis=1)
df_full.reset_index(inplace=True)
target = df_full[['상권_코드', '합계']].set_index('상권_코드')

target.to_csv('./data/base_data/target.csv')
pd.read_csv('./data/base_data/target.csv').head()

Unnamed: 0,상권_코드,합계
0,1000001,5160429
1,1000002,4675425
2,1000003,6662736
3,1000004,8895809
4,1000005,1991388


In [3]:
load_data('서울시 우리마을가게 상권분석서비스(상권-상권변화지표).csv')

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,상권_변화_지표,상권_변화_지표_명,운영_영업_개월_평균,폐업_영업_개월_평균,서울_운영_영업_개월_평균,서울_폐업_영업_개월_평균
486,2021,3,A,골목상권,1001010,풍성로37가길,LL,다이나믹,109,43,117,55
487,2021,3,A,골목상권,1001009,천호옛길,HL,상권축소,134,52,117,55
488,2021,3,A,골목상권,1001008,천호옛14길,LL,다이나믹,115,52,117,55
489,2021,3,A,골목상권,1001007,천호대로219길,LH,상권확장,110,55,117,55
490,2021,3,A,골목상권,1001006,천호대로197길,LH,상권확장,100,55,117,55
...,...,...,...,...,...,...,...,...,...,...,...,...
46371,2014,1,A,골목상권,1000005,백석동길,LH,상권확장,67,44,91,44
46372,2014,1,A,골목상권,1000004,명륜길,LH,상권확장,83,55,91,44
46373,2014,1,A,골목상권,1000003,돈화문로11가길,HH,정체,97,46,91,44
46374,2014,1,A,골목상권,1000002,난계로27길,HL,상권축소,99,37,91,44
