In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import statsmodels.api as sm

In [3]:
group2 = pd.read_csv('./data/group2.csv', index_col=0)
group2

Unnamed: 0,자치구,행정동,기초생활수급자인원,노년부양비,독거노인수,고령내국인전체,노령화지수,무료급식소개수,저체중률
0,종로구,사직동,125.0,27.1,66.0,1787.0,208.3,0.0,5.3
1,종로구,삼청동,45.0,35.1,23.0,596.0,266.4,0.0,5.3
2,종로구,부암동,174.0,26.2,74.0,1808.0,225.6,0.0,5.3
3,종로구,평창동,144.0,27.8,78.0,3474.0,192.8,0.0,5.3
4,종로구,무악동,206.0,26.7,108.0,1476.0,141.0,0.0,5.3
...,...,...,...,...,...,...,...,...,...
75,광진구,광장동,271.0,16.1,94.0,3986.0,72.4,0.0,6.5
76,광진구,자양1동,1294.0,20.8,449.0,3621.0,249.3,0.0,6.5
77,광진구,자양2동,883.0,23.6,291.0,4196.0,189.3,0.0,6.5
78,광진구,자양3동,381.0,22.4,149.0,4467.0,136.8,1.0,6.5


In [4]:
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
df = pd.DataFrame(ss.fit_transform(group2.iloc[:,[2,3,5,6,7]]))
df.rename(columns={0:'기초생활수급자인원',1:'노년부양비',2:'고령자수',3:'노령화지수',4:'무료급식소개수'},inplace=True)
data2=pd.concat([df, group2.loc[:,'저체중률']],axis=1)
data2

Unnamed: 0,기초생활수급자인원,노년부양비,고령자수,노령화지수,무료급식소개수,저체중률
0,-1.167135,0.711184,-0.546780,-0.403923,-0.253764,5.3
1,-1.395523,2.361502,-1.606765,0.109276,-0.253764,5.3
2,-1.027247,0.525523,-0.528090,-0.251112,-0.253764,5.3
3,-1.112893,0.855587,0.954643,-0.540835,-0.253764,5.3
4,-0.935892,0.628668,-0.823569,-0.998386,-0.253764,5.3
...,...,...,...,...,...,...
75,-0.750327,-1.558003,1.410320,-1.604332,-0.253764,6.5
76,2.170188,-0.588442,1.085472,-0.041769,-0.253764,6.5
77,0.996843,-0.010830,1.597220,-0.571751,-0.253764,6.5
78,-0.436293,-0.258378,1.838408,-1.035485,1.591790,6.5


In [5]:
# VIF_data1

from statsmodels.stats.outliers_influence import variance_inflation_factor

df2 = data2.drop('저체중률', axis=1)
vif = pd.DataFrame()
vif["VIF Factor"] = [variance_inflation_factor(df2.values, i) for i in range(df2.shape[1])]
vif["features"] = df2.columns
vif

Unnamed: 0,VIF Factor,features
0,2.266373,기초생활수급자인원
1,1.169613,노년부양비
2,2.589333,고령자수
3,1.911801,노령화지수
4,1.07282,무료급식소개수


## >> AHP 분석

### 1. 가중치 확인

In [6]:
# 쌍대비교행렬

li = [[1,1.333269231,1.224269231,1.320423,1.044769],
     [0.750036059,1,1.346038462,1.519192308,0.8845],
     [0.816813798,0.742920822,1,1.179385,0.878077],
     [0.757333,0.658244513,0.8479,1,0.538269],
     [0.957149,1.130582,1.138852,1.857806,1]]
col = ['기초생활수급자인원','고령자수','노인부양비','노령화지수','무료급식소개수']

ahp_df = pd.DataFrame(li, index=col, columns=col)
ahp_df

Unnamed: 0,기초생활수급자인원,고령자수,노인부양비,노령화지수,무료급식소개수
기초생활수급자인원,1.0,1.333269,1.224269,1.320423,1.044769
고령자수,0.750036,1.0,1.346038,1.519192,0.8845
노인부양비,0.816814,0.742921,1.0,1.179385,0.878077
노령화지수,0.757333,0.658245,0.8479,1.0,0.538269
무료급식소개수,0.957149,1.130582,1.138852,1.857806,1.0


In [7]:
# 데이터프레임의 컬럼수 파악
colums_cnt = len(ahp_df.columns)
colums_cnt

5

In [8]:
from scipy.stats.mstats import gmean

# 각 컬럼의 기하평균을 계산하여 리스트에 저장
geo_mean_list = []
for i in ahp_df:
    geo_mean_list.append(gmean(ahp_df[i]))
    
# 기하평균 출력
print('기하평균 (Geometric mean)\n')
for i,j in zip(ahp_df, geo_mean_list):
    print('{} : {}'.format(i, round(j,3)))  # 소수점 3 자리수

기하평균 (Geometric mean)

기초생활수급자인원 : 0.85
고령자수 : 0.941
노인부양비 : 1.097
노령화지수 : 1.345
무료급식소개수 : 0.847


In [9]:
# 데이터프레임에 기하평균값 추가
geo = pd.DataFrame(geo_mean_list, index=col).T

ahp_df = pd.concat([ahp_df, geo], ignore_index=True)
ahp_df

Unnamed: 0,기초생활수급자인원,고령자수,노인부양비,노령화지수,무료급식소개수
0,1.0,1.333269,1.224269,1.320423,1.044769
1,0.750036,1.0,1.346038,1.519192,0.8845
2,0.816814,0.742921,1.0,1.179385,0.878077
3,0.757333,0.658245,0.8479,1.0,0.538269
4,0.957149,1.130582,1.138852,1.857806,1.0
5,0.850148,0.940827,1.09736,1.34461,0.847325


In [10]:
# 가중치 계산을 위하여 기하평균의 합을 구함
geo_mean_sum = sum(geo_mean_list)
geo_mean_sum

5.0802703696540314

In [11]:
# 각 컬럼의 가중치를 계산하여 리스트에 저장
weights_list = []
for i in range(colums_cnt):
    weights_list.append(geo_mean_list[i]/geo_mean_sum)
    
# 기하평균 출력
print('가중치 (Weights)\n')
for i,j in zip(ahp_df, weights_list):
    print('{} : {}'.format(i, round(j,3)))  # 소수점 3 자리수

가중치 (Weights)

기초생활수급자인원 : 0.167
고령자수 : 0.185
노인부양비 : 0.216
노령화지수 : 0.265
무료급식소개수 : 0.167


In [12]:
# 가중치 합 확인
su_ = sum(list(weights_list))
print(round(su_,1))

1.0


### 2. 일관성 분석

- CR < 0.1일 경우: 쌍대 비교 행렬에 합리적인 일관성이 있음.
- 0.1 < CR < 0.2일 경우: 비일관성이 용납 가능한 수준임.
- 0.2 < CR일 경우: 일관성이 부족해 재조사가 필요함.

In [13]:
bi_df = ahp_df.drop(5)
wei_df = pd.DataFrame(weights_list, index=col)

print('>> 쌍대비교행렬 \n\n{}\n'.format(bi_df))
print('>> 가중치행렬 \n{}'.format(wei_df))

>> 쌍대비교행렬 

   기초생활수급자인원      고령자수     노인부양비     노령화지수   무료급식소개수
0   1.000000  1.333269  1.224269  1.320423  1.044769
1   0.750036  1.000000  1.346038  1.519192  0.884500
2   0.816814  0.742921  1.000000  1.179385  0.878077
3   0.757333  0.658245  0.847900  1.000000  0.538269
4   0.957149  1.130582  1.138852  1.857806  1.000000

>> 가중치행렬 
                  0
기초생활수급자인원  0.167343
고령자수       0.185192
노인부양비      0.216004
노령화지수      0.264673
무료급식소개수    0.166787


In [14]:
#1. 쌍대비교행렬과 가중치 행렬을 곱한다.

res1 = pd.DataFrame(bi_df.dot(wei_df))
print('>> Result1 \n{}'.format(res1))

>> Result1 
          0
0  1.202436
1  1.151068
2  0.948879
3  0.786236
4  1.274043


In [15]:
#2. 곱한 행렬의 각 값을 가중치로 나눈다.

res2 = []
for i in range(len(wei_df)):
    res2.append(res1.iloc[i,0]/wei_df.iloc[i,0])
print('>> Result2 \n{}'.format(res2))

>> Result2 
[7.185459679944544, 6.215527321394083, 4.3928709387664, 2.970593526114403, 7.6387231890897125]


In [16]:
#3. 각 값의 평균에서 요소의 개수를 빼고 요소의 개수보다 1작은 수로 나눈다.

consistency = (np.mean(res2)-5)/4
print('일관성 지수 : {}'.format(consistency))

일관성 지수 : 0.17015873276545723


 0.17 < 0.2 이므로 비일관성이 용납 가능한 수준

### 3. AHP 분석

In [17]:
group2_score = group2.copy()
group2_score= group2_score[['자치구','행정동']]
group2_score['점수'] = 0
group2_score['순위'] = 0
group2_score['점수'] = group2_score['점수'].astype('float')
group2_score['순위'] = group2_score['순위'].astype('int')
group2_score.head()

Unnamed: 0,자치구,행정동,점수,순위
0,종로구,사직동,0.0,0
1,종로구,삼청동,0.0,0
2,종로구,부암동,0.0,0
3,종로구,평창동,0.0,0
4,종로구,무악동,0.0,0


In [18]:
def score(dataframe1, dataframe2):
    for i in range(len(dataframe1)):
        dataframe1.loc[i,'점수'] = 0.167 * dataframe2.loc[i,'기초생활수급자인원'] + 0.185 * dataframe2.loc[i,'고령자수'] + 0.216 * dataframe2.loc[i,'노년부양비'] + 0.265 * dataframe2.loc[i,'노령화지수'] - 0.167 * dataframe2.loc[i,'무료급식소개수']
    dataframe1 = dataframe1.sort_values('점수', ascending=False)
    dataframe1 = dataframe1.reset_index(drop=True)
def rank(dataframe):
    for j in range(len(dataframe)):
        dataframe.loc[j,'순위'] = j+1

In [19]:
pd.set_option('display.max_rows', None)
score(group2_score,data2)

group2_score = group2_score.sort_values('점수', ascending=False)
group2_score.reset_index(drop=True,inplace=True)

rank(group2_score)
group2_score

Unnamed: 0,자치구,행정동,점수,순위
0,광진구,중곡4동,1.163974,1
1,중구,약수동,1.105766,2
2,중구,을지로동,0.951732,3
3,종로구,창신1동,0.803631,4
4,광진구,자양4동,0.699262,5
5,중구,회현동,0.697218,6
6,종로구,창신2동,0.669336,7
7,용산구,보광동,0.638074,8
8,중구,청구동,0.545279,9
9,광진구,중곡3동,0.52427,10


In [23]:
group2_score.to_csv('group2_score.csv', index=False, encoding='utf-8-sig')