In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')


In [2]:
train = pd.read_csv('./Dataset/train_Winsorization.csv', encoding='euc-kr')
test = pd.read_csv('./Dataset/test.csv', encoding='euc-kr')

In [3]:
train['기업수명주기'] = train['기업수명주기'].map({
    '도입기' : 1,
    '성장기' : 2,
    '성숙기' : 3,
    '수축기' : 4,
    '쇠퇴기' : 5
}).astype('category')

test['기업수명주기'] = test['기업수명주기'].map({
    '도입기' : 1,
    '성장기' : 2,
    '성숙기' : 3,
    '수축기' : 4,
    '쇠퇴기' : 5
}).astype('category')

In [4]:
len(train.columns)

50

In [5]:
train_int = train[['부채비율', '당좌비율', '유동비율', '이자보상배율', '차입금의존도', '자기자본구성비율', '매출액영업이익률',
      '자기자본순이익률', '총자본순이익률', '총자본회전률', '자기자본회전률', '운전자본회전률', '순운전자본회전률', '재고자산회전률',
       '당좌자산회전률', '유동자산회전률', '매출액증가율', '총자본증가율', '자기자본증가율', '순이익증가율',
       '유형자산증가율', '유동자산증가율', '재고자산증가율', '영업이익증가율', '총자본투자효율', '부가가치율',
       '노동소득분배율', '자본분배율', '이윤분배율', 'log자산총계',
       'OCF이자보상배율', '부채상환계수', '장기부채상환능력', '매출액대비금융비용상환능력', '연구개발비대비매출액', '매출액대비현금흐름',
       '매출액대비잉여현금흐름', '총자산대비현금흐름', '총자산대비영업현금흐름', '총자산대비잉여현금흐름',
       't-1감사의견코드']]
test_int = test[['부채비율', '당좌비율', '유동비율', '이자보상배율', '차입금의존도', '자기자본구성비율', '매출액영업이익률',
      '자기자본순이익률', '총자본순이익률', '총자본회전률', '자기자본회전률', '운전자본회전률', '순운전자본회전률', '재고자산회전률',
       '당좌자산회전률', '유동자산회전률', '매출액증가율', '총자본증가율', '자기자본증가율', '순이익증가율',
       '유형자산증가율', '유동자산증가율', '재고자산증가율', '영업이익증가율', '총자본투자효율', '부가가치율',
       '노동소득분배율', '자본분배율', '이윤분배율', 'log자산총계',
       'OCF이자보상배율', '부채상환계수', '장기부채상환능력', '매출액대비금융비용상환능력', '연구개발비대비매출액', '매출액대비현금흐름',
       '매출액대비잉여현금흐름', '총자산대비현금흐름', '총자산대비영업현금흐름', '총자산대비잉여현금흐름',
       't-1감사의견코드']]

# Standard Scaler

In [6]:
from sklearn.preprocessing import StandardScaler
X_train = train_int.drop('t-1감사의견코드', axis=1)
y_train = train_int[['t-1감사의견코드']]

X_test = test_int.drop('t-1감사의견코드', axis=1)
y_test = test_int[['t-1감사의견코드']]

In [7]:
train_cat = train[['기업수명주기','이보배초과여부', '파부비초과여부', '파당비초과여부', '파차의초과여부', '파로이초과여부']]
test_cat = test[['기업수명주기','이보배초과여부', '파부비초과여부', '파당비초과여부', '파차의초과여부', '파로이초과여부']]

In [8]:
scaler = StandardScaler()
train_sc = scaler.fit_transform(X_train)
test_sc = scaler.transform(X_test)

train_sc = pd.DataFrame(train_sc, columns=X_train.columns)
test_sc = pd.DataFrame(test_sc, columns=X_test.columns)

# 스케일링 + 카테고리 변수 +타겟 데이터프레임
train_sc_total = pd.concat([train_sc,train_cat ,y_train], axis=1)
test_sc_total = pd.concat([test_sc, test_cat,y_test], axis=1)

- train : 50개
- train_int : 41개(타겟 포함)
- train_cat : 6개
- train_sc_total : 47개(타겟 포함)
- '회사명', '거래소코드', 'Year' 제외 (3개)

---

# Undersampling

In [9]:
from imblearn.under_sampling import OneSidedSelection
from collections import Counter

In [10]:
print(train_sc_total['t-1감사의견코드'].value_counts())
print(test_sc_total['t-1감사의견코드'].value_counts())

t-1감사의견코드
0.0    128076
1.0      9602
Name: count, dtype: int64
t-1감사의견코드
0.0    38257
1.0     2868
Name: count, dtype: int64


In [11]:
train_sc_total.drop('t-1감사의견코드',axis=1)

Unnamed: 0,부채비율,당좌비율,유동비율,이자보상배율,차입금의존도,자기자본구성비율,매출액영업이익률,자기자본순이익률,총자본순이익률,총자본회전률,...,매출액대비잉여현금흐름,총자산대비현금흐름,총자산대비영업현금흐름,총자산대비잉여현금흐름,기업수명주기,이보배초과여부,파부비초과여부,파당비초과여부,파차의초과여부,파로이초과여부
0,0.298383,0.173476,0.082420,-0.252413,-1.049208,-1.162786,-1.581511,-11.727650,-5.864452,-0.679320,...,0.000194,8.381664,-1.599831,-0.843661,5,0,1,0,0,1
1,-0.228212,-0.232380,-0.262760,-0.245176,0.250579,0.331320,0.192509,0.060807,0.210495,-0.561237,...,0.117963,-0.112725,0.335578,0.424888,3,0,0,0,0,0
2,-0.093818,-0.172586,-0.213088,-0.313191,-1.247746,-0.700586,0.022235,-0.309371,-0.362267,-0.722824,...,-0.038364,-4.234050,-3.140765,-2.132309,5,0,1,0,0,0
3,-0.154482,-0.251870,-0.278951,-0.243238,1.053533,-0.438039,0.333791,0.803830,0.997786,-0.536377,...,-0.055781,-1.765124,0.004584,-3.364207,2,0,0,0,1,0
4,-0.097999,-0.287596,-0.194989,-0.243697,-1.190369,-0.687056,0.207911,5.605906,1.659084,-0.057830,...,0.125221,-0.141415,2.120649,1.949162,4,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137673,-0.093123,-0.209503,-0.189677,-0.243783,-0.608773,-0.702780,0.116263,0.819827,0.444094,0.582305,...,0.110991,-0.158204,0.057808,0.339583,3,0,1,0,0,0
137674,-0.215400,-0.116949,-0.154168,-0.246047,-0.425558,0.122525,0.108682,-0.002059,-0.013570,1.104357,...,0.111585,-0.331277,0.438023,0.675581,4,0,0,0,0,0
137675,-0.254527,-0.006997,0.016630,-0.230915,-0.635506,1.003777,0.172913,0.691009,2.212775,0.458007,...,0.115423,2.250956,1.243888,1.053845,2,0,0,0,0,0
137676,-0.265038,0.063887,-0.007410,-0.255136,-1.015303,1.441477,0.098965,-0.106601,-0.142288,-0.238062,...,0.113132,0.572191,0.056037,0.411884,4,0,0,0,0,0


In [12]:
from imblearn.under_sampling import OneSidedSelection
from imblearn.under_sampling import RandomUnderSampler

# 't-1감사의견코드' 열을 제외한 데이터와 해당 열만을 추출합니다.
train_X = train_sc_total.drop('t-1감사의견코드', axis=1)
train_y = train_sc_total['t-1감사의견코드']

# OneSidedSelection 적용
undersampler = OneSidedSelection(random_state=42)
train_X_resampled, train_y_resampled = undersampler.fit_resample(train_X, train_y)

# 원하는 샘플링 비율로 데이터셋을 다시 조정합니다.
ratio = 0.33  # 3:1 비율을 의미합니다.
custom_undersampler = RandomUnderSampler(sampling_strategy=ratio, random_state=42)
train_X_resampled, train_y_resampled = custom_undersampler.fit_resample(train_X_resampled, train_y_resampled)


In [13]:
from imblearn.under_sampling import OneSidedSelection
from imblearn.under_sampling import RandomUnderSampler

# 't-1감사의견코드' 열을 제외한 데이터와 해당 열만을 추출합니다.
test_X = test_sc_total.drop('t-1감사의견코드', axis=1)
test_y = test_sc_total['t-1감사의견코드']

# OneSidedSelection 적용
undersampler = OneSidedSelection(random_state=42)
test_X_resampled, test_y_resampled = undersampler.fit_resample(test_X, test_y)

# 원하는 샘플링 비율로 데이터셋을 다시 조정합니다.
ratio = 0.33  # 3:1 비율을 의미합니다.
custom_undersampler = RandomUnderSampler(sampling_strategy=ratio, random_state=42)
test_X_resampled, test_y_resampled = custom_undersampler.fit_resample(test_X_resampled, test_y_resampled)


In [14]:
print(train_X_resampled.shape)
print(test_X_resampled.shape)

(38698, 46)
(11558, 46)


---


- 데이터 프레임 정리

In [15]:
train_y_resampled=pd.DataFrame(train_y_resampled,columns=['t-1감사의견코드'])
test_y_resampled=pd.DataFrame(test_y_resampled,columns=['t-1감사의견코드'])

In [16]:
train_int_resampled = train_X_resampled.drop(['기업수명주기','이보배초과여부', '파부비초과여부', '파당비초과여부', '파차의초과여부', '파로이초과여부'],axis=1)
test_int_resampled = test_X_resampled.drop(['기업수명주기','이보배초과여부', '파부비초과여부', '파당비초과여부', '파차의초과여부', '파로이초과여부'],axis=1)
train_cat_resampled= train_X_resampled[['기업수명주기','이보배초과여부', '파부비초과여부', '파당비초과여부', '파차의초과여부', '파로이초과여부']]
test_cat_resampled= test_X_resampled[['기업수명주기','이보배초과여부', '파부비초과여부', '파당비초과여부', '파차의초과여부', '파로이초과여부']]

In [17]:
train_resampled_sum = pd.concat([train_X_resampled,train_y_resampled],axis=1)
test_resampled_sum = pd.concat([test_X_resampled,test_y_resampled],axis=1)

train_int_resampled_sum = pd.concat([train_int_resampled,train_y_resampled],axis=1)
train_cat_resampled_sum =pd.concat([train_cat_resampled,train_y_resampled],axis=1)
test_cat_resampled_sum=pd.concat([test_cat_resampled,test_y_resampled],axis=1)

---

# MDA

In [18]:
# 1-1 정규성 테스트(샤피로)
from scipy.stats import norm
from scipy import stats
from statsmodels.formula.api import ols
from scipy.stats import kstest

for j in train_int_resampled_sum.columns:
    a = stats.shapiro(train_int_resampled_sum[j])
    p = a.pvalue
    print(j,a)

부채비율 ShapiroResult(statistic=0.25304025411605835, pvalue=0.0)
당좌비율 ShapiroResult(statistic=0.2545449137687683, pvalue=0.0)
유동비율 ShapiroResult(statistic=0.25518113374710083, pvalue=0.0)
이자보상배율 ShapiroResult(statistic=0.27373480796813965, pvalue=0.0)
차입금의존도 ShapiroResult(statistic=0.9469869136810303, pvalue=0.0)
자기자본구성비율 ShapiroResult(statistic=0.9468852877616882, pvalue=0.0)
매출액영업이익률 ShapiroResult(statistic=0.14291447401046753, pvalue=0.0)
자기자본순이익률 ShapiroResult(statistic=0.5321347713470459, pvalue=0.0)
총자본순이익률 ShapiroResult(statistic=0.7789320945739746, pvalue=0.0)
총자본회전률 ShapiroResult(statistic=0.666995644569397, pvalue=0.0)
자기자본회전률 ShapiroResult(statistic=0.41062939167022705, pvalue=0.0)
운전자본회전률 ShapiroResult(statistic=0.12193131446838379, pvalue=0.0)
순운전자본회전률 ShapiroResult(statistic=0.7332994341850281, pvalue=0.0)
재고자산회전률 ShapiroResult(statistic=0.18447226285934448, pvalue=0.0)
당좌자산회전률 ShapiroResult(statistic=0.5867840647697449, pvalue=0.0)
유동자산회전률 ShapiroResult(statistic=0.64195990

In [19]:
# 1-2 정규성 테스트(K-S)
for j in train_resampled_sum.columns:
    a = kstest(train_resampled_sum[j],'norm')
    p = a.pvalue
    print(j,a)

부채비율 KstestResult(statistic=0.3906751098050474, pvalue=0.0, statistic_location=-0.2775598980715014, statistic_sign=-1)
당좌비율 KstestResult(statistic=0.36984663905227333, pvalue=0.0, statistic_location=-0.3322595537125994, statistic_sign=-1)
유동비율 KstestResult(statistic=0.36477221111440783, pvalue=0.0, statistic_location=-0.34573161526781654, statistic_sign=-1)
이자보상배율 KstestResult(statistic=0.44858521060291295, pvalue=0.0, statistic_location=-0.19129502231396486, statistic_sign=1)
차입금의존도 KstestResult(statistic=0.10606206285734993, pvalue=0.0, statistic_location=-1.247745901800868, statistic_sign=-1)
자기자본구성비율 KstestResult(statistic=0.09775745836390909, pvalue=0.0, statistic_location=-0.32797374429649506, statistic_sign=1)
매출액영업이익률 KstestResult(statistic=0.4177780186228714, pvalue=0.0, statistic_location=-0.0041342989796015256, statistic_sign=-1)
자기자본순이익률 KstestResult(statistic=0.2663070708196953, pvalue=0.0, statistic_location=-0.25646853214416876, statistic_sign=-1)
총자본순이익률 KstestResult(st

-p-value가 0 이는 데이터개수가 많아서 p-value 자체가 너무작아 계산이 불가능하다고 판단. 중심극한 정리에 의해서 정규성이 있다고 가정하고 진행

In [20]:
# 부도기업과 정상기업의 피처별 등분산비교 (정규성 가정으로 bartlett)
Bad = train_int_resampled_sum[train_int_resampled_sum['t-1감사의견코드']== 1] #Existing Customer
Good = train_int_resampled_sum[train_int_resampled_sum['t-1감사의견코드']== 0] #Attrited Customer

c = []
for i in train_int_resampled_sum:
    # lresult = stats.levene(close[i], normal[i])
    lresult = stats.bartlett(Bad[i], Good[i])
    c.append([i,lresult[-1]])

c= pd.DataFrame(c)
c.columns=["피처값",'F-test']
c

Unnamed: 0,피처값,F-test
0,부채비율,3.470832e-187
1,당좌비율,0.0
2,유동비율,4.0134509999999996e-266
3,이자보상배율,6.830335999999999e-225
4,차입금의존도,2.223719e-05
5,자기자본구성비율,8.770644e-82
6,매출액영업이익률,0.4806339
7,자기자본순이익률,0.0
8,총자본순이익률,0.0
9,총자본회전률,0.0


In [21]:
# F-test 결과 0.05 이상이면 homo 0.05 이하이면 hetero
c["분산"] =''
c["T-test"] =""
for i in c.index:
    if c.loc[i,"F-test"]>=0.05:
        c.loc[i,"분산"] = "homo"
    else:
        c.loc[i,"분산"] = "hetero"
c

Unnamed: 0,피처값,F-test,분산,T-test
0,부채비율,3.470832e-187,hetero,
1,당좌비율,0.0,hetero,
2,유동비율,4.0134509999999996e-266,hetero,
3,이자보상배율,6.830335999999999e-225,hetero,
4,차입금의존도,2.223719e-05,hetero,
5,자기자본구성비율,8.770644e-82,hetero,
6,매출액영업이익률,0.4806339,homo,
7,자기자본순이익률,0.0,hetero,
8,총자본순이익률,0.0,hetero,
9,총자본회전률,0.0,hetero,


In [22]:
c[c["분산"]=='homo']

Unnamed: 0,피처값,F-test,분산,T-test
6,매출액영업이익률,0.480634,homo,
24,총자본투자효율,0.106515,homo,


In [25]:
# homo 인 feature 는 student t-test, hetero이면 Welchs T-Test 진행
c["분산"] =''
c["T-test"] =""
for i in c.index:
    if c.loc[i,"F-test"]>=0.05:
        c.loc[i,"분산"] = "homo"
        result = stats.ttest_ind(Bad[c.loc[i,"피처값"]], Good[c.loc[i,"피처값"]], equal_var=True)       ## equal_var = True Student T-test
        c.loc[i,"T-test"] = result[-1]
        print(Bad[c.loc[i,"피처값"]])
    else:
        c.loc[i,"분산"] = "hetero"
        result = stats.ttest_ind(Bad[c.loc[i,"피처값"]], Good[c.loc[i,"피처값"]], equal_var=False)      ## equal_var = False Welchs T-Test
        c.loc[i,"T-test"] = result[-1]
c

29096    -1.581632
29097   -12.349735
29098     0.029493
29099    -0.025061
29100    -0.125942
           ...    
38693     0.100578
38694     0.104771
38695     0.102997
38696     0.102392
38697     0.096989
Name: 매출액영업이익률, Length: 9602, dtype: float64
29096   -0.614022
29097   -0.776401
29098   -0.463187
29099   -0.746979
29100   -0.010685
           ...   
38693   -0.460953
38694   -0.295594
38695    0.117431
38696    0.542374
38697    0.197503
Name: 총자본투자효율, Length: 9602, dtype: float64


Unnamed: 0,피처값,F-test,분산,T-test
0,부채비율,3.470832e-187,hetero,0.0
1,당좌비율,0.0,hetero,0.0
2,유동비율,4.0134509999999996e-266,hetero,0.0
3,이자보상배율,6.830335999999999e-225,hetero,0.0
4,차입금의존도,2.223719e-05,hetero,0.0
5,자기자본구성비율,8.770644e-82,hetero,0.0
6,매출액영업이익률,0.4806339,homo,0.024044
7,자기자본순이익률,0.0,hetero,0.001189
8,총자본순이익률,0.0,hetero,0.0
9,총자본회전률,0.0,hetero,0.0


In [26]:
# 0.05 이상이면 유의미하지 않으므로 0.05 이하인 것만 추출
d = c[c["T-test"]<= 0.05]
d.sort_values('T-test',ascending=False)["피처값"].unique()
# d.sort_values('T-test',ascending=False)["피처값"]

array(['운전자본회전률', '매출액영업이익률', '이윤분배율', '재고자산회전률', '총자본투자효율', '자기자본순이익률',
       '매출액대비잉여현금흐름', '노동소득분배율', '매출액대비금융비용상환능력', '자본분배율', '부가가치율',
       '장기부채상환능력', '유동비율', '부채비율', '유형자산증가율', '이자보상배율', '총자본순이익률',
       '총자산대비현금흐름', '당좌비율', '총자산대비영업현금흐름', 'OCF이자보상배율', '매출액증가율',
       '재고자산증가율', '영업이익증가율', '유동자산증가율', '순이익증가율', '자기자본증가율', '총자본증가율',
       '차입금의존도', '자기자본회전률', '유동자산회전률', 'log자산총계', '당좌자산회전률', '순운전자본회전률',
       '총자본회전률', '자기자본구성비율', '총자산대비잉여현금흐름', 't-1감사의견코드'], dtype=object)

In [27]:
d.sort_values('T-test',ascending=False).dropna()

Unnamed: 0,피처값,F-test,분산,T-test
11,운전자본회전률,5.972947000000001e-119,hetero,0.04792
6,매출액영업이익률,0.4806339,homo,0.024044
28,이윤분배율,6.635528e-24,hetero,0.009042
13,재고자산회전률,1.409111e-287,hetero,0.00378
24,총자본투자효율,0.1065153,homo,0.002678
7,자기자본순이익률,0.0,hetero,0.001189
36,매출액대비잉여현금흐름,1.1161980000000001e-31,hetero,9.8e-05
26,노동소득분배율,1.378489e-15,hetero,9.8e-05
33,매출액대비금융비용상환능력,0.0,hetero,2e-06
27,자본분배율,4.605442e-12,hetero,1e-06


In [28]:
d['피처값'].values

array(['부채비율', '당좌비율', '유동비율', '이자보상배율', '차입금의존도', '자기자본구성비율', '매출액영업이익률',
       '자기자본순이익률', '총자본순이익률', '총자본회전률', '자기자본회전률', '운전자본회전률', '순운전자본회전률',
       '재고자산회전률', '당좌자산회전률', '유동자산회전률', '매출액증가율', '총자본증가율', '자기자본증가율',
       '순이익증가율', '유형자산증가율', '유동자산증가율', '재고자산증가율', '영업이익증가율', '총자본투자효율',
       '부가가치율', '노동소득분배율', '자본분배율', '이윤분배율', 'log자산총계', 'OCF이자보상배율',
       '장기부채상환능력', '매출액대비금융비용상환능력', '매출액대비잉여현금흐름', '총자산대비현금흐름',
       '총자산대비영업현금흐름', '총자산대비잉여현금흐름', 't-1감사의견코드'], dtype=object)

In [29]:

fea = ['부채비율', '당좌비율', '유동비율', '이자보상배율', '차입금의존도', '자기자본구성비율', '매출액영업이익률',
       '자기자본순이익률', '총자본순이익률', '총자본회전률', '자기자본회전률', '운전자본회전률', '순운전자본회전률',
       '재고자산회전률', '당좌자산회전률', '유동자산회전률', '매출액증가율', '총자본증가율', '자기자본증가율',
       '순이익증가율', '유형자산증가율', '유동자산증가율', '재고자산증가율', '영업이익증가율', '총자본투자효율',
       '부가가치율', '노동소득분배율', '자본분배율', '이윤분배율', 'log자산총계', 'OCF이자보상배율',
       '장기부채상환능력', '매출액대비금융비용상환능력', '매출액대비잉여현금흐름', '총자산대비현금흐름',
       '총자산대비영업현금흐름', '총자산대비잉여현금흐름', 't-1감사의견코드']
mda_feature = train_resampled_sum[fea]
mda_feature

Unnamed: 0,부채비율,당좌비율,유동비율,이자보상배율,차입금의존도,자기자본구성비율,매출액영업이익률,자기자본순이익률,총자본순이익률,총자본회전률,...,이윤분배율,log자산총계,OCF이자보상배율,장기부채상환능력,매출액대비금융비용상환능력,매출액대비잉여현금흐름,총자산대비현금흐름,총자산대비영업현금흐름,총자산대비잉여현금흐름,t-1감사의견코드
0,-0.231628,3.772138,3.063858,-0.247798,-0.197353,0.397139,0.169123,-0.125685,-0.221971,-0.760114,...,0.090608,0.395970,-0.249884,0.328765,0.078877,0.016665,-1.148654,-0.397408,-0.320747,0.0
1,-0.110268,-0.200506,-0.236282,1.738226,0.990940,-0.644274,0.094207,-0.114459,-0.213117,-0.536377,...,0.136494,2.700362,-0.412296,1.784102,0.243912,0.044539,0.299912,-0.889592,-1.869636,0.0
2,-0.277560,-0.290706,-0.311212,-0.247707,2.539796,-1.433378,0.362217,-0.132982,-0.227419,-0.679320,...,0.044142,2.054257,-0.249213,0.149071,0.076951,0.110013,-0.124571,-0.166107,0.122531,0.0
3,-0.120838,-0.329571,-0.341663,-0.247686,1.154595,-0.602954,0.139810,-0.116985,-0.221971,-0.691750,...,0.056135,-0.482406,-0.249018,0.063913,0.078472,0.114903,-0.173929,-0.148348,0.199293,0.0
4,-0.136201,-0.279093,-0.226659,-0.247791,0.793380,-0.534940,0.098401,-0.282990,-0.439225,-0.206987,...,-0.095081,0.010325,-0.249316,0.282330,0.085466,0.100077,-0.205135,-0.262452,-0.789810,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38693,1.343621,-0.199881,-0.221781,1.738226,1.445392,-1.332454,0.100578,2.808800,0.154649,1.589119,...,0.448428,-0.883544,-0.412296,-0.039992,-14.735013,0.099393,0.262653,-2.958654,-2.005218,1.0
38694,-0.165591,-0.203398,-0.216638,-0.229166,-0.962816,-0.367831,0.104771,0.123813,0.123321,0.209411,...,0.218533,-0.522700,-0.285575,-0.039992,-0.548727,0.113594,0.315837,-0.585401,0.419038,1.0
38695,-0.255627,0.031301,0.019563,-0.227858,-1.106911,1.043269,0.102997,-0.043034,0.047724,0.433148,...,0.161465,-0.851760,-0.235670,-0.039992,-0.474477,0.110941,0.849720,0.036248,0.303795,1.0
38696,0.006268,-0.256943,-0.270608,-0.246838,0.803812,-0.922910,0.102392,1.168115,0.641598,3.316862,...,0.158104,-1.094728,-0.247405,-0.001196,0.105620,0.109466,-0.197040,0.738049,-0.080856,1.0


In [30]:
def vif(data):
    import pandas as pd
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    # VIF 출력을 위한 데이터 프레임 형성
    vif = pd.DataFrame()

    # VIF 값과 각 Feature 이름에 대해 설정
    vif["VIF Factor"] = [variance_inflation_factor(data.values, i) for i in range(len(data.columns))]
    vif["features"] = data.columns

    # VIF 값이 높은 순으로 정렬
    vif = vif.sort_values(by="VIF Factor", ascending=False)
    vif = vif.reset_index().drop(columns='index')

    return vif

vif(mda_feature)

Unnamed: 0,VIF Factor,features
0,10.811082,당좌비율
1,10.61305,유동비율
2,7.700772,자본분배율
3,7.549531,노동소득분배율
4,5.764823,순운전자본회전률
5,4.792003,총자본회전률
6,4.666801,유동자산회전률
7,3.472887,당좌자산회전률
8,3.231626,자기자본구성비율
9,3.150953,차입금의존도


In [31]:

fea = ['부채비율', '유동비율', '이자보상배율', '차입금의존도', '자기자본구성비율', '매출액영업이익률',
       '자기자본순이익률', '총자본순이익률', '총자본회전률', '자기자본회전률', '운전자본회전률', '순운전자본회전률',
       '재고자산회전률', '당좌자산회전률', '유동자산회전률', '매출액증가율', '총자본증가율', '자기자본증가율',
       '순이익증가율', '유형자산증가율', '유동자산증가율', '재고자산증가율', '영업이익증가율', '총자본투자효율',
       '부가가치율', '노동소득분배율', '자본분배율', '이윤분배율', 'log자산총계', 'OCF이자보상배율',
       '장기부채상환능력', '매출액대비금융비용상환능력', '매출액대비잉여현금흐름', '총자산대비현금흐름',
       '총자산대비영업현금흐름', '총자산대비잉여현금흐름', 't-1감사의견코드']
mda_feature2 = train_resampled_sum[fea]
mda_feature2

Unnamed: 0,부채비율,유동비율,이자보상배율,차입금의존도,자기자본구성비율,매출액영업이익률,자기자본순이익률,총자본순이익률,총자본회전률,자기자본회전률,...,이윤분배율,log자산총계,OCF이자보상배율,장기부채상환능력,매출액대비금융비용상환능력,매출액대비잉여현금흐름,총자산대비현금흐름,총자산대비영업현금흐름,총자산대비잉여현금흐름,t-1감사의견코드
0,-0.231628,3.063858,-0.247798,-0.197353,0.397139,0.169123,-0.125685,-0.221971,-0.760114,-0.405471,...,0.090608,0.395970,-0.249884,0.328765,0.078877,0.016665,-1.148654,-0.397408,-0.320747,0.0
1,-0.110268,-0.236282,1.738226,0.990940,-0.644274,0.094207,-0.114459,-0.213117,-0.536377,-0.307521,...,0.136494,2.700362,-0.412296,1.784102,0.243912,0.044539,0.299912,-0.889592,-1.869636,0.0
2,-0.277560,-0.311212,-0.247707,2.539796,-1.433378,0.362217,-0.132982,-0.227419,-0.679320,-0.412412,...,0.044142,2.054257,-0.249213,0.149071,0.076951,0.110013,-0.124571,-0.166107,0.122531,0.0
3,-0.120838,-0.341663,-0.247686,1.154595,-0.602954,0.139810,-0.116985,-0.221971,-0.691750,-0.361509,...,0.056135,-0.482406,-0.249018,0.063913,0.078472,0.114903,-0.173929,-0.148348,0.199293,0.0
4,-0.136201,-0.226659,-0.247791,0.793380,-0.534940,0.098401,-0.282990,-0.439225,-0.206987,-0.152498,...,-0.095081,0.010325,-0.249316,0.282330,0.085466,0.100077,-0.205135,-0.262452,-0.789810,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38693,1.343621,-0.221781,1.738226,1.445392,-1.332454,0.100578,2.808800,0.154649,1.589119,9.473591,...,0.448428,-0.883544,-0.412296,-0.039992,-14.735013,0.099393,0.262653,-2.958654,-2.005218,1.0
38694,-0.165591,-0.216638,-0.229166,-0.962816,-0.367831,0.104771,0.123813,0.123321,0.209411,0.011009,...,0.218533,-0.522700,-0.285575,-0.039992,-0.548727,0.113594,0.315837,-0.585401,0.419038,1.0
38695,-0.255627,0.019563,-0.227858,-1.106911,1.043269,0.102997,-0.043034,0.047724,0.433148,-0.182577,...,0.161465,-0.851760,-0.235670,-0.039992,-0.474477,0.110941,0.849720,0.036248,0.303795,1.0
38696,0.006268,-0.270608,-0.246838,0.803812,-0.922910,0.102392,1.168115,0.641598,3.316862,3.234102,...,0.158104,-1.094728,-0.247405,-0.001196,0.105620,0.109466,-0.197040,0.738049,-0.080856,1.0


In [32]:
vif(mda_feature2)

Unnamed: 0,VIF Factor,features
0,7.694714,자본분배율
1,7.548258,노동소득분배율
2,5.764467,순운전자본회전률
3,4.790883,총자본회전률
4,4.620936,유동자산회전률
5,3.41998,당좌자산회전률
6,3.211504,자기자본구성비율
7,3.148074,차입금의존도
8,2.659705,이윤분배율
9,2.522093,총자산대비잉여현금흐름


In [33]:
fea_hetero= ['부채비율', '유동비율', '이자보상배율', '차입금의존도', '자기자본구성비율',
       '자기자본순이익률', '총자본순이익률', '총자본회전률', '자기자본회전률', '운전자본회전률', '순운전자본회전률',
       '재고자산회전률', '당좌자산회전률', '유동자산회전률', '매출액증가율', '총자본증가율', '자기자본증가율',
       '순이익증가율', '유형자산증가율', '유동자산증가율', '재고자산증가율', '영업이익증가율', 
       '부가가치율', '노동소득분배율', '자본분배율', '이윤분배율', 'log자산총계', 'OCF이자보상배율',
       '장기부채상환능력', '매출액대비금융비용상환능력', '매출액대비잉여현금흐름', '총자산대비현금흐름',
       '총자산대비영업현금흐름', '총자산대비잉여현금흐름']
fea_homo=['매출액영업이익률','총자본투자효율']

In [34]:
# Welch's t-test 수행
t_stat, p_value = stats.ttest_ind(Bad[fea_hetero], Good[fea_hetero], equal_var=False)
result_df_hetero = pd.DataFrame({'t-statistic': t_stat, 'p-value': p_value}, index=fea_hetero)

# t-test 수행
t_stat, p_value = stats.ttest_ind(Bad[fea_homo], Good[fea_homo], equal_var=True)
result_df_homo = pd.DataFrame({'t-statistic': t_stat, 'p-value': p_value}, index=fea_homo)

result_df = pd.concat([result_df_hetero,result_df_homo],axis=0)


result_df = result_df.sort_values('p-value', ascending=True).reset_index()

In [35]:
result_df = result_df[['index', 'p-value']]
result_df.columns = ['Variable', 'p-value']

---

# Chi 2

* 카이제곱 검정 조건
    * 종속변인은 범주형 자료여야 한다.
    * 기대빈도가 5이하인 셀이 전체의 20%가 넘지 않아야 한다.
    * 각 칸의 빈도는 다른 칸의 빈도와 독립적이어야 한다.

In [36]:
from scipy.stats import chi2_contingency

# 기대빈도가 5 이하인 항목의 비율을 저장할 딕셔너리
expected_freq_5_ratio = {}

# 각 독립 변수에 대해 기대빈도 계산 및 비율 확인
for column in train_cat_resampled_sum.columns[:-1]:
    # 교차 테이블 생성
    contingency_table = pd.crosstab(train_cat_resampled_sum[column], train_cat_resampled_sum['t-1감사의견코드'])

    # 카이제곱 검정 수행
    chi2, p_value, dof, expected_freq = chi2_contingency(contingency_table)

    # 기대빈도가 5 이하인 항목의 비율 계산
    expected_freq_5 = (expected_freq <= 5).mean()

    # 결과 저장
    expected_freq_5_ratio[column] = expected_freq_5

# 결과 출력
for column, ratio in expected_freq_5_ratio.items():
    print(f"변수 '{column}'의 기대빈도가 5 이하인 항목 비율: {ratio}")

변수 '기업수명주기'의 기대빈도가 5 이하인 항목 비율: 0.0
변수 '이보배초과여부'의 기대빈도가 5 이하인 항목 비율: 0.0
변수 '파부비초과여부'의 기대빈도가 5 이하인 항목 비율: 0.0
변수 '파당비초과여부'의 기대빈도가 5 이하인 항목 비율: 0.0
변수 '파차의초과여부'의 기대빈도가 5 이하인 항목 비율: 0.0
변수 '파로이초과여부'의 기대빈도가 5 이하인 항목 비율: 0.0


In [37]:
# 독립변수와 종속변수 간의 카이제곱 검정 수행
chi2_scores = []

for column in train_cat_resampled_sum.columns:
    # 교차 테이블 생성
    contingency_table = pd.crosstab(train_cat_resampled_sum[column], test_cat_resampled_sum['t-1감사의견코드'])
    chi2, p_value, dof, expected_freq = chi2_contingency(contingency_table)
    # 카이제곱 통계량(chi2), p-value(p_value), 자유도(dof), 예상빈도(expected_freq)
    print(p_value)
    chi2_scores.append((column, chi2))

# 카이제곱 검정 결과를 기준으로 변수 정렬
sorted_features = sorted(chi2_scores, key=lambda x : x[1], reverse=True)

# 선택된 변수 출력
sorted_features

0.3496723892614268
0.7304231296059767
0.09911012089860459
1.0
0.47842688234450925
0.4345607780174554
1.0


[('기업수명주기', 4.440405984271807),
 ('파부비초과여부', 2.719805029982248),
 ('파로이초과여부', 0.6106032186991674),
 ('파차의초과여부', 0.5024470412383892),
 ('이보배초과여부', 0.11872347424860538),
 ('파당비초과여부', 0.0),
 ('t-1감사의견코드', 0.0)]

In [38]:
chi2_scores

[('기업수명주기', 4.440405984271807),
 ('이보배초과여부', 0.11872347424860538),
 ('파부비초과여부', 2.719805029982248),
 ('파당비초과여부', 0.0),
 ('파차의초과여부', 0.5024470412383892),
 ('파로이초과여부', 0.6106032186991674),
 ('t-1감사의견코드', 0.0)]

In [39]:
import pandas as pd
from scipy.stats import chi2_contingency

# 독립변수와 종속변수 간의 카이제곱 검정 수행
chi2_scores = []

p_values = []
for column in test_cat_resampled_sum.columns:
    # 교차 테이블 생성
    contingency_table = pd.crosstab(test_cat_resampled_sum[column], test_cat_resampled_sum['t-1감사의견코드'])
    chi2, p_value, dof, expected_freq = chi2_contingency(contingency_table)
    p_values.append(p_value)
    chi2_scores.append((column, chi2))

# p-value가 0.05보다 작은 값을 출력하는 데이터프레임 생성
result_df_1 = pd.DataFrame({'Variable': test_cat_resampled_sum.columns, 'p-value': p_values})
filtered_df_chi = result_df_1[result_df_1['p-value'] < 0.05]

filtered_df_chi


Unnamed: 0,Variable,p-value
0,기업수명주기,1.575983e-152
1,이보배초과여부,0.02893872
2,파부비초과여부,1.793942e-84
4,파차의초과여부,1.3086660000000001e-93
5,파로이초과여부,2.714199e-22
6,t-1감사의견코드,0.0


In [40]:
result = pd.concat([result_df, filtered_df_chi], axis=0)

In [41]:
result.sort_values('p-value', ascending=True).reset_index(drop=True)

Unnamed: 0,Variable,p-value
0,순운전자본회전률,0.0
1,당좌자산회전률,0.0
2,총자본회전률,0.0
3,t-1감사의견코드,0.0
4,log자산총계,0.0
5,총자산대비잉여현금흐름,0.0
6,자기자본구성비율,0.0
7,유동자산회전률,1.830195e-282
8,자기자본회전률,2.56819e-268
9,차입금의존도,5.184425999999999e-236


---
# feature개수 정하기 위한 Logit

In [43]:
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
import statsmodels.api as sm
import numpy as np
lr_clf = LogisticRegression()

feature = train_resampled_sum.drop(['t-1감사의견코드'],axis=1)
target = train_resampled_sum[['t-1감사의견코드']]

logit = SelectFromModel(LogisticRegression())
logit.fit(feature, target)
logit_support = logit.get_support()
lr_feature = feature.loc[:,logit_support].columns.tolist()

In [44]:
lr_feature

['자기자본구성비율',
 '총자본순이익률',
 '순운전자본회전률',
 '당좌자산회전률',
 '유동자산회전률',
 '총자본증가율',
 '총자본투자효율',
 'log자산총계',
 '총자산대비현금흐름',
 '총자산대비잉여현금흐름',
 '이보배초과여부',
 '파당비초과여부',
 '파차의초과여부',
 '파로이초과여부']

In [45]:
len(lr_feature)

14

---

### Embedded Method

> Randomforeset

In [47]:
from sklearn.ensemble import RandomForestClassifier

In [51]:
selector = SelectFromModel(estimator=RandomForestClassifier(), threshold=0.02).fit(feature, target)
rf = selector.get_support()
count = np.count_nonzero(rf)
count

14

In [52]:
rf_features = feature.loc[:, rf].columns.tolist()
rf_features

['총자본회전률',
 '자기자본회전률',
 '순운전자본회전률',
 '재고자산회전률',
 '당좌자산회전률',
 '유동자산회전률',
 '매출액증가율',
 '총자본증가율',
 '자기자본증가율',
 '유형자산증가율',
 '유동자산증가율',
 'log자산총계',
 '매출액대비잉여현금흐름',
 '총자산대비잉여현금흐름']

> LASSO

In [57]:
lasso = SelectFromModel(estimator=LogisticRegression(penalty='l1', solver='liblinear', C=0.0021)).fit(feature, target)
lasso_support = lasso.get_support()
lasso_feature = feature.loc[:,lasso_support].columns.tolist()

In [58]:
len(lasso_feature)

14

# Wrapper Method

In [61]:
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SequentialFeatureSelector

In [62]:
selector = SequentialFeatureSelector(estimator=LogisticRegression(), n_features_to_select=14, direction='backward', scoring='f1', cv=5, n_jobs=-1)

# 변수 선택 수행
selector.fit(feature, target)

# 선택된 변수의 인덱스
selected_features = selector.get_support(indices=True)

# 선택된 변수 출력
for i in selected_features:
    print(feature.columns[i])

부채비율
자기자본구성비율
자기자본순이익률
총자본순이익률
자기자본회전률
순운전자본회전률
유동자산회전률
총자본증가율
자기자본증가율
총자본투자효율
log자산총계
총자산대비현금흐름
총자산대비잉여현금흐름
기업수명주기


In [63]:
selected_columns = feature.columns[selected_features]
wrapper_features = list(selected_columns)

---

# 종합

In [64]:
result = result.sort_values('p-value', ascending=True).reset_index(drop=True)
result = result.head(14)
filter = result[['Variable']]

In [65]:
rf_features = pd.DataFrame(rf_features)
lasso_features = pd.DataFrame(lasso_feature)
wrapper_features = pd.DataFrame(wrapper_features)

In [66]:
total = pd.concat([filter, rf_features, lasso_features, wrapper_features], axis=1)

In [67]:
total.columns = ['t&chi', 'rf', 'lasso', 'wrapper']
total

Unnamed: 0,t&chi,rf,lasso,wrapper
0,순운전자본회전률,총자본회전률,자기자본구성비율,부채비율
1,당좌자산회전률,자기자본회전률,총자본순이익률,자기자본구성비율
2,총자본회전률,순운전자본회전률,총자본회전률,자기자본순이익률
3,t-1감사의견코드,재고자산회전률,자기자본회전률,총자본순이익률
4,log자산총계,당좌자산회전률,순운전자본회전률,자기자본회전률
5,총자산대비잉여현금흐름,유동자산회전률,유동자산회전률,순운전자본회전률
6,자기자본구성비율,매출액증가율,총자본증가율,유동자산회전률
7,유동자산회전률,총자본증가율,자기자본증가율,총자본증가율
8,자기자본회전률,자기자본증가율,영업이익증가율,자기자본증가율
9,차입금의존도,유형자산증가율,총자본투자효율,총자본투자효율


In [68]:
filter = total['t&chi'].tolist()
rf = total['rf'].tolist()
lasso = total['lasso'].tolist()
wrapper = total['wrapper'].tolist()

In [69]:
feature.columns

Index(['부채비율', '당좌비율', '유동비율', '이자보상배율', '차입금의존도', '자기자본구성비율', '매출액영업이익률',
       '자기자본순이익률', '총자본순이익률', '총자본회전률', '자기자본회전률', '운전자본회전률', '순운전자본회전률',
       '재고자산회전률', '당좌자산회전률', '유동자산회전률', '매출액증가율', '총자본증가율', '자기자본증가율',
       '순이익증가율', '유형자산증가율', '유동자산증가율', '재고자산증가율', '영업이익증가율', '총자본투자효율',
       '부가가치율', '노동소득분배율', '자본분배율', '이윤분배율', 'log자산총계', 'OCF이자보상배율', '부채상환계수',
       '장기부채상환능력', '매출액대비금융비용상환능력', '연구개발비대비매출액', '매출액대비현금흐름', '매출액대비잉여현금흐름',
       '총자산대비현금흐름', '총자산대비영업현금흐름', '총자산대비잉여현금흐름', '기업수명주기', '이보배초과여부',
       '파부비초과여부', '파당비초과여부', '파차의초과여부', '파로이초과여부'],
      dtype='object')

In [70]:
total_result = pd.DataFrame(index=feature.columns)

# 각 컬럼의 값에 인덱스 포함 여부에 따라 True 또는 False 입력
total_result['t&chi'] = total_result.index.isin(filter)
total_result['wrapper'] = total_result.index.isin(wrapper)
total_result['rf'] = total_result.index.isin(rf)
total_result['lasso'] = total_result.index.isin(lasso)
total_result["true_sum"] = total_result.sum(axis=1)

total_result.sort_values('true_sum', ascending=False, inplace=True)
total_result

Unnamed: 0,t&chi,wrapper,rf,lasso,true_sum
자기자본증가율,True,True,True,True,4
log자산총계,True,True,True,True,4
총자본증가율,True,True,True,True,4
총자산대비잉여현금흐름,True,True,True,True,4
유동자산회전률,True,True,True,True,4
순운전자본회전률,True,True,True,True,4
자기자본회전률,True,True,True,True,4
총자본회전률,True,False,True,True,3
자기자본구성비율,True,True,False,True,3
기업수명주기,True,True,False,True,3


In [71]:
total_result_2 = total_result[total_result['true_sum']>=2]
total_result_2.reset_index()

Unnamed: 0,index,t&chi,wrapper,rf,lasso,true_sum
0,자기자본증가율,True,True,True,True,4
1,log자산총계,True,True,True,True,4
2,총자본증가율,True,True,True,True,4
3,총자산대비잉여현금흐름,True,True,True,True,4
4,유동자산회전률,True,True,True,True,4
5,순운전자본회전률,True,True,True,True,4
6,자기자본회전률,True,True,True,True,4
7,총자본회전률,True,False,True,True,3
8,자기자본구성비율,True,True,False,True,3
9,기업수명주기,True,True,False,True,3


In [169]:
total_result_2.index

Index(['총자산대비잉여현금흐름', 'log자산총계', '순운전자본회전률', '총자본증가율', '자기자본구성비율', '총자본회전률',
       '자기자본회전률', '자기자본증가율', '유동자산회전률', '총자산대비현금흐름', '총자본투자효율', '총자본순이익률'],
      dtype='object')

In [172]:
# train_resampled_sum.to_csv('Undersampling_0.33_train.csv',index=False,encoding='euc-kr')
# test_resampled_sum.to_csv('Undersampling_0.33_test.csv',index=False,encoding='euc-kr')

In [171]:
test_resampled_sum

Unnamed: 0,부채비율,당좌비율,유동비율,이자보상배율,차입금의존도,자기자본구성비율,매출액영업이익률,자기자본순이익률,총자본순이익률,총자본회전률,...,총자산대비현금흐름,총자산대비영업현금흐름,총자산대비잉여현금흐름,기업수명주기,이보배초과여부,파부비초과여부,파당비초과여부,파차의초과여부,파로이초과여부,t-1감사의견코드
0,-0.061624,-0.325007,-0.339707,-0.246380,1.117104,-0.790905,0.254925,0.681747,0.305841,-0.604741,...,-0.153880,-0.024641,0.281687,4,0,0,0,1,0,0.0
1,-0.179421,-0.231115,-0.247559,-0.237022,-0.762974,-0.265811,0.102876,0.045933,0.057940,0.439362,...,0.577278,0.801755,0.453992,3,0,0,0,0,0,0.0
2,-0.248261,0.307380,0.209787,1.738226,-0.440554,0.801199,0.174001,0.191731,0.758738,-0.194558,...,-0.471666,0.641726,-0.621153,2,0,0,0,0,0,0.0
3,-0.268688,0.381171,0.323125,-0.252721,-1.102673,1.632354,0.080539,-0.152066,-0.317998,-0.492873,...,0.614580,-0.159888,0.415724,4,0,0,0,0,0,0.0
4,-0.188684,-0.229299,-0.245591,-0.245533,-0.086511,-0.185730,0.119206,0.215025,0.274513,-0.001896,...,-0.231736,0.588569,0.841335,4,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11553,-0.067839,-0.276680,-0.277538,-0.243504,0.624834,-0.775182,0.095296,-0.059031,-0.174978,2.049022,...,0.166077,-0.275416,-0.516575,1,0,1,0,0,0,1.0
11554,-0.058383,-0.293656,-0.286147,-0.247062,-1.150596,-0.798584,0.117351,0.707848,0.468612,0.762537,...,-0.166783,0.576611,-0.517290,2,0,1,0,0,0,1.0
11555,-0.247958,-0.017578,-0.084320,-0.230940,-0.813179,0.792423,0.189081,0.731282,2.313571,0.706603,...,-0.122459,-0.228325,0.123369,4,0,0,0,0,0,1.0
11556,-0.187358,-0.255024,-0.275099,-0.246688,0.148213,-0.197797,0.105054,0.076103,0.103570,0.607165,...,-0.375487,-0.784810,-0.290083,5,0,0,0,0,0,1.0


In [72]:
total_result = pd.DataFrame(index=feature.columns)

# 각 컬럼의 값에 인덱스 포함 여부에 따라 True 또는 False 입력
total_result['t&chi'] = total_result.index.isin(filter)
total_result['wrapper'] = total_result.index.isin(wrapper)
# total_result['rf'] = total_result.index.isin(rf)
total_result['lasso'] = total_result.index.isin(lasso)
total_result["true_sum"] = total_result.sum(axis=1)

total_result.sort_values('true_sum', ascending=False, inplace=True)
total_result

Unnamed: 0,t&chi,wrapper,lasso,true_sum
유동자산회전률,True,True,True,3
기업수명주기,True,True,True,3
총자산대비잉여현금흐름,True,True,True,3
자기자본구성비율,True,True,True,3
log자산총계,True,True,True,3
자기자본회전률,True,True,True,3
순운전자본회전률,True,True,True,3
자기자본증가율,True,True,True,3
총자본증가율,True,True,True,3
총자산대비현금흐름,False,True,True,2


In [73]:
total_result_3 = total_result[total_result['true_sum']>=2]
total_result_3.reset_index()

Unnamed: 0,index,t&chi,wrapper,lasso,true_sum
0,유동자산회전률,True,True,True,3
1,기업수명주기,True,True,True,3
2,총자산대비잉여현금흐름,True,True,True,3
3,자기자본구성비율,True,True,True,3
4,log자산총계,True,True,True,3
5,자기자본회전률,True,True,True,3
6,순운전자본회전률,True,True,True,3
7,자기자본증가율,True,True,True,3
8,총자본증가율,True,True,True,3
9,총자산대비현금흐름,False,True,True,2


In [77]:
total_result_3.reset_index()[:11]['index'].to_list()

['유동자산회전률',
 '기업수명주기',
 '총자산대비잉여현금흐름',
 '자기자본구성비율',
 'log자산총계',
 '자기자본회전률',
 '순운전자본회전률',
 '자기자본증가율',
 '총자본증가율',
 '총자산대비현금흐름',
 '총자본투자효율']