# 0. 라이브러리

In [1]:
import pandas as pd
import numpy as np
import time

import OpenDartReader

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [3]:
api_key = '462c1cb331fdef6dbc18da2fc8253c2b774e925f'
dart = OpenDartReader(api_key)

- `finstate(corp, bsns_year, reprt_code)`
    - corp: 기업명
    - bsns_year: 사업연도
    - reprt_code: 보고서 유형 11011: 사업보고서, '11012': 반기보고서, '11013': 1분기보고서, '11014: 3분기보고서)
- 이 메서드는 아래 컬럼을 가지는 데이터프레임을 반환합니다.
    - account_nm: 계정명 ex. 자본총계, 영업이익, 당기순이익 등
    - fs_nm: 개별/연결명 ex. 연결재무제표 또는 재무제표
    - sj_nm: 재무제표명 ex. 재무상태표 또는 손익계산서
    - thstrm_dt: 당기일자  ex. 2019.12.31 현재, 2019.01.01 ~ 2019.12.31
    - thstrm_amount: 당기금액 ex. 166,009,000,000
- 계정명과 당기/전기/전전기 금액 컬럼을 바탕으로 우리가 원하는 값을 가져올 수 있습니다

In [4]:
result = dart.finstate("176440", 2021, "11011") # 2020년 래몽래인 사업보고서 내 재무 정보
result = result.loc[result.fs_nm == "연결재무제표"] # 연결재무제표 필터링

# account_nm이 당기순이익, 영업이익, 매출액 중 하나인 경우에만 가져오기
result = result.loc[result.account_nm.isin(['당기순이익', '영업이익', '매출액'])]

result = result.loc[:, ['account_nm', 'fs_nm', 'sj_nm', 'thstrm_dt', 'thstrm_amount']]
display(result)

Unnamed: 0,account_nm,fs_nm,sj_nm,thstrm_dt,thstrm_amount
9,매출액,연결재무제표,손익계산서,2021.01.01 ~ 2021.12.31,3761752259
10,영업이익,연결재무제표,손익계산서,2021.01.01 ~ 2021.12.31,-6416624178
12,당기순이익,연결재무제표,손익계산서,2021.01.01 ~ 2021.12.31,-5989487233


# 1. 재무변수 수집 함수

In [5]:
def find_financial_ind(corp_nm, yr, inds):
    report = dart.finstate(corp_nm, yr) # 데이터 가져오기

    if report is None:
        # 리포트가 없으면 당기, 전기, 전전기 값 모두 제거
        data = [[corp_nm, yr] + [np.nan] * len(inds)]
        data = [[corp_nm, yr-1] + [np.nan] * len(inds)]
        data = [[corp_nm, yr-2] + [np.nan] * len(inds)]
        return pd.DataFrame(data, columns=['기업명', '연도'] + inds)
    
    else:
        report = report[report.account_nm.isin(inds)]
        if sum(report.fs_nm == '연결재무제표') > 0:
            # 연결재무제표 데이터가 있으면 연결재무제표 사용
            report = report.loc[report.fs_nm == '연결재무제표']

        else:
            # 연결재무제표 데이터가 없으면 일반재무제표 사용
            report = report.loc[report.fs_nm == '재무제표']

        data = []
        for y, c in zip([yr, yr-1, yr-2], ['thstrm_amount', 'frmtrm_amount', 'bfefrmtrm_amount']):
            record = [corp_nm, y]
            for ind in inds:
                # account_nm이 ind인 행의 c 컬럼 값을 가져 옴
                if sum(report.account_nm == ind) > 0:
                    value = report.loc[report.account_nm == ind, c].iloc[0]
                else:
                    value = np.nan

                record.append(value)
            
            data.append(record)

        return pd.DataFrame(data, columns=['기업명', '연도'] + inds)

In [6]:
def str_to_float(value):
    if type(value) == float:
        return value
    elif value == '-':
        return 0
    else:
        return float(value.replace(',', ''))

In [7]:
inds = ['자산총계', '부채총계', '자본총계', '매출액', '영업이익', '당기순이익', '유동자산', '유동부채', '이자비용']
display(find_financial_ind('삼성전자', 2020, inds))

Unnamed: 0,기업명,연도,자산총계,부채총계,자본총계,매출액,영업이익,당기순이익,유동자산,유동부채,이자비용
0,삼성전자,2020,378235718000000,102287702000000,275948016000000,236806988000000,35993876000000,26407832000000,198215579000000,75604351000000,
1,삼성전자,2019,352564497000000,89684076000000,262880421000000,230400881000000,27768509000000,21738865000000,181385260000000,63782764000000,
2,삼성전자,2018,339357244000000,91604067000000,247753177000000,243771415000000,58886669000000,44344857000000,174697424000000,69081510000000,


# 2. 정상기업 재무 변수 수집

In [8]:
list_corp = pd.read_csv('./data/정상기업재무실패.csv', dtype={'종목코드':str})
list_codes = list_corp.종목코드.to_list()
list_codes[:3]

['382480', '391710', '260970']

In [9]:
inds = ['자산총계', '부채총계', '자본총계', '매출액', '영업이익', '당기순이익',\
    '유동부채', '유동자산', '비유동자산', '비유동부채']

list_data = pd.DataFrame()

for idx, corp_cd in enumerate(list_codes):
    print(idx+1, "/", len(list_codes))
    yr = 2022
    try:
        result = find_financial_ind(corp_cd, yr, inds)
    except:
        pass

    list_data = pd.concat([list_data, result], axis=0, ignore_index=True)
    time.sleep(0.5)

for ind in inds:
    list_data[ind] = list_data[ind].apply(str_to_float)

1 / 243
2 / 243
3 / 243
4 / 243
5 / 243
6 / 243
7 / 243
8 / 243
9 / 243
10 / 243
11 / 243
12 / 243
13 / 243
14 / 243
15 / 243
16 / 243
{'status': '013', 'message': '조회된 데이타가 없습니다.'}

17 / 243
18 / 243
19 / 243
20 / 243
21 / 243
22 / 243
23 / 243
24 / 243
25 / 243
26 / 243
27 / 243
28 / 243
29 / 243
30 / 243
{'status': '013', 'message': '조회된 데이타가 없습니다.'}

31 / 243
32 / 243
33 / 243
34 / 243
35 / 243
36 / 243
37 / 243
38 / 243
39 / 243
40 / 243
41 / 243
42 / 243
43 / 243
{'status': '013', 'message': '조회된 데이타가 없습니다.'}

44 / 243
45 / 243
{'status': '013', 'message': '조회된 데이타가 없습니다.'}

46 / 243
47 / 243
48 / 243
49 / 243
50 / 243
51 / 243
52 / 243
53 / 243
54 / 243
{'status': '013', 'message': '조회된 데이타가 없습니다.'}

55 / 243
56 / 243
{'status': '013', 'message': '조회된 데이타가 없습니다.'}

57 / 243
58 / 243
59 / 243
60 / 243
61 / 243
62 / 243
63 / 243
{'status': '013', 'message': '조회된 데이타가 없습니다.'}

64 / 243
65 / 243
66 / 243
67 / 243
68 / 243
69 / 243
70 / 243
71 / 243
72 / 243
73 / 243
74 / 243
75 / 24

In [10]:
list_data_raw = list_data.copy()
list_data_raw.to_csv('./data/정상기업재무원본2.csv', index=False)

In [11]:
list_data.sort_values(by=['기업명', '연도'], inplace=True)
list_data.drop_duplicates(inplace=True)

In [12]:
list_data.head(3)

Unnamed: 0,기업명,연도,자산총계,부채총계,자본총계,매출액,영업이익,당기순이익,유동부채,유동자산,비유동자산,비유동부채
665,2680,2020,140019400000.0,80961240000.0,59058120000.0,77324080000.0,-10308730000.0,-15640070000.0,73654510000.0,36232240000.0,103787100000.0,7306729000.0
664,2680,2021,136456500000.0,84053840000.0,52402700000.0,78892140000.0,-4630638000.0,-9711257000.0,74924950000.0,37101370000.0,99355170000.0,9128885000.0
663,2680,2022,142972600000.0,92772270000.0,50200340000.0,99124020000.0,329634800.0,-6706759000.0,86171460000.0,53491520000.0,89481100000.0,6600818000.0


In [13]:
list_data.isnull().sum()

기업명       0
연도        0
자산총계      0
부채총계      3
자본총계      0
매출액      12
영업이익      0
당기순이익     0
유동부채      0
유동자산      0
비유동자산     3
비유동부채     0
dtype: int64

# 3. 결측치 knn

In [14]:
from sklearn.impute import KNNImputer

null_cols = list_data.iloc[:, 1:].columns

imputer=KNNImputer(n_neighbors=5)
filled_list_data = imputer.fit_transform(list_data.iloc[:, 1:])
filled_list_data = pd.DataFrame(filled_list_data, columns=list_data.iloc[:, 1:].columns)

In [15]:
filled_list_data.shape, list_data.shape

((672, 11), (672, 12))

In [16]:
filled_list_data.isnull().sum()

연도       0
자산총계     0
부채총계     0
자본총계     0
매출액      0
영업이익     0
당기순이익    0
유동부채     0
유동자산     0
비유동자산    0
비유동부채    0
dtype: int64

In [17]:
list_data = list_data.reset_index().drop(['index'], axis=1)
list_data[null_cols] = filled_list_data

In [18]:
list_data.shape

(672, 12)

In [19]:
list_data.isnull().sum()

기업명      0
연도       0
자산총계     0
부채총계     0
자본총계     0
매출액      0
영업이익     0
당기순이익    0
유동부채     0
유동자산     0
비유동자산    0
비유동부채    0
dtype: int64

# 4. 재무 비율 변수 구성
- https://gils-lab.tistory.com/38
- https://dacon.io/competitions/official/235946/codeshare/5805
- https://blog.naver.com/o12486vs2/222096044791

### (1) 건전성

In [20]:
# 부채비율 = 총부채 / 총자산 DR (LEV)
list_data['부채비율'] = list_data['부채총계'] / list_data['자본총계'] * 100

# 자기자본비율 DER
list_data['자기자본비율'] = list_data['부채총계'] / list_data['자산총계'] * 100

# 유동부채비율
list_data['유동부채비율'] = list_data['유동부채'] / list_data['자본총계'] * 100

# 유동부채비율
list_data['비유동부채비율'] = list_data['비유동부채'] / list_data['자본총계'] * 100


### (2) 수익성

In [21]:
# 총자산영업이익율 OI/TA
list_data['총자산영업이익율'] = list_data['영업이익'] / list_data['자산총계']

# 총자산순이익율 ROA
list_data['ROA'] = list_data['당기순이익'] / list_data['자산총계']

# 자기자본이익율 ROE
avg_eq = list_data['자본총계'].rolling(2).mean()
list_data['ROE'] = list_data['당기순이익'] / avg_eq
list_data.loc[list_data.연도 == 2020, 'ROE'] = np.nan

# 매출액영업이익율 OPM
list_data['매출액영업이익율'] = list_data['영업이익'] / list_data['매출액']

# 매출액순이익율
list_data['매출액순이익율'] = list_data['당기순이익'] / list_data['매출액']


### (3) 성장성

In [22]:
list_data['총자산증가율'] = list_data['자산총계'].diff() / list_data['자산총계'] * 100
list_data.loc[list_data.연도 == 2020, '총자산증가율'] = np.nan

list_data['매출액증가율'] = list_data['매출액'].diff() / list_data['매출액'] * 100
list_data.loc[list_data.연도 == 2020, '매출액증가율'] = np.nan

list_data['당기순이익증가율'] = list_data['당기순이익'].diff() / list_data['당기순이익'] * 100
list_data.loc[list_data.연도 == 2020, '당기순이익증가율'] = np.nan

list_data['영업이익증가율'] = list_data['영업이익'].diff() / list_data['영업이익'] * 100
list_data.loc[list_data.연도 == 2020, '영업이익증가율'] = np.nan

### (4) 유동성

In [23]:
# 유동비율 LIQ
list_data['유동비율'] = list_data['유동자산'] / list_data['유동부채']

### (5) 활동성

In [24]:
# 자산회전율
list_data['자산회전율'] = list_data['매출액'] / list_data['자산총계']

# 부채회전율
list_data['부채회전율'] = list_data['매출액'] / list_data['부채총계']

# 자본회전율
list_data['자본회전율'] = list_data['매출액'] / list_data['자본총계']


### (6) 규모

In [25]:
# 총매출액규모
list_data['총매출액규모'] = np.log(list_data['매출액'])

# 총매출액규모
list_data['총자산규모'] = np.log(list_data['자산총계'])

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [26]:
list_data.head()

Unnamed: 0,기업명,연도,자산총계,부채총계,자본총계,매출액,영업이익,당기순이익,유동부채,유동자산,비유동자산,비유동부채,부채비율,자기자본비율,유동부채비율,비유동부채비율,총자산영업이익율,ROA,ROE,매출액영업이익율,매출액순이익율,총자산증가율,매출액증가율,당기순이익증가율,영업이익증가율,유동비율,자산회전율,부채회전율,자본회전율,총매출액규모,총자산규모
0,2680,2020.0,140019400000.0,80961240000.0,59058120000.0,77324080000.0,-10308730000.0,-15640070000.0,73654510000.0,36232240000.0,103787100000.0,7306729000.0,137.087394,57.821461,124.715294,12.3721,-0.073624,-0.111699,,-0.133318,-0.202266,,,,,0.491922,0.552238,0.955075,1.309288,25.071271,25.665047
1,2680,2021.0,136456500000.0,84053840000.0,52402700000.0,78892140000.0,-4630638000.0,-9711257000.0,74924950000.0,37101370000.0,99355170000.0,9128885000.0,160.399833,61.597518,142.979194,17.420639,-0.033935,-0.071167,-0.174254,-0.058696,-0.123095,-2.610958,1.987601,-61.05091,-122.620088,0.49518,0.578148,0.938591,1.505498,25.091347,25.639272
2,2680,2022.0,142972600000.0,92772270000.0,50200340000.0,99124020000.0,329634800.0,-6706759000.0,86171460000.0,53491520000.0,89481100000.0,6600818000.0,184.804059,64.888141,171.655108,13.14895,0.002306,-0.046909,-0.130732,0.003325,-0.06766,4.557574,20.410679,-44.798068,1504.778208,0.620757,0.693308,1.068466,1.974569,25.319638,25.685919
3,3100,2020.0,705889900000.0,400608500000.0,305281400000.0,151729400000.0,28581660000.0,6824766000.0,85441410000.0,88569830000.0,617320100000.0,315167100000.0,131.225976,56.752264,27.987756,103.23822,0.04049,0.009668,,0.188373,0.04498,,,,,1.036615,0.214948,0.378747,0.497015,25.745365,27.282725
4,3100,2021.0,689325900000.0,361428400000.0,327897500000.0,162739200000.0,32163550000.0,24033560000.0,82853760000.0,103203400000.0,586122600000.0,278574700000.0,110.226041,52.432154,25.268189,84.957852,0.046659,0.034865,0.075914,0.197639,0.147681,-2.402922,6.765285,71.603185,11.136514,1.245609,0.236085,0.450267,0.496311,25.815415,27.25898


In [27]:
list_data = list_data[list_data.연도 == 2022]
list_data.head()

Unnamed: 0,기업명,연도,자산총계,부채총계,자본총계,매출액,영업이익,당기순이익,유동부채,유동자산,비유동자산,비유동부채,부채비율,자기자본비율,유동부채비율,비유동부채비율,총자산영업이익율,ROA,ROE,매출액영업이익율,매출액순이익율,총자산증가율,매출액증가율,당기순이익증가율,영업이익증가율,유동비율,자산회전율,부채회전율,자본회전율,총매출액규모,총자산규모
2,2680,2022.0,142972600000.0,92772270000.0,50200340000.0,99124020000.0,329634800.0,-6706759000.0,86171460000.0,53491520000.0,89481100000.0,6600818000.0,184.804059,64.888141,171.655108,13.14895,0.002306,-0.046909,-0.130732,0.003325,-0.06766,4.557574,20.410679,-44.798068,1504.778208,0.620757,0.693308,1.068466,1.974569,25.319638,25.685919
5,3100,2022.0,634632200000.0,293046500000.0,341585600000.0,172141400000.0,26761770000.0,14691300000.0,61235800000.0,78632920000.0,555999200000.0,231810700000.0,85.790066,46.175809,17.926925,67.863141,0.042169,0.023149,0.043888,0.155464,0.085344,-8.618183,5.461915,-63.590413,-20.184688,1.284101,0.271246,0.58742,0.503948,25.871582,27.176311
8,5710,2022.0,559314500000.0,141652100000.0,417662400000.0,737040600000.0,31395680000.0,37657870000.0,126737300000.0,396902600000.0,162411800000.0,14914800000.0,33.915456,25.326021,30.344438,3.571018,0.056132,0.067329,0.0954,0.042597,0.051093,1.45751,-3.472781,13.047783,4.062643,3.131696,1.317757,5.203175,1.76468,27.325909,27.049978
11,7530,2022.0,143148000000.0,100620100000.0,42527830000.0,146324300000.0,4634069000.0,2486873000.0,78503620000.0,60056810000.0,83091140000.0,22116500000.0,236.598307,70.290997,184.593524,52.004782,0.032373,0.017373,0.060746,0.03167,0.016996,3.125388,16.026064,77.731627,66.676405,0.76502,1.022189,1.454225,3.440673,25.709091,25.687145
14,7680,2022.0,539386500000.0,176293300000.0,363093200000.0,358311800000.0,17849130000.0,19510880000.0,129151500000.0,479237300000.0,60149150000.0,47141820000.0,48.55318,32.684039,35.569786,12.983394,0.033092,0.036172,0.055063,0.049815,0.054452,-0.435498,19.261249,-54.835714,-33.928784,3.710661,0.664295,2.032476,0.986832,26.604669,27.013698


In [28]:
list_data = list_data.reset_index().drop(['index'], axis=1)
list_data.shape

(224, 31)

In [29]:
list_data.isnull().sum()

기업명         0
연도          0
자산총계        0
부채총계        0
자본총계        0
매출액         0
영업이익        0
당기순이익       0
유동부채        0
유동자산        0
비유동자산       0
비유동부채       0
부채비율        0
자기자본비율      0
유동부채비율      0
비유동부채비율     0
총자산영업이익율    0
ROA         0
ROE         0
매출액영업이익율    0
매출액순이익율     0
총자산증가율      0
매출액증가율      1
당기순이익증가율    0
영업이익증가율     0
유동비율        0
자산회전율       0
부채회전율       0
자본회전율       0
총매출액규모      0
총자산규모       0
dtype: int64

In [36]:
list_data.rename(columns={'기업명':'종목코드'}, inplace=True)
list_data.head()

Unnamed: 0,종목코드,연도,자산총계,부채총계,자본총계,매출액,영업이익,당기순이익,유동부채,유동자산,비유동자산,비유동부채,부채비율,자기자본비율,유동부채비율,비유동부채비율,총자산영업이익율,ROA,ROE,매출액영업이익율,매출액순이익율,총자산증가율,매출액증가율,당기순이익증가율,영업이익증가율,유동비율,자산회전율,부채회전율,자본회전율,총매출액규모,총자산규모
0,2680,2022.0,142972600000.0,92772270000.0,50200340000.0,99124020000.0,329634800.0,-6706759000.0,86171460000.0,53491520000.0,89481100000.0,6600818000.0,184.804059,64.888141,171.655108,13.14895,0.002306,-0.046909,-0.130732,0.003325,-0.06766,4.557574,20.410679,-44.798068,1504.778208,0.620757,0.693308,1.068466,1.974569,25.319638,25.685919
1,3100,2022.0,634632200000.0,293046500000.0,341585600000.0,172141400000.0,26761770000.0,14691300000.0,61235800000.0,78632920000.0,555999200000.0,231810700000.0,85.790066,46.175809,17.926925,67.863141,0.042169,0.023149,0.043888,0.155464,0.085344,-8.618183,5.461915,-63.590413,-20.184688,1.284101,0.271246,0.58742,0.503948,25.871582,27.176311
2,5710,2022.0,559314500000.0,141652100000.0,417662400000.0,737040600000.0,31395680000.0,37657870000.0,126737300000.0,396902600000.0,162411800000.0,14914800000.0,33.915456,25.326021,30.344438,3.571018,0.056132,0.067329,0.0954,0.042597,0.051093,1.45751,-3.472781,13.047783,4.062643,3.131696,1.317757,5.203175,1.76468,27.325909,27.049978
3,7530,2022.0,143148000000.0,100620100000.0,42527830000.0,146324300000.0,4634069000.0,2486873000.0,78503620000.0,60056810000.0,83091140000.0,22116500000.0,236.598307,70.290997,184.593524,52.004782,0.032373,0.017373,0.060746,0.03167,0.016996,3.125388,16.026064,77.731627,66.676405,0.76502,1.022189,1.454225,3.440673,25.709091,25.687145
4,7680,2022.0,539386500000.0,176293300000.0,363093200000.0,358311800000.0,17849130000.0,19510880000.0,129151500000.0,479237300000.0,60149150000.0,47141820000.0,48.55318,32.684039,35.569786,12.983394,0.033092,0.036172,0.055063,0.049815,0.054452,-0.435498,19.261249,-54.835714,-33.928784,3.710661,0.664295,2.032476,0.986832,26.604669,27.013698


In [40]:
list_data.to_csv('./data/정상기업재무2.csv', index=False)

# 실패한 애들 알아보기

In [41]:
list_corp = pd.read_csv('./data/정상기업.csv', dtype={'종목코드':str})
print(list_corp.shape)
list_corp.head()

(1455, 4)


Unnamed: 0,기업명,종목코드,상장일,지속기간
0,래몽래인,200350,2021-12-30,670 days
1,툴젠,199800,2021-12-10,690 days
2,마음AI,377480,2021-11-23,707 days
3,알비더블유,361570,2021-11-22,708 days
4,바이옵트로,222160,2021-11-18,712 days


In [42]:
list_done = pd.read_csv('./data/정상기업재무.csv', dtype={'종목코드':str})
print(list_done.shape)
list_done.head()

(1212, 31)


Unnamed: 0,기업명,연도,자산총계,부채총계,자본총계,매출액,영업이익,당기순이익,유동부채,유동자산,비유동자산,비유동부채,부채비율,자기자본비율,유동부채비율,비유동부채비율,총자산영업이익율,ROA,ROE,매출액영업이익율,매출액순이익율,총자산증가율,매출액증가율,당기순이익증가율,영업이익증가율,유동비율,자산회전율,부채회전율,자본회전율,총매출액규모,총자산규모
0,3S,2022.0,65127860000.0,26788880000.0,38338990000.0,27110630000.0,1452134000.0,1171370000.0,24423670000.0,28885010000.0,36242850000.0,2365204000.0,69.873715,41.132741,63.704527,6.169188,0.022297,0.017986,0.031284,0.053563,0.043207,16.142265,13.399616,40.639176,196.979059,1.182665,0.416268,1.012011,0.707129,24.023192,24.899618
1,APS,2022.0,367637600000.0,155030400000.0,212607100000.0,40821930000.0,-15269630000.0,6392560000.0,102125300000.0,97455970000.0,270181600000.0,52905130000.0,72.918737,42.169367,48.034755,24.883982,-0.041534,0.017388,0.031973,-0.374054,0.156596,8.30846,3.445238,-268.957924,74.042775,0.954278,0.111039,0.263316,0.192006,24.432485,26.630364
2,AP시스템,2022.0,548112900000.0,277913700000.0,270199100000.0,486607600000.0,90485160000.0,82237670000.0,222421200000.0,396470600000.0,151642300000.0,55492550000.0,102.85515,50.703741,82.317502,20.537648,0.165085,0.150038,0.356206,0.185951,0.169002,21.78252,-8.658585,30.486565,28.932548,1.782522,0.887787,1.75093,1.800922,26.910724,27.029747
3,AP위성,2022.0,109299400000.0,23948350000.0,85351040000.0,53604130000.0,4494271000.0,4077471000.0,23802340000.0,98071390000.0,11227990000.0,146004800.0,28.058646,21.910778,27.887582,0.171064,0.041119,0.037306,0.048992,0.083842,0.076066,-1.728078,25.002563,65.622301,81.260622,4.120241,0.490434,2.238323,0.628043,24.704892,25.417357
4,BGF에코머티리얼즈,2022.0,352143700000.0,150198700000.0,201945100000.0,263301600000.0,15925460000.0,28879580000.0,117764400000.0,253180300000.0,98963460000.0,32434240000.0,74.375997,42.652658,58.315074,16.060923,0.045224,0.082011,0.161303,0.060484,0.109683,12.886848,22.729716,97.253119,-39.82806,2.149888,0.747711,1.753022,1.303828,26.296566,26.587305


In [43]:
list_done2 = pd.read_csv('./data/정상기업재무2.csv', dtype={'종목코드':str})
print(list_done2.shape)
list_done2.head()

(224, 31)


Unnamed: 0,종목코드,연도,자산총계,부채총계,자본총계,매출액,영업이익,당기순이익,유동부채,유동자산,비유동자산,비유동부채,부채비율,자기자본비율,유동부채비율,비유동부채비율,총자산영업이익율,ROA,ROE,매출액영업이익율,매출액순이익율,총자산증가율,매출액증가율,당기순이익증가율,영업이익증가율,유동비율,자산회전율,부채회전율,자본회전율,총매출액규모,총자산규모
0,2680,2022.0,142972600000.0,92772270000.0,50200340000.0,99124020000.0,329634800.0,-6706759000.0,86171460000.0,53491520000.0,89481100000.0,6600818000.0,184.804059,64.888141,171.655108,13.14895,0.002306,-0.046909,-0.130732,0.003325,-0.06766,4.557574,20.410679,-44.798068,1504.778208,0.620757,0.693308,1.068466,1.974569,25.319638,25.685919
1,3100,2022.0,634632200000.0,293046500000.0,341585600000.0,172141400000.0,26761770000.0,14691300000.0,61235800000.0,78632920000.0,555999200000.0,231810700000.0,85.790066,46.175809,17.926925,67.863141,0.042169,0.023149,0.043888,0.155464,0.085344,-8.618183,5.461915,-63.590413,-20.184688,1.284101,0.271246,0.58742,0.503948,25.871582,27.176311
2,5710,2022.0,559314500000.0,141652100000.0,417662400000.0,737040600000.0,31395680000.0,37657870000.0,126737300000.0,396902600000.0,162411800000.0,14914800000.0,33.915456,25.326021,30.344438,3.571018,0.056132,0.067329,0.0954,0.042597,0.051093,1.45751,-3.472781,13.047783,4.062643,3.131696,1.317757,5.203175,1.76468,27.325909,27.049978
3,7530,2022.0,143148000000.0,100620100000.0,42527830000.0,146324300000.0,4634069000.0,2486873000.0,78503620000.0,60056810000.0,83091140000.0,22116500000.0,236.598307,70.290997,184.593524,52.004782,0.032373,0.017373,0.060746,0.03167,0.016996,3.125388,16.026064,77.731627,66.676405,0.76502,1.022189,1.454225,3.440673,25.709091,25.687145
4,7680,2022.0,539386500000.0,176293300000.0,363093200000.0,358311800000.0,17849130000.0,19510880000.0,129151500000.0,479237300000.0,60149150000.0,47141820000.0,48.55318,32.684039,35.569786,12.983394,0.033092,0.036172,0.055063,0.049815,0.054452,-0.435498,19.261249,-54.835714,-33.928784,3.710661,0.664295,2.032476,0.986832,26.604669,27.013698


In [46]:
list_corps_done = list_done.기업명
list_corps_done2 = list_done2.종목코드

list_corps_fail = list_corp[~list_corp.기업명.isin(list_corps_done)]
list_corps_fail = list_corps_fail[~list_corps_fail.종목코드.isin(list_corps_done2)]
print(list_corps_fail.shape)
list_corps_fail.head()

(19, 4)


Unnamed: 0,기업명,종목코드,상장일,지속기간
112,코스텍시스,355150,2020-09-18,1138 days
173,코리아에셋투자증권,190650,2019-11-20,1441 days
224,컴퍼니케이,307930,2019-05-23,1622 days
231,미래에셋벤처투자,100790,2019-03-15,1691 days
265,아주IB투자,27360,2018-11-21,1805 days


In [55]:
list_corps_fail.to_csv('./data/정상기업재무실패2.csv', index=False)

In [54]:
result = dart.finstate("190650", 2021, "11011") # 2020년 래몽래인 사업보고서 내 재무 정보
result = result.loc[result.fs_nm == "연결재무제표"] # 연결재무제표 필터링

# account_nm이 당기순이익, 영업이익, 매출액 중 하나인 경우에만 가져오기
result = result.loc[result.account_nm.isin(['당기순이익', '영업이익', '매출액'])]

result = result.loc[:, ['account_nm', 'fs_nm', 'sj_nm', 'thstrm_dt', 'thstrm_amount']]
display(result)

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



AttributeError: 'DataFrame' object has no attribute 'fs_nm'