## Import

In [1]:
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from tqdm import tqdm
from bs4 import BeautifulSoup

import requests
import streamlit as st
import pandas as pd
import numpy as np
import OpenDartReader
import warnings
import dart_fss
import time, datetime
import re, os

warnings.filterwarnings('ignore')

* functions

In [2]:
def cleansing(x):
    """Return `x` with every 'TE' and 'TU' substring rewritten to 'TD'.

    NOTE(review): presumably this turns the document's <TE>/<TU> cell tags
    into <TD> so the fragment parses as an HTML table - confirm against the
    source documents. The replacement is purely textual, so any other
    occurrence of 'TE'/'TU' is rewritten as well.
    """
    for cell_tag in ('TE', 'TU'):
        x = x.replace(cell_tag, 'TD')
    return x

def get_table(xml_text, title):
    """Return the table that follows a `<title></TITLE>` heading in `xml_text`.

    Parameters
    ----------
    xml_text : str
        Document text. The pattern uses `.` without DOTALL, so the caller is
        expected to have removed newlines beforehand.
    title : str
        Heading text; it is interpolated into the regular expression as-is.

    Returns
    -------
    pandas.DataFrame
        The second table parsed from the matched fragment when more than one
        is found, otherwise the first.

    Raises
    ------
    IndexError
        When no fragment matches `title` (callers catch this to skip a filing).
    """
    # Local import so this block does not depend on a new file-level import.
    from io import StringIO

    pattern = '{}</TITLE>.*?</TABLE-GROUP>'.format(title)
    fragments = re.findall(pattern, xml_text)

    # pandas deprecated passing literal HTML to read_html (2.1+); a file-like
    # wrapper works on old and new versions alike.
    tables = pd.read_html(StringIO(cleansing(fragments[0])))

    return tables[1] if len(tables) > 1 else tables[0]

def get_listing_dt(xml_text):
    """Extract the listing date from report text as a 'YYYY.MM.DD' string.

    Two section headings are tried in order. The first heading that matches
    exactly once is the only one inspected; if that section does not contain
    exactly one 'YYYY년MM월DD일' date (spaces ignored), the result is '-'.

    Parameters
    ----------
    xml_text : str
        Document text (newlines already removed by the caller).

    Returns
    -------
    str
        The date as 'YYYY.MM.DD', or '-' when no unique date could be found.
        (Previously a list could leak out when the section was found but the
        date was missing or ambiguous.)
    """
    section_patterns = (
        r'상장일\(매매개시일\).*?<P>',
        r'주권 상장 및 유통 예정일.*?<P>',
    )

    for pattern in section_patterns:
        sections = re.findall(pattern, xml_text)
        if len(sections) != 1:
            # Missing or ambiguous heading: fall through to the next one.
            continue

        compact = sections[0].replace(" ", "")
        dates = re.findall(r'\d{4}년\d{2}월\d{2}일', compact)
        if len(dates) == 1:
            # '2022년03월21일' -> '2022.03.21'
            return re.sub("일", "", re.sub("[년월]", ".", dates[0]))
        return '-'

    return '-'

def set_num(x):
    """Convert a formatted numeric string to float; pass non-strings through.

    Every character other than 0-9 is removed before conversion, so
    thousands separators and units are dropped - and so are signs and
    decimal points ("71.89" becomes 7189.0).

    Parameters
    ----------
    x : Any
        Value to convert.

    Returns
    -------
    Any
        `x` unchanged when it is not a string; otherwise the float built
        from its digits, or NaN when the string contains no digits at all
        (e.g. "-" or ""), instead of raising ValueError.
    """
    if not isinstance(x, str):
        return x

    digits = re.sub("[^0-9]", "", x)
    return float(digits) if digits else float("nan")
    
def get_issuance3(info_df, corp_name):
    """Build the per-underwriter offering table for one company's filing.

    The first `rcept_no` listed for `corp_name` in `info_df` is downloaded
    through the module-level `dart` client, three tables are pulled out of
    it with `get_table`, and they are joined on the receipt number. Only
    rows whose '구분' contains "공모" are kept.

    Parameters
    ----------
    info_df : pandas.DataFrame
        Filing list with at least `corp_name` and `rcept_no` columns.
    corp_name : str
        Company to look up.

    Returns
    -------
    pandas.DataFrame
        One row per underwriter with renamed columns plus '확정발행가액'
        and '상장일'; or an empty frame (original column names) when no
        public-offering row exists or the first '인수수량' is '-'.

    Raises
    ------
    IndexError
        When the company is not in `info_df` or a table heading is missing.
    """
    result_cols = ['회사명', '청약개시일', '청약종료일', '납입기일', '구분', '인수기관', '인수수량', '인수금액', '비 율(%)', '비 고']

    rcept_no = info_df.loc[info_df.corp_name == corp_name].rcept_no.values[0]

    # Newlines are stripped because the table regex does not span lines.
    xml_text = dart.document(rcept_no).replace("\n", "")

    schedule = get_table(xml_text, '청약 및 납입일정')
    overview = get_table(xml_text, '발행 개요')
    underwriters = get_table(xml_text, '인수기관별 인수금액')

    schedule = schedule.loc[:, ['구분', '청약개시일', '청약종료일', '납입기일']]
    overview = overview.loc[:, ['발행총액']]
    # Drop the totals row ("계") from the underwriter table.
    underwriters = underwriters.loc[underwriters['인수기관'] != "계", ['인수기관', '인수수량', '인수금액', '비 율(%)', '비 고']]

    # A constant key column lets the three tables be cross-joined per filing.
    for frame in (schedule, overview, underwriters):
        frame['rcept_no'] = [rcept_no] * frame.shape[0]

    pub_df = schedule.merge(overview, on = 'rcept_no', how = 'inner')
    pub_df = pub_df.merge(underwriters, on = 'rcept_no', how = 'inner')
    pub_df['회사명'] = [corp_name] * pub_df.shape[0]

    pub_df = pub_df.loc[["공모" in kind for kind in pub_df['구분']]]

    if pub_df.shape[0] == 0 or pub_df['인수수량'].values[0] == '-':
        return pd.DataFrame(columns = result_cols)

    pub_df = pub_df.drop(columns = 'rcept_no')

    listing_dt = get_listing_dt(xml_text)
    renames = {"청약개시일":"청약일",
               "납입기일":"납입일",
               "인수수량":"공모주수",
               "비 율(%)": "인수비율",
               "비 고": "주관형태"}

    base_df = pub_df.loc[:, result_cols]
    # Normalise securities-firm names (non-string cells are left alone).
    base_df['인수기관'] = [change_corp(name) if type(name) == str else name for name in base_df['인수기관']]

    base_df['확정발행가액'] = base_df['인수금액']/base_df['인수수량']
    base_df['상장일'] = [listing_dt] * base_df.shape[0]
    base_df['구분'] = ['유가증권' if "유가증권" in kind else "코스닥" for kind in base_df['구분']]

    return base_df.rename(columns = renames)
    
def get_kind(driver, start_dt, end_dt):
    """Scrape KIND listing details for every result between two dates.

    Opens the KIND listing-company search, fills in the date range, toggles
    four listing-type labels, runs the search and then, for every row on
    every result page, opens the detail popup and reads share counts, main
    product, competition ratio and allocation figures.

    NOTE(review): `get_kind` is defined more than once in this file; when
    the file is executed top to bottom the last definition wins.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session; assumed to have only the main window open.
    start_dt, end_dt : str
        Values typed into the 'fromDate' / 'toDate' inputs.

    Returns
    -------
    pandas.DataFrame
        Columns '회사명', '상장일', '상장주식수', '신주모집', '구주매출',
        '경쟁률', '기관배정수량', '주요제품'. Empty (same columns) when the
        search reports zero results.
    """
    # Local import so this block does not depend on a new file-level import.
    from io import StringIO

    page_size = 15  # rows per result page assumed by the paging arithmetic
    shares_css = "#tab-contents > table:nth-child(6) > tbody > tr:nth-child(2) > td:nth-child(4)"
    ratio_css = "#tab-contents > table.detail.type-01.chain-head.mt10 > tbody > tr:nth-child(3) > td"

    ## 01. Open KIND
    driver.get("https://kind.krx.co.kr/listinvstg/listingcompany.do?method=searchListingTypeMain")

    wait = WebDriverWait(driver, 10, poll_frequency=0.25)
    wait.until(EC.presence_of_element_located((By.ID, "fromDate")))

    ## 02. Set the start/end dates
    for element_id, value in (('fromDate', start_dt), ('toDate', end_dt)):
        date_input = driver.find_element(By.ID, element_id)
        date_input.clear()
        time.sleep(0.05)
        date_input.send_keys(value)
        # ESC after typing; presumably dismisses the date-picker overlay.
        date_input.send_keys(Keys.ESCAPE)
        time.sleep(0.05)

    ## 03. Use new listings only (toggles labels 2..5 of the type row)
    check_box = '/html/body/section[2]/section/form/section/div/div[1]/table/tbody/tr[7]/td/label[{}]'

    for idx in range(4):
        driver.find_element(By.XPATH, check_box.format(idx+2)).click()

    search_element = driver.find_element(By.CLASS_NAME, 'btn-sprite.type-00.vmiddle.search-btn')
    search_element.click()

    time.sleep(1.5)
    cnt_element = driver.find_element(By.XPATH, '/html/body/section[2]/section/article/section[2]/div[2]/em')
    # Strip anything that is not a digit so "1,234" parses too.
    total_cnt = int(re.sub("[^0-9]", "", cnt_element.text) or 0)
    # Ceiling division: floor(n / 15) + 1 visited one page too many whenever
    # the result count was an exact multiple of the page size.
    loop_cnt = -(-total_cnt // page_size)

    time.sleep(0.05)
    page_frames = []
    for tot_idx in range(loop_cnt):
        ### Collect everything on the current result page
        temp_df = pd.read_html(StringIO(driver.page_source))
        listing_df = [x for x in temp_df if "회사명" in x and "상장유형" in x][0]

        s_list, m_list, r_list, n_list, o_list, c_list = [], [], [], [], [], []

        for idx in range(listing_df.shape[0]):
            row_element = driver.find_element(By.XPATH, "/html/body/section[2]/section/article/section[1]/table/tbody/tr[{}]".format(idx+1))
            row_element.click()

            time.sleep(0.25)

            driver.switch_to.window(driver.window_handles[1])
            try:
                time.sleep(0.25)
                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, shares_css)))

                # Listed share count
                s_cnt = driver.find_element(By.CSS_SELECTOR, shares_css).text
                # Main product
                m_product = driver.find_element(By.CSS_SELECTOR, "#tab-contents > table:nth-child(3) > tbody > tr:nth-child(6) > td").text

                # Switch to the third tab of the popup
                driver.find_element(By.XPATH, '/html/body/form/section/nav/ul/li[3]/a').click()
                time.sleep(0.25)

                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ratio_css)))

                # Competition ratio
                ratio = driver.find_element(By.CSS_SELECTOR, ratio_css).text
                # Newly issued shares
                new_s = driver.find_element(By.CSS_SELECTOR, '#tab-contents > table:nth-child(5) > tbody > tr:nth-child(2) > td:nth-child(2)').text
                # Existing shares sold
                old_s = driver.find_element(By.CSS_SELECTOR, '#tab-contents > table:nth-child(5) > tbody > tr:nth-child(3) > td:nth-child(2)').text
                # Quantity allocated to institutions
                c_cnt = driver.find_element(By.CSS_SELECTOR, '#tab-contents > table.detail.type-01.chain-foot.mt3 > tbody > tr:nth-child(3) > td:nth-child(2)').text
            finally:
                # Always close the popup so a failed row does not leave the
                # session pointing at a stray window.
                driver.close()
                driver.switch_to.window(driver.window_handles[0])

            s_list.append(s_cnt)
            m_list.append(m_product)
            r_list.append(ratio)
            n_list.append(new_s)
            o_list.append(old_s)
            c_list.append(c_cnt)

            time.sleep(0.1)

        # '-' placeholders count as zero.
        o_list = [set_num(x) if x != '-' else 0 for x in o_list]
        n_list = [set_num(x) if x != '-' else 0 for x in n_list]
        s_list = [set_num(x) if x != '-' else 0 for x in s_list]
        c_list = [set_num(x) if x != '-' else 0 for x in c_list]

        right_df = pd.DataFrame({"회사명": listing_df['회사명'], "상장주식수": s_list, "신주모집": n_list, "구주매출": o_list, "경쟁률": r_list, "기관배정수량": c_list, "주요제품": m_list})

        select_cols = ['회사명', '상장일']
        select_cols.extend(right_df.columns[1:])

        page_frames.append(pd.merge(listing_df, right_df, on = '회사명', how = 'inner')[select_cols])

        ## Go to the next page (nothing to advance to after the last one)
        if tot_idx < loop_cnt - 1:
            next_element = driver.find_element(By.CLASS_NAME, 'next')
            next_element.click()
            time.sleep(3)

    if not page_frames:
        # Zero results: return an empty frame instead of failing on an
        # unbound local.
        return pd.DataFrame(columns = ['회사명', '상장일', '상장주식수', '신주모집', '구주매출', '경쟁률', '기관배정수량', '주요제품'])

    return pd.concat(page_frames)


def make_fee(x):
    """Add the estimated subscription fee column '청약수수료추정' to `x`.

    The estimate is 확정발행가액 * 기관배정수량 * 인수비율 * 0.0001, divided
    by 100,000,000. The frame is modified in place and also returned.
    """
    allocated_amount = x['확정발행가액'] * x['기관배정수량'] * x['인수비율'] * 0.0001
    x['청약수수료추정'] = allocated_amount / 100000000
    return x

def change_form(df, idx):
    """Reshape the combined offering table into one of three report layouts.

    Parameters
    ----------
    df : pandas.DataFrame
        Combined table. Layouts 1 and 2 work on a deep copy, so `df` itself
        is not modified; layout 3 returns a column selection of `df`.
    idx : int
        1 - per-underwriter fee sheet (adds estimated subscription fee via
            `make_fee`, fee total and per-company issue amount),
        2 - one row per company with summed fee / shares / amount and a set
            of empty (NaN) columns to be filled in later,
        3 - plain column selection.

    Returns
    -------
    pandas.DataFrame or None
        The reshaped table; None for any other `idx`.
    """
    if idx == 1:
        f_df = make_fee(df.copy(deep=True))

        f_df['인수수수료(천원)'] /= 100000
        # Rows whose company name contains "스팩" get a zero estimated fee.
        f_df['청약수수료추정'] = [x if "스팩" not in y else 0 for x, y in zip(f_df['청약수수료추정'], f_df['회사명'])]
        f_df['수수료합계'] = f_df['인수수수료(천원)'] + f_df['청약수수료추정']

        select_cols = ['상장일', '회사명', '구분', '인수기관', '공모금액(백만원)', '인수수수료(천원)', '청약수수료추정', '수수료합계', '주관형태',
                      '확정발행가액', '인수비율', '기관배정수량', '청약일', '납입일']

        o_df = f_df.loc[:, select_cols]
        o_df.columns = ['상장일', '업체', '시장구분', '인수회사', '인수금액', '인수수수료', '청약수수료추정', '수수료합계', '주관형태',
                      '공모가', '인수비율', '총기관배정수량', '청약일', '납입일']

        # Per-company total of the (still unscaled) underwriting amounts.
        check_df = o_df.groupby('업체')['인수금액'].sum().rename("발행금액").reset_index()

        o_df = pd.merge(o_df, check_df, on = '업체', how = 'left')

        # Placeholders filled in by hand later.
        o_df['건수'] = '-'
        o_df['상장트랙'] = '-'

        select_cols = ['상장일', '업체', '시장구분', '발행금액','인수회사', '인수금액', '인수수수료', '청약수수료추정', '수수료합계', '건수',
                       '주관형태', '상장트랙','공모가', '인수비율', '총기관배정수량', '청약일', '납입일']

        o_df = o_df.loc[:, select_cols].sort_values("상장일")
        # Scaled after the total above was taken, so 발행금액 keeps the
        # original unit.
        o_df['인수금액'] /= 1000
        return o_df

    elif idx == 2:
        i_df = df.copy(deep=True)

        i_df['구주매출비중'] = i_df['구주매출']/(i_df['구주매출'] + i_df['신주모집']) * 100

        select_cols = ['청약일', '회사명', '대표주관회사', '납입일', '상장일', '공모금액(백만원)', '공모주수', '확정발행가액', '구주매출비중', '경쟁률', '인수수수료(천원)']
        sum_cols = ['인수수수료(천원)', '공모주수', '공모금액(백만원)']

        o_df = i_df.loc[:, select_cols]

        # Replace the per-underwriter figures with per-company totals, then
        # collapse the now-identical rows to one per company.
        totals = o_df.groupby('회사명')[sum_cols].sum().reset_index()
        o_df = pd.merge(o_df.drop(columns = sum_cols), totals, on = '회사명')
        o_df = o_df.rename(columns = {"공모금액(백만원)":"공모금액(천원)"})
        o_df = o_df.drop_duplicates()

        o_df['공모금액(천원)'] *= 1000

        # Columns left empty on purpose. np.nan is used because the np.NaN
        # alias no longer exists in NumPy 2.x.
        for blank_col in ['기준가(평가가치)', '1차발행가액-1', '1차발행가액-2', '수요예측가중평균가',
                          '결정비율', '공모비율', '상장요건', '코넥스여부']:
            o_df[blank_col] = np.nan

        select_cols = ['청약일','회사명','대표주관회사','납입일','상장일','공모금액(천원)','공모주수','기준가(평가가치)','1차발행가액-1','1차발행가액-2',
                       '수요예측가중평균가','확정발행가액','결정비율','공모비율','구주매출비중','상장요건','코넥스여부','경쟁률','인수수수료(천원)']

        o_df = o_df.loc[:, select_cols]
        o_df['수수료율'] = o_df['인수수수료(천원)'] / o_df['공모금액(천원)'] * 100
        o_df = o_df.sort_values('청약일')

        return o_df

    elif idx == 3:
        select_cols = ['인수기관', '청약일', '회사명', '대표주관회사', '인수회사', '납입일', '상장일', '공모금액(백만원)', '공모주수', '확정발행가액', '인수수수료(천원)', '인수비율']
        return df.loc[:, select_cols]

    # Unknown layout: keep the historical behaviour of returning None.
    return None

In [3]:
def change_corp(x):
    """Shorten a securities-firm name to the label used in the reports.

    Known firms are recognised by a keyword and mapped to a fixed label
    (first match wins, in the order listed). Any other name has the generic
    words 투자 / 금융 / 증권 / 에셋 and the "(주)" / "㈜" markers removed.
    """
    keyword_labels = (
        ("모간스탠리", "MS"),
        ("골드", "골드만"),
        ("씨티", "씨티"),
        ("메릴", "메릴린치"),
        ("케이비", "KB"),
        ("아이비케이", "IBK"),
        ("에스케이", "SK"),
        ("디비금융", "DB"),
    )
    for keyword, label in keyword_labels:
        if keyword in x:
            return label

    # Removed one after another, in this order.
    for filler in ("투자", "금융", "증권", "에셋", "(주)", "㈜"):
        x = x.replace(filler, "")
    return x

In [4]:
def get_case(x):
    """Classify an underwriting-role remark into a short role label.

    Keywords are checked in order and the first hit decides the label;
    text matching none of them is treated as "대표".
    """
    role_rules = (
        ("공동대표", "공동대표"),
        ("공동주관", "공동"),
        ("대표", "대표"),
        ("인수", "인수"),
    )
    for keyword, role in role_rules:
        if keyword in x:
            return role
    return "대표"

def _finalize_regstate(base_df):
    """Add lead-manager / underwriter summary columns and project to the report layout."""
    lead_rows = base_df.loc[["대표" in x for x in base_df['주관형태']], :]
    base_df['대표주관회사'] = ", ".join(lead_rows['인수기관'])
    base_df['인수회사'] = ", ".join(base_df['인수기관'])

    select_col = ['인수기관', '구분', '청약일', '회사명', '대표주관회사', '인수회사', '납입일', '상장일', '인수금액', '공모주수', '확정발행가액', '수수료', '인수비율', '주관형태']
    change_cols = {"인수금액":"공모금액(백만원)", "수수료":"인수수수료(천원)"}

    return base_df.loc[:, select_col].rename(columns = change_cols)


def get_regstate3(base_df, corp_name):
    """Attach underwriting fees from the equity registration statement.

    The role column '주관형태' is normalised with `get_case`, the company's
    '지분증권' registration data is fetched through the module-level `dart`
    client, and a fee is derived per underwriter from the fee rate
    (인수대가 / 인수금액) found in the most recent filing.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Output of `get_issuance3` for one company. It is copied first, so
        the caller's frame is left untouched.
    corp_name : str
        Company name passed to `dart.regstate`.

    Returns
    -------
    pandas.DataFrame
        Report-layout rows. When no registration data exists, the fee and
        amount are 0 and all three date columns are '-'. Otherwise only
        underwriters that also appear in the registration data are kept
        (inner join), amounts are divided by 1,000,000 and dates are
        normalised to 'YYYY-MM-DD' ('-' for non-string dates).
    """
    # Work on a copy: the original implementation rewrote the caller's frame.
    base_df = base_df.copy()
    # Non-string remarks (e.g. NaN) fall back to get_case's own default.
    base_df['주관형태'] = [get_case(x) if isinstance(x, str) else "대표" for x in base_df['주관형태']]

    # Column renames for the API response
    origin_feats = ['rcept_no', 'corp_cls', 'corp_code', 'corp_name', 'sbd', 'pymd', 'sband', 'asand', 'asstd', 'exstk', 'exprc', 'expd', 
                'rpt_rcpn', 'title', 'stksen', 'stkcnt', 'fv', 'slprc', 'slta', 'slmthn', 'actsen',
                'actnmn', 'udtcnt', 'udtamt', 'udtprc', 'udtmth', 'se', 'amt', 'hdr',
                'rl_cmp', 'bfsl_hdstk', 'slstk', 'atsl_hdstk', 'grtrs', 'exavivr', 'grtcnt']

    change_feats = ['접수번호', '법인구분', '고유번호', '회사명', '청약기일', '납입기일', '청약공고일', '배정공고일', '배정기준일', '행사대상증권','행사가격', '행사기간' ,
                    '주요사항보고서(접수번호)', '그룹명칭', '증권의종류', '증권수량', '액면가액', '모집(매출)가액', '모집(매출)총액', '모집(매출)방법', '인수인구분', 
                    '인수인명', '인수수량', '인수금액', '인수대가', '인수방법', '구분', '금액', '보유자',
                    '회사와의관계', '매출전보유증권수', '매출증권수', '매출후보유증권수', '부여사유', '행사가능투자자', '부여수량']

    change_dict = dict(zip(origin_feats, change_feats))

    # Fetch the equity-securities registration data
    temp_df = dart.regstate(corp_name, '지분증권')

    # The former single-row check against ("주1)", "-", "") was dropped: after
    # get_case the role is always one of 공동대표/공동/대표/인수, so it could
    # never be true, and such rows already count as "대표".

    if temp_df.shape[0] == 0:
        # No registration data: fee unknown, reported as 0
        base_df['수수료'] = 0

        output = _finalize_regstate(base_df)

        output['공모금액(백만원)'] = 0
        output['청약일'] = '-'
        output['납입일'] = '-'
        output['상장일'] = '-'

        return output

    # rename() leaves unknown columns alone instead of raising KeyError when
    # the API returns a field that is not in the mapping.
    temp_df = temp_df.rename(columns = change_dict)
    # Keep only the most recent filing.
    temp_df = temp_df.loc[temp_df['접수번호'] == temp_df['접수번호'].max()]
    temp_df = temp_df.reset_index(drop = True)

    # Normalise securities-firm names
    temp_df['인수인명'] = [change_corp(x) if isinstance(x, str) else x for x in temp_df['인수인명']]

    # Underwriting fee rate
    temp_df['인수금액'] = [set_num(x) for x in temp_df['인수금액']]
    temp_df['인수대가'] = [set_num(x) for x in temp_df['인수대가']]
    temp_df['수수료율'] = temp_df['인수대가'] / temp_df['인수금액']

    fee_df = temp_df.loc[~temp_df['수수료율'].isna(), ['인수인명', '수수료율']]

    # Join with the issuance-result rows
    base_df = pd.merge(base_df, fee_df, left_on = '인수기관', right_on = '인수인명')
    del base_df['인수인명']

    # Recompute the fee from the fee rate
    base_df['수수료'] = (base_df['인수금액'] * base_df['수수료율'])/1000

    output = _finalize_regstate(base_df)

    def _normalise_date(value):
        # '2022년 03월 22일' / '2022.03.22' -> '2022-03-22'
        if not isinstance(value, str):
            return "-"
        return re.sub(r"[일\s]", "", re.sub("[년월]", "-", value)).replace(".", "-")

    output['공모금액(백만원)'] = [x/1000000 for x in output['공모금액(백만원)']]
    for date_col in ('청약일', '납입일', '상장일'):
        output[date_col] = [_normalise_date(x) for x in output[date_col]]

    return output

* API Setting

In [5]:
# NOTE(security): an API key is hard-coded below. Prefer supplying it through
# the DART_API_KEY environment variable; the literal is kept only as a
# fallback so existing runs keep working.
# TODO(review): rotate this key and remove the fallback from source control.
api_key = os.environ.get('DART_API_KEY', '1b39652cef07f626c9d37375edf582ee51b1407f')
dart = OpenDartReader(api_key)
dart_fss.set_api_key(api_key=api_key)

'1b39652cef07f626c9d37375edf582ee51b1407f'

* 수집 진행

In [118]:
# C = issuance disclosures, D = equity-holding disclosures
market_dict = {"Y": "코스피","K": "코스닥", "N": "코넥스", "E": "기타"}
start_dt = '2022-03-01'
end_dt = '2022-06-01'

# Two disclosure detail types are queried for the same period and stacked.
info_df = pd.concat([
    dart.list(start=start_dt, end=end_dt, kind_detail='C001'),
    dart.list(start=start_dt, end=end_dt, kind_detail='G002'),
])

# Keep securities-issuance result reports only.
is_result_report = ["증권발행실적보고서" in name for name in info_df.report_nm]
info_df = info_df.loc[is_result_report]

# Keep the 'Y' and 'K' market classes and show them by market name.
info_df = info_df.loc[info_df.corp_cls.isin(['Y', 'K'])]
info_df.corp_cls = info_df.corp_cls.map(market_dict)

In [119]:
# Notebook display: render the filtered `info_df`.
info_df

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,report_nm,rcept_no,flr_nm,rcept_dt,rm
0,1534577,청담글로벌,362320,코스닥,[기재정정]증권발행실적보고서,20220531000178,청담글로벌,20220531,
3,273110,에스티큐브,52020,코스닥,증권발행실적보고서,20220527000377,에스티큐브,20220527,
8,1613281,하나금융22호스팩,418170,코스닥,증권발행실적보고서,20220520000255,하나금융22호스팩,20220520,
10,1364747,가온칩스,399720,코스닥,[기재정정]증권발행실적보고서,20220517000328,가온칩스,20220517,
11,1412822,솔루스첨단소재,336370,코스피,증권발행실적보고서,20220517000291,솔루스첨단소재,20220517,
17,140566,한탑,2680,코스닥,증권발행실적보고서,20220512000667,한탑,20220512,
20,1150515,대명에너지,389260,코스닥,증권발행실적보고서,20220509000566,대명에너지,20220509,
36,523307,다원시스,68240,코스닥,증권발행실적보고서,20220428000334,다원시스,20220428,
37,671376,티웨이항공,91810,코스피,증권발행실적보고서,20220426000401,티웨이항공,20220426,
38,1604371,상상인제3호스팩,415580,코스닥,증권발행실적보고서,20220422000774,상상인제3호스팩,20220422,


In [68]:
# Collect the report rows for every company in `info_df`.
# Companies whose filing lacks an expected table raise IndexError inside
# get_issuance3 and are recorded in `except_list` instead of stopping the run.
except_list = []
collected = []

for idx, corp_name in enumerate(info_df.corp_name):
    try:
        base_df = get_issuance3(info_df, corp_name)

        if base_df.shape[0] != 0:
            collected.append(get_regstate3(base_df, corp_name))

    except IndexError:
        except_list.append(corp_name)
        print(corp_name,"- 수집불가")

# Number of companies that produced rows (kept for later cells).
cnt = len(collected)

if collected:
    # Concatenate once instead of growing the frame inside the loop.
    output = pd.concat(collected)
    output = output.sort_values(['인수기관', '청약일'], ascending = True)
    output.index = [x for x in range(output.shape[0])]
    output = output.drop_duplicates()
else:
    # Nothing collected: define `output` explicitly rather than failing on an
    # undefined (or stale) name.
    output = pd.DataFrame()

코람코더원리츠 - 수집불가


In [117]:
# Notebook display of `info_df`.
info_df

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,report_nm,rcept_no,flr_nm,rcept_dt,rm
2,1616808,키움제6호스팩,413600,코스닥,증권발행실적보고서,20220401000151,키움제6호스팩,20220401,
3,1601222,유진스팩8호,413630,코스닥,증권발행실적보고서,20220325001061,유진스팩8호,20220325,
4,1462605,지투파워,388050,코스닥,증권발행실적보고서,20220325000748,지투파워,20220325,
13,1307335,공구우먼,366030,코스닥,[기재정정]증권발행실적보고서,20220318000733,공구우먼,20220318,
14,534701,세아메카닉스,396300,코스닥,증권발행실적보고서,20220318000272,세아메카닉스,20220318,
15,1179617,한국비엔씨,256840,코스닥,증권발행실적보고서,20220317000639,한국비엔씨,20220317,
16,113207,대한전선,1440,코스피,증권발행실적보고서,20220317000512,대한전선,20220317,
18,307028,경남제약,53950,코스닥,증권발행실적보고서,20220316000321,경남제약,20220316,
22,1152586,유일로보틱스,388720,코스닥,증권발행실적보고서,20220311001114,유일로보틱스,20220311,
23,198697,일진디스플,20760,코스피,증권발행실적보고서,20220311000960,일진디스플,20220311,


In [115]:
# Notebook display of the collected `output` table.
output

Unnamed: 0,인수기관,구분,청약일,회사명,대표주관회사,인수회사,납입일,상장일,공모금액(백만원),공모주수,확정발행가액,인수수수료(천원),인수비율,주관형태
0,KB,코스닥,2022-02-24,엔지켐생명과학,KB,KB,2022-03-02,2022-03-21,121156.6644,3809958,31800.0,6.066471,71.89,대표
1,KB,코스닥,2022-03-22,지투파워,한국,"한국, KB",2022-03-25,2022-04-01,4354.2,265500,16400.0,179393.04,30.0,공동
2,미래,코스닥,2022-03-14,공구우먼,미래,미래,2022-03-17,-,22400.0,1120000,20000.0,644430.528,100.0,대표
3,신한,코스닥,2022-03-15,세아메카닉스,신한,신한,2022-03-18,-,29260.0,6650000,4400.0,753445.0,100.0,대표
4,유진,코스닥,2022-03-22,유진스팩8호,유진,유진,2022-03-25,-,10000.0,5000000,2000.0,350000.0,100.0,대표
5,키움,코스닥,2022-03-28,키움제6호스팩,키움,키움,2022-03-31,-,6400.0,3200000,2000.0,300000.0,100.0,대표
6,하나,코스닥,2022-02-25,모아데이타,하나,하나,2022-03-03,2022-03-10,16000.0,800000,20000.0,659200.0,100.0,대표
7,한국,코스닥,2022-03-07,유일로보틱스,한국,한국,2022-03-11,2022-03-18,21500.0,2150000,10000.0,775075.0,100.0,대표
8,한국,코스닥,2022-03-22,지투파워,한국,"한국, KB",2022-03-25,2022-04-01,10159.8,619500,16400.0,418583.76,70.0,대표


In [69]:
# Notebook display: companies skipped because collection raised IndexError.
except_list

['코람코더원리츠']

* KIND

In [None]:
# Disabled cell: the headless-Chrome / Streamlit driver setup below is wrapped
# in a string literal, so none of it executes.
'''
options = Options()
options.add_argument('--disable-gpu')
options.add_argument('--headless')

@st.experimental_singleton
def get_driver():
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
'''

In [None]:
def _kind_financial_columns():
    """Return the nine output column names: three metrics x three fiscal periods."""
    periods = (('전전연도', 'T-2'), ('직전연도', 'T-1'), ('당해연도', 'T'))
    metrics = ('매출액', '영업이익', '당기순이익')
    return ["{}{}({})".format(period, metric, tag) for metric in metrics for period, tag in periods]


def _kind_single_row(listing_df, code, l_cnt, c_cnt, m_product, col_names, values):
    """Combine the search-result row, overview figures and financial values into one row."""
    listing_df['종목코드'] = code
    listing_df['상장주식수'] = l_cnt
    listing_df['유통가능주식수'] = c_cnt
    listing_df['주요제품'] = m_product
    listing_df.index = [0]

    df_change = pd.DataFrame.from_dict({name: [value] for name, value in zip(col_names, values)})
    df_change.index = [0]

    return pd.concat([listing_df, df_change], axis = 1)


def get_kind(driver, code, FIRST = True):
    """Look up one company on the KIND search page and scrape its details.

    The driver is expected to already be on the KIND listing-company search
    page. The keyword is searched over the whole period, the first result's
    popup is opened, overview figures are read, and the financial tab is
    parsed for revenue, operating income and net income.

    NOTE(review): `get_kind` is defined more than once in this file; when
    the file is executed top to bottom the last definition wins.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session; assumed to have only the main window open.
    code : str
        Text typed into the keyword box (stored as '종목코드').
    FIRST : bool, default True
        When true, the four listing-type labels are toggled before searching.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame; financial columns hold '-' when the financial
        tab cannot be opened or an item is missing. None when the search
        returns no rows (previously an UnboundLocalError).
    """
    # Imported locally: the file-level imports do not bring these into scope,
    # so the except clause below used to fail with NameError.
    from io import StringIO
    from selenium.common.exceptions import ElementNotInteractableException

    items = ['매출액(수익)', '영업이익(손실)', '당기순이익(손실)']
    col_names = _kind_financial_columns()

    ## 01. Search by company keyword
    name_element = driver.find_element(By.ID, 'AKCKwd')
    name_element.click()
    name_element.clear()
    time.sleep(0.1)
    name_element.send_keys(code)

    if FIRST:
        ## 02. Use new listings only (toggles labels 2..5 of the type row)
        check_box = '/html/body/section[2]/section/form/section/div/div[1]/table/tbody/tr[7]/td/label[{}]'

        for idx in range(4):
            driver.find_element(By.XPATH, check_box.format(idx+2)).click()

        time.sleep(0.1)

    ## 03. Select the whole period
    driver.find_element(By.CLASS_NAME, 'ord-07').click()

    ## 04. Run the search
    search_element = driver.find_element(By.CLASS_NAME, 'btn-sprite.type-00.vmiddle.search-btn')
    search_element.click()
    time.sleep(1.5)

    ### Read the result table
    temp_df = pd.read_html(StringIO(driver.page_source))
    listing_df = [x for x in temp_df if "회사명" in x and "상장유형" in x][0]

    if listing_df.shape[0] == 0:
        return None

    driver.find_element(By.CSS_SELECTOR, '#main-contents > section.scrarea.type-00 > table > tbody > tr').click()
    time.sleep(0.25)

    driver.switch_to.window(driver.window_handles[1])
    wait = WebDriverWait(driver, 10, poll_frequency=0.25)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "btn-sprite.type-98.vmiddle")))

    time.sleep(0.5)
    # Listed share count
    l_cnt = driver.find_element(By.CSS_SELECTOR, '#tab-contents > table:nth-child(3) > tbody > tr:nth-child(9) > td:nth-child(2)')
    l_cnt = int(re.sub("[^0-9]", "", l_cnt.text))

    # Tradable share count
    c_cnt = driver.find_element(By.CSS_SELECTOR, '#tab-contents > table:nth-child(3) > tbody > tr:nth-child(10) > td.txr')
    c_cnt = int(re.sub("[^0-9]", "", c_cnt.text))

    # Main product
    m_product = driver.find_element(By.CSS_SELECTOR, "#tab-contents > table:nth-child(3) > tbody > tr:nth-child(6) > td").text

    try:
        # Switch to the second tab of the popup
        driver.find_element(By.XPATH, '/html/body/form/section/nav/ul/li[2]/a').click()
        time.sleep(0.25)

        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#tab-contents > table:nth-child(15) > tbody > tr:nth-child(2) > td:nth-child(2)")))

    except ElementNotInteractableException:
        # Tab cannot be opened: report every financial figure as '-'.
        second_df = _kind_single_row(listing_df, code, l_cnt, c_cnt, m_product, col_names, ['-'] * len(col_names))

        driver.close()
        driver.switch_to.window(driver.window_handles[0])

        return second_df

    dfs = pd.read_html(StringIO(driver.page_source), header=0)

    # Pick the first table listing all three items. Tables without an '항목'
    # column are skipped explicitly (a bare except used to hide the KeyError
    # they caused); index 1 stays the fallback.
    candidates = [i for i, table in enumerate(dfs)
                  if '항목' in table.columns and set(items) <= set(table['항목'])]
    get_idx = candidates[0] if candidates else 1

    df = dfs[get_idx]
    df = df.loc[df['항목'].isin(items)].drop_duplicates('항목')

    missing = [x for x in items if x not in list(df['항목'])]
    if missing:
        filler = pd.DataFrame({"항목": missing,
                               df.columns[1]: ['-'] * len(missing),
                               df.columns[2]: ['-'] * len(missing),
                               df.columns[3]: ['-'] * len(missing)})
        df = pd.concat([df, filler])

    # Always order the rows revenue -> operating income -> net income so the
    # flattened values line up with `col_names`.
    df = df.copy()
    df['항목'] = pd.Categorical(df['항목'], categories=items)
    df = df.sort_values("항목")

    driver.close()
    driver.switch_to.window(driver.window_handles[0])

    # Only the three period columns are used; extra columns would shift the
    # values against the column names.
    values = df.iloc[:, 1:4].to_numpy().reshape(-1)

    return _kind_single_row(listing_df, code, l_cnt, c_cnt, m_product, col_names, values)

In [None]:
def get_kind(driver, start_dt, end_dt):
    """Scrape KIND listing details for every result between two dates.

    Opens the KIND listing-company search, fills in the date range, toggles
    four listing-type labels, runs the search and then, for every row on
    every result page, opens the detail popup and reads share counts, main
    product, competition ratio and allocation figures.

    NOTE(review): `get_kind` is defined more than once in this file; when
    the file is executed top to bottom the last definition wins.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session; assumed to have only the main window open.
    start_dt, end_dt : str
        Values typed into the 'fromDate' / 'toDate' inputs.

    Returns
    -------
    pandas.DataFrame
        Columns '회사명', '상장일', '상장주식수', '신주모집', '구주매출',
        '경쟁률', '기관배정수량', '주요제품'. Empty (same columns) when the
        search reports zero results.
    """
    # Local import so this block does not depend on a new file-level import.
    from io import StringIO

    page_size = 15  # rows per result page assumed by the paging arithmetic
    shares_css = "#tab-contents > table:nth-child(6) > tbody > tr:nth-child(2) > td:nth-child(4)"
    ratio_css = "#tab-contents > table.detail.type-01.chain-head.mt10 > tbody > tr:nth-child(3) > td"

    ## 01. Open KIND
    driver.get("https://kind.krx.co.kr/listinvstg/listingcompany.do?method=searchListingTypeMain")

    wait = WebDriverWait(driver, 10, poll_frequency=0.25)
    wait.until(EC.presence_of_element_located((By.ID, "fromDate")))

    ## 02. Set the start/end dates
    for element_id, value in (('fromDate', start_dt), ('toDate', end_dt)):
        date_input = driver.find_element(By.ID, element_id)
        date_input.clear()
        time.sleep(0.05)
        date_input.send_keys(value)
        # ESC after typing; presumably dismisses the date-picker overlay.
        date_input.send_keys(Keys.ESCAPE)
        time.sleep(0.05)

    ## 03. Use new listings only (toggles labels 2..5 of the type row)
    check_box = '/html/body/section[2]/section/form/section/div/div[1]/table/tbody/tr[7]/td/label[{}]'

    for idx in range(4):
        driver.find_element(By.XPATH, check_box.format(idx+2)).click()

    search_element = driver.find_element(By.CLASS_NAME, 'btn-sprite.type-00.vmiddle.search-btn')
    search_element.click()

    time.sleep(1.5)
    cnt_element = driver.find_element(By.XPATH, '/html/body/section[2]/section/article/section[2]/div[2]/em')
    # Strip anything that is not a digit so "1,234" parses too.
    total_cnt = int(re.sub("[^0-9]", "", cnt_element.text) or 0)
    # Ceiling division: floor(n / 15) + 1 visited one page too many whenever
    # the result count was an exact multiple of the page size.
    loop_cnt = -(-total_cnt // page_size)

    time.sleep(0.05)
    page_frames = []
    for tot_idx in range(loop_cnt):
        ### Collect everything on the current result page
        temp_df = pd.read_html(StringIO(driver.page_source))
        listing_df = [x for x in temp_df if "회사명" in x and "상장유형" in x][0]

        s_list, m_list, r_list, n_list, o_list, c_list = [], [], [], [], [], []

        for idx in range(listing_df.shape[0]):
            row_element = driver.find_element(By.XPATH, "/html/body/section[2]/section/article/section[1]/table/tbody/tr[{}]".format(idx+1))
            row_element.click()

            time.sleep(0.25)

            driver.switch_to.window(driver.window_handles[1])
            try:
                time.sleep(0.25)
                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, shares_css)))

                # Listed share count
                s_cnt = driver.find_element(By.CSS_SELECTOR, shares_css).text
                # Main product
                m_product = driver.find_element(By.CSS_SELECTOR, "#tab-contents > table:nth-child(3) > tbody > tr:nth-child(6) > td").text

                # Switch to the third tab of the popup
                driver.find_element(By.XPATH, '/html/body/form/section/nav/ul/li[3]/a').click()
                time.sleep(0.25)

                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ratio_css)))

                # Competition ratio
                ratio = driver.find_element(By.CSS_SELECTOR, ratio_css).text
                # Newly issued shares
                new_s = driver.find_element(By.CSS_SELECTOR, '#tab-contents > table:nth-child(5) > tbody > tr:nth-child(2) > td:nth-child(2)').text
                # Existing shares sold
                old_s = driver.find_element(By.CSS_SELECTOR, '#tab-contents > table:nth-child(5) > tbody > tr:nth-child(3) > td:nth-child(2)').text
                # Quantity allocated to institutions
                c_cnt = driver.find_element(By.CSS_SELECTOR, '#tab-contents > table.detail.type-01.chain-foot.mt3 > tbody > tr:nth-child(3) > td:nth-child(2)').text
            finally:
                # Always close the popup so a failed row does not leave the
                # session pointing at a stray window.
                driver.close()
                driver.switch_to.window(driver.window_handles[0])

            s_list.append(s_cnt)
            m_list.append(m_product)
            r_list.append(ratio)
            n_list.append(new_s)
            o_list.append(old_s)
            c_list.append(c_cnt)

            time.sleep(0.1)

        # '-' placeholders count as zero.
        o_list = [set_num(x) if x != '-' else 0 for x in o_list]
        n_list = [set_num(x) if x != '-' else 0 for x in n_list]
        s_list = [set_num(x) if x != '-' else 0 for x in s_list]
        c_list = [set_num(x) if x != '-' else 0 for x in c_list]

        right_df = pd.DataFrame({"회사명": listing_df['회사명'], "상장주식수": s_list, "신주모집": n_list, "구주매출": o_list, "경쟁률": r_list, "기관배정수량": c_list, "주요제품": m_list})

        select_cols = ['회사명', '상장일']
        select_cols.extend(right_df.columns[1:])

        page_frames.append(pd.merge(listing_df, right_df, on = '회사명', how = 'inner')[select_cols])

        ## Go to the next page (nothing to advance to after the last one)
        if tot_idx < loop_cnt - 1:
            next_element = driver.find_element(By.CLASS_NAME, 'next')
            next_element.click()
            time.sleep(3)

    if not page_frames:
        # Zero results: return an empty frame instead of failing on an
        # unbound local.
        return pd.DataFrame(columns = ['회사명', '상장일', '상장주식수', '신주모집', '구주매출', '경쟁률', '기관배정수량', '주요제품'])

    return pd.concat(page_frames)

In [None]:
def go_kind(driver, code, FIRST = True):
    """Open the KIND listing search page and run a keyword search.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session to drive.
    code : str
        Text typed into the keyword box.
    FIRST : bool, default True
        When true, the four listing-type labels are toggled before searching.

    Returns nothing; the driver is left on the search-result page.
    """
    ## 01. Open KIND and wait for the search form
    driver.get("https://kind.krx.co.kr/listinvstg/listingcompany.do?method=searchListingTypeMain")
    WebDriverWait(driver, 10, poll_frequency=0.25).until(
        EC.presence_of_element_located((By.ID, "fromDate"))
    )

    ## 02. Type the company keyword
    keyword_box = driver.find_element(By.ID, 'AKCKwd')
    keyword_box.click()
    keyword_box.clear()
    time.sleep(0.1)
    keyword_box.send_keys(code)

    ## 03. Use new listings only (toggles labels 2..5 of the type row)
    if FIRST:
        label_xpath = '/html/body/section[2]/section/form/section/div/div[1]/table/tbody/tr[7]/td/label[{}]'
        for label_no in range(2, 6):
            driver.find_element(By.XPATH, label_xpath.format(label_no)).click()

        time.sleep(0.1)

    ## 04. Select the whole period
    driver.find_element(By.CLASS_NAME, 'ord-07').click()

    ## 05. Run the search and give the result table time to load
    driver.find_element(By.CLASS_NAME, 'btn-sprite.type-00.vmiddle.search-btn').click()
    time.sleep(1.5)

def go_inner(driver):
    """Open the detail popup of the first search result, if there is one.

    Parameters
    ----------
    driver : selenium WebDriver
        Session currently showing a KIND search-result page.

    Returns
    -------
    str or None
        "검색실패" when the result table has no rows; otherwise None, with
        the driver switched to the popup window once it has loaded.
    """
    # Local import so this block does not depend on a new file-level import.
    from io import StringIO

    # pandas deprecated passing literal HTML to read_html (2.1+); a file-like
    # wrapper works on old and new versions alike.
    temp_df = pd.read_html(StringIO(driver.page_source))
    listing_df = [x for x in temp_df if "회사명" in x and "상장유형" in x][0]

    if listing_df.shape[0] == 0:
        return "검색실패"

    driver.find_element(By.CSS_SELECTOR, '#main-contents > section.scrarea.type-00 > table > tbody > tr').click()
    time.sleep(0.25)

    driver.switch_to.window(driver.window_handles[1])
    wait = WebDriverWait(driver, 10, poll_frequency=0.25)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "btn-sprite.type-98.vmiddle")))
    return None

def get_overview(driver):
    """Read three fields from the company-overview tab of the detail page.

    Returns a tuple ``(l_cnt, c_cnt, m_product)``:
      l_cnt     -- listed share count as int (non-digits stripped)
      c_cnt     -- tradable share count as int (non-digits stripped)
      m_product -- main-product cell text, unmodified
    """
    def _cell_text(selector):
        # Text of the single element matched by a CSS selector.
        return driver.find_element(By.CSS_SELECTOR, selector).text

    # Listed shares
    listed_txt = _cell_text('#tab-contents > table:nth-child(3) > tbody > tr:nth-child(9) > td:nth-child(2)')
    l_cnt = int(re.sub("[^0-9]", "", listed_txt))

    # Tradable shares
    tradable_txt = _cell_text('#tab-contents > table:nth-child(3) > tbody > tr:nth-child(10) > td.txr')
    c_cnt = int(re.sub("[^0-9]", "", tradable_txt))

    # Main products
    m_product = _cell_text("#tab-contents > table:nth-child(3) > tbody > tr:nth-child(6) > td")

    return l_cnt, c_cnt, m_product

def get_inform(driver):
    """Read five raw text fields from the public-offering tab of the detail page.

    Returns a tuple ``(ratio, new_s, old_s, c_cnt, l_date)`` of cell texts:
    competition ratio, newly issued shares, existing shares sold,
    institutional allotment and listing date. No parsing is applied.
    """
    selectors = (
        # competition ratio
        '#tab-contents > table.detail.type-01.chain-head.mt10 > tbody > tr:nth-child(3) > td',
        # newly issued shares
        '#tab-contents > table:nth-child(5) > tbody > tr:nth-child(2) > td:nth-child(2)',
        # existing shares sold
        '#tab-contents > table:nth-child(5) > tbody > tr:nth-child(3) > td:nth-child(2)',
        # institutional allotment
        '#tab-contents > table.detail.type-01.chain-foot.mt3 > tbody > tr:nth-child(3) > td:nth-child(2)',
        # listing date
        '#tab-contents > table.detail.type-01.chain-head.mt10 > tbody > tr:nth-child(4) > td',
    )
    # Elements are looked up in the same order as the returned fields.
    return tuple(driver.find_element(By.CSS_SELECTOR, sel).text for sel in selectors)

In [121]:
info_df.tail()

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,report_nm,rcept_no,flr_nm,rcept_dt,rm
102,606886,엔지켐생명과학,183490,코스닥,증권발행실적보고서,20220302000569,엔지켐생명과학,20220302,
12,1442966,마스턴프리미어리츠,357430,코스피,증권발행실적보고서(집합투자증권)((주)마스턴프리미어제1호위탁관리부동산투자회사),20220518000288,마스턴프리미어리츠,20220518,
16,1430475,코람코에너지리츠,357120,코스피,증권발행실적보고서(집합투자증권)(주식회사코람코에너지플러스위탁관리부동산투자회사),20220427000535,코람코에너지리츠,20220427,
22,1276594,신한알파리츠,293940,코스피,증권발행실적보고서(집합투자증권)((주)신한알파위탁관리부동산투자회사),20220418000015,신한알파리츠,20220418,
35,1180118,코람코더원리츠,417310,코스피,[기재정정]증권발행실적보고서(집합투자증권)(㈜코람코더원위탁관리부동산투자회사),20220317000309,코람코더원리츠,20220317,


In [99]:
code = '417310'

In [101]:
# Launch the Chrome driver.
driver = webdriver.Chrome()

# Open KIND and search for `code` (FIRST=False: listing-type labels untouched).
go_kind(driver, code, False)

# Open the detail page of the first search hit.
value = go_inner(driver)

# Fail fast: without a detail window the waits below cannot succeed, so stop
# here with a clear message instead of continuing after a bare "x".
if value == "검색실패":
    raise RuntimeError("KIND search returned no rows for code {}".format(code))

driver.switch_to.window(driver.window_handles[-1])
wait = WebDriverWait(driver, 10, poll_frequency=0.25)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#tab-contents > table:nth-child(6) > tbody > tr:nth-child(2) > td:nth-child(4)")))

# Company overview: listed shares, tradable shares, main products.
# The tradable-share count is stored as `f_cnt` so that it is not overwritten
# by the institutional allotment that get_inform() returns as `c_cnt`.
l_cnt, f_cnt, m_product = get_overview(driver)

# Switch to the public-offering tab and wait for its first field.
driver.find_element(By.CSS_SELECTOR, '#tabName > a[title="공모정보"]').click()

time.sleep(0.25)

wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#tab-contents > table.detail.type-01.chain-head.mt10 > tbody > tr:nth-child(3) > td")))

# Competition ratio, new shares, existing shares, institutional allotment,
# listing date. `c_cnt` keeps its previous final meaning (institutional
# allotment), so cells that read it afterwards behave as before.
ratio, new_s, old_s, c_cnt, l_date = get_inform(driver)

In [105]:
l_cnt, c_cnt, m_product, ratio, new_s, old_s, c_cnt, l_date

(40400000,
 '13,650,000',
 '서울 영등포구 여의도 소재 하나금융투자 빌딩',
 '450.7 : 1',
 '19,500,000',
 '-',
 '13,650,000',
 '2022-03-28')

In [5]:
info_df

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,report_nm,rcept_no,flr_nm,rcept_dt,rm
0,1534577,청담글로벌,362320,코스닥,[기재정정]증권발행실적보고서,20220531000178,청담글로벌,20220531,
3,273110,에스티큐브,52020,코스닥,증권발행실적보고서,20220527000377,에스티큐브,20220527,
8,1613281,하나금융22호스팩,418170,코스닥,증권발행실적보고서,20220520000255,하나금융22호스팩,20220520,
10,1364747,가온칩스,399720,코스닥,[기재정정]증권발행실적보고서,20220517000328,가온칩스,20220517,
11,1412822,솔루스첨단소재,336370,코스피,증권발행실적보고서,20220517000291,솔루스첨단소재,20220517,
17,140566,한탑,2680,코스닥,증권발행실적보고서,20220512000667,한탑,20220512,
20,1150515,대명에너지,389260,코스닥,증권발행실적보고서,20220509000566,대명에너지,20220509,
36,523307,다원시스,68240,코스닥,증권발행실적보고서,20220428000334,다원시스,20220428,
37,671376,티웨이항공,91810,코스피,증권발행실적보고서,20220426000401,티웨이항공,20220426,
38,1604371,상상인제3호스팩,415580,코스닥,증권발행실적보고서,20220422000774,상상인제3호스팩,20220422,


In [70]:

# Scrape the KIND listing details for the date window, then close the
# current browser window.
output_df = get_kind(driver, start_dt, end_dt)

time.sleep(0.5)
driver.close()

# Company name -> market lookup taken from the DART filing list.
right_df = info_df.loc[:, ['corp_name', 'corp_cls']].rename(
    columns = {'corp_name': '회사명', 'corp_cls': '시장구분'}
)

# Join DART rows (minus their own listing-date column) with the KIND rows,
# attach the market column, and remove exact duplicate rows.
dart_part = output.loc[:, output.columns != '상장일']
final_df = (
    pd.merge(dart_part, output_df, on = '회사명', how = 'inner')
    .merge(right_df, on = '회사명', how = 'left')
    .drop_duplicates()
)

In [74]:
info_df

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,report_nm,rcept_no,flr_nm,rcept_dt,rm
2,1616808,키움제6호스팩,413600,코스닥,증권발행실적보고서,20220401000151,키움제6호스팩,20220401,
3,1601222,유진스팩8호,413630,코스닥,증권발행실적보고서,20220325001061,유진스팩8호,20220325,
4,1462605,지투파워,388050,코스닥,증권발행실적보고서,20220325000748,지투파워,20220325,
13,1307335,공구우먼,366030,코스닥,[기재정정]증권발행실적보고서,20220318000733,공구우먼,20220318,
14,534701,세아메카닉스,396300,코스닥,증권발행실적보고서,20220318000272,세아메카닉스,20220318,
15,1179617,한국비엔씨,256840,코스닥,증권발행실적보고서,20220317000639,한국비엔씨,20220317,
16,113207,대한전선,1440,코스피,증권발행실적보고서,20220317000512,대한전선,20220317,
18,307028,경남제약,53950,코스닥,증권발행실적보고서,20220316000321,경남제약,20220316,
22,1152586,유일로보틱스,388720,코스닥,증권발행실적보고서,20220311001114,유일로보틱스,20220311,
23,198697,일진디스플,20760,코스피,증권발행실적보고서,20220311000960,일진디스플,20220311,


In [73]:
output_df

Unnamed: 0,회사명,상장일,상장주식수,신주모집,구주매출,경쟁률,기관배정수량,주요제품
0,지투파워,2022-04-01,18217551.0,737000.0,148000.0,2029.37 : 1,663750.0,"수·배전반, 태양광발전시스템, 인버터(PCS)"
1,유진스팩8호,2022-03-31,5920000.0,5000000.0,0.0,350.85 : 1,3750000.0,기업인수합병
2,코람코더원리츠,2022-03-28,40400000.0,19500000.0,0.0,450.7 : 1,13650000.0,서울 영등포구 여의도 소재 하나금융투자 빌딩
3,세아메카닉스,2022-03-24,26489500.0,4450000.0,2200000.0,2475.87 : 1,4655000.0,"TV스탠드, 전기 수소차 부품 등"
4,공구우먼,2022-03-23,22618350.0,1120000.0,0.0,7.54 : 1,830000.0,플러스사이즈 여성용 의류 및 잡화
5,유일로보틱스,2022-03-18,8580846.0,2150000.0,0.0,2535.3 : 1,1483500.0,"취출로봇, 사출장비 및 자동화시스템"
6,모아데이타,2022-03-10,33460272.0,800000.0,0.0,24 : 1,600000.0,인공지능 기반 ICT 시스템 이상탐지 및 예측 솔루션
7,에스케이증권7호스팩,2022-03-07,42652455.0,3000000.0,0.0,791.69 : 1,2250000.0,기업인수합병
8,비씨엔씨,2022-03-03,12760303.0,2500000.0,0.0,2686 : 1,1640000.0,반도체용 합성쿼츠 포커스링
9,노을,2022-03-03,11453530.0,1500000.0,0.0,14.66 : 1,960000.0,융복합 체외진단 플랫폼


* 38커뮤니케이션

In [57]:
# Collect the demand-forecast result tables from 38.co.kr, one per page,
# and keep only the companies that are present in final_df.
max_page = 20
base_url = 'http://www.38.co.kr/html/fund'
page_frames = []

for page in range(1, max_page+1):
    outer_url = 'http://www.38.co.kr/html/fund/index.htm?o=r1&page={}'.format(page)

    # A timeout keeps one unresponsive page from hanging the whole loop.
    response = requests.get(outer_url, timeout = 10)
    soup = BeautifulSoup(response.text, 'html.parser')
    tb_src = soup.select('table[summary="수요예측결과"]')

    temp_df = pd.read_html(str(tb_src))[0]
    # .copy() so the new 'url' column is added to an independent frame
    # rather than to a filtered view of the parsed table.
    temp_df = temp_df.loc[~temp_df['기업명'].isna()].copy()

    # Each anchor's href is relative ("./?o=v&..."); keep the part after the
    # last "." and append it to the base URL.
    url_tags = soup.select('tbody > tr > td > a')
    temp_df['url'] = [base_url + x.attrs['href'].split(".")[-1] for x in url_tags]
    page_frames.append(temp_df)

# Single concat instead of growing the frame page by page.
outer_df = pd.concat(page_frames)
outer_df = outer_df.loc[outer_df['기업명'].isin(final_df['회사명'])]

In [58]:
outer_df

Unnamed: 0,기업명,예측일,공모희망가(원),공모가(원),공모금액 (백만원),기관 경쟁률,의무보유 확약,주간사,url
10,지투파워,2022.03.17,"13,500~16,400",16400.0,11947.0,1729.63:1,11.16%,"한국투자증권,KB증권",http://www.38.co.kr/html/fund/?o=v&no=1759&l=&...
11,유진스팩8호,2022.03.17,"2,000~2,000",2000.0,10000.0,,-,유진투자증권,http://www.38.co.kr/html/fund/?o=v&no=1767&l=&...
12,세아메카닉스,2022.03.10,"3,500~4,000",4400.0,23275.0,1812.83:1,6.73%,신한금융투자,http://www.38.co.kr/html/fund/?o=v&no=1764&l=&...
13,공구우먼,2022.03.07,"26,000~31,000",20000.0,36400.0,56.9:1,2.65%,미래에셋증권,http://www.38.co.kr/html/fund/?o=v&no=1763&l=&...
14,유일로보틱스,2022.02.24,"7,600~9,200",10000.0,16340.0,1756.45:1,15.92%,한국투자증권,http://www.38.co.kr/html/fund/?o=v&no=1761&l=&...
15,모아데이타,2022.02.21,"24,000~28,000",20000.0,19200.0,114.19:1,0.87%,하나금융투자,http://www.38.co.kr/html/fund/?o=v&no=1758&l=&...


In [59]:
final_df.columns

Index(['인수기관', '구분', '청약일', '회사명', '대표주관회사', '인수회사', '납입일', '공모금액(백만원)',
       '공모주수', '확정발행가액', '인수수수료(천원)', '인수비율', '주관형태', '상장일', '상장주식수', '신주모집',
       '구주매출', '경쟁률', '기관배정수량', '주요제품', '시장구분'],
      dtype='object')

* 최종 결과 형태

In [None]:
def change_form(df, idx):
    """Reshape the merged IPO table into one of three report layouts.

    df  : pandas DataFrame holding the merged per-underwriter IPO rows.
          It is never modified; layouts 1 and 2 work on a copy.
    idx : layout selector
          1 -- league table: one row per underwriter with fee columns
               (requires the module-level ``make_fee`` helper)
          2 -- raw-data summary: one row per company with summed fee,
               share count and offering amount plus empty placeholder columns
          3 -- plain column selection of ``df``

    Returns the reshaped DataFrame, or None when ``idx`` is not 1, 2 or 3.
    """
    if idx == 1:
        f_df = make_fee(df.copy())

        # NOTE(review): dividing a thousand-won amount by 100,000 presumably
        # converts it to units of 100 million won -- confirm.
        f_df['인수수수료(천원)'] /= 100000
        # Rows whose company name contains "스팩" get a zero estimated
        # subscription fee.
        f_df['청약수수료추정'] = [x if "스팩" not in y else 0 for x, y in zip(f_df['청약수수료추정'], f_df['회사명'])]
        f_df['수수료합계'] = f_df['인수수수료(천원)'] + f_df['청약수수료추정']

        select_cols = ['상장일', '회사명', '구분', '인수기관', '공모금액(백만원)', '인수수수료(천원)', '청약수수료추정', '수수료합계', '주관형태',
                      '확정발행가액', '인수비율', '기관배정수량', '청약일', '납입일']

        # Select the report columns and give them their report names
        # (positional, same order as select_cols).
        o_df = f_df.loc[:, select_cols]
        o_df.columns = ['상장일', '업체', '시장구분', '인수회사', '인수금액', '인수수수료', '청약수수료추정', '수수료합계', '주관형태',
                      '공모가', '인수비율', '총기관배정수량', '청약일', '납입일']

        # Total issue amount per company = sum of its underwriters' amounts.
        check_df = o_df.groupby('업체')['인수금액'].sum().reset_index()
        check_df = check_df.rename(columns = {"인수금액":"발행금액"})

        o_df = pd.merge(o_df, check_df, on = '업체', how = 'left')

        # Placeholder columns, filled with "-".
        o_df['건수'] = '-'
        o_df['상장트랙'] = '-'

        select_cols = ['상장일', '업체', '시장구분', '발행금액','인수회사', '인수금액', '인수수수료', '청약수수료추정', '수수료합계', '건수',
                       '주관형태', '상장트랙','공모가', '인수비율', '총기관배정수량', '청약일', '납입일']

        o_df = o_df.loc[:, select_cols].sort_values("상장일")
        # Only the per-underwriter amount is rescaled; '발행금액' was summed
        # before this division and keeps the original unit.
        o_df['인수금액'] /= 1000
        return o_df

    elif idx == 2:
        i_df = df.copy()

        # Share of existing-share sales in the whole offering, in percent.
        i_df['구주매출비중'] = i_df['구주매출']/(i_df['구주매출'] + i_df['신주모집']) * 100

        # Per-underwriter columns that are replaced by per-company totals.
        sum_cols = ['인수수수료(천원)', '공모주수', '공모금액(백만원)']
        # Company-level columns carried over unchanged.
        keep_cols = ['청약일', '회사명', '대표주관회사', '납입일', '상장일', '확정발행가액', '구주매출비중', '경쟁률']

        totals = i_df.groupby('회사명')[sum_cols].sum().reset_index()

        o_df = pd.merge(i_df.loc[:, keep_cols], totals, on = '회사명')
        o_df = o_df.rename(columns = {"공모금액(백만원)":"공모금액(천원)"})
        # Underwriter rows of one company are now identical; keep one.
        o_df = o_df.drop_duplicates()

        # Million won -> thousand won, matching the renamed column.
        o_df['공모금액(천원)'] *= 1000

        # Columns left empty by this function. np.nan is used because the
        # np.NaN alias no longer exists in NumPy 2.0.
        for empty_col in ['기준가(평가가치)', '1차발행가액-1', '1차발행가액-2', '수요예측가중평균가',
                          '결정비율', '공모비율', '상장요건', '코넥스여부']:
            o_df[empty_col] = np.nan

        select_cols = ['청약일','회사명','대표주관회사','납입일','상장일','공모금액(천원)','공모주수','기준가(평가가치)','1차발행가액-1','1차발행가액-2',
                       '수요예측가중평균가','확정발행가액','결정비율','공모비율','구주매출비중','상장요건','코넥스여부','경쟁률','인수수수료(천원)']

        o_df = o_df.loc[:, select_cols]
        # Fee as a percentage of the offering amount (both in thousand won).
        o_df['수수료율'] = o_df['인수수수료(천원)'] / o_df['공모금액(천원)'] * 100
        o_df = o_df.sort_values('청약일')

        return o_df

    elif idx == 3:
        select_cols = ['인수기관', '청약일', '회사명', '대표주관회사', '인수회사', '납입일', '상장일', '공모금액(백만원)', '공모주수', '확정발행가액', '인수수수료(천원)', '인수비율']
        return df.loc[:, select_cols]

    # Any other selector: no layout is defined, so None is returned.
    return None

In [83]:
# Build the three report layouts (selectors 1, 2, 3) from the merged table.
df1 = change_form(final_df, 1)
df2 = change_form(final_df, 2)
df3 = change_form(final_df, 3)

In [95]:
# Write each report to its Excel file, omitting the DataFrame index.
df1.to_excel("IB1본부_01_리그테이블IPO_2303-230523.xlsx", index = False)
df2.to_excel("IB1본부_02_IPO통합집계_Rawdata_2303-230523.xlsx", index = False)
df3.to_excel("IB1본부_03_IPO현황_Summary_2303-230523.xlsx", index = False)

In [106]:
# Load the three report files back into DataFrames.
df1 = pd.read_excel("IB1본부_01_리그테이블IPO_2303-230523.xlsx")
df2 = pd.read_excel("IB1본부_02_IPO통합집계_Rawdata_2303-230523.xlsx")
df3 = pd.read_excel("IB1본부_03_IPO현황_Summary_2303-230523.xlsx")

In [107]:
df1

Unnamed: 0,상장일,업체,시장구분,발행금액,인수회사,인수금액,인수수수료,청약수수료추정,수수료합계,건수,주관형태,상장트랙,공모가,인수비율,총기관배정수량,청약일,납입일
0,2023-03-09,엔에이치스팩28호,코스닥,6800.0,NH,6.8,2.04,0.0,2.04,-,대표,-,2000,100,2550000,2023-02-27,2023-03-03
1,2023-03-13,금양그린파워,코스닥,30073.2,삼성,30.0732,9.29262,2.25549,11.54811,-,대표,-,10000,100,2255490,2023-03-02,2023-03-07
2,2023-03-15,미래에셋드림스팩1호,코스닥,70000.0,미래,70.0,14.0,0.0,14.0,-,대표,-,10000,100,5250000,2023-03-06,2023-03-09
3,2023-03-17,유안타제12호스팩,코스닥,9000.0,유안타,9.0,2.7,0.0,2.7,-,대표,-,2000,100,3375000,2023-03-07,2023-03-10
4,2023-03-29,LB인베스트먼트,코스닥,23552.0397,미래,23.55204,4.85172,1.766307,6.618027,-,대표,-,5100,100,3463347,2023-03-20,2023-03-23
5,2023-03-30,지아이이노베이션,코스닥,26000.0,NH,10.4,4.16,0.78,4.94,-,공동대표,-,13000,40,1500000,2023-03-21,2023-03-24
6,2023-03-30,지아이이노베이션,코스닥,26000.0,삼성,5.2,2.08,0.39,2.47,-,공동,-,13000,20,1500000,2023-03-21,2023-03-24
7,2023-03-30,지아이이노베이션,코스닥,26000.0,하나,10.4,4.16,0.78,4.94,-,공동대표,-,13000,40,1500000,2023-03-21,2023-03-24
8,2023-04-06,하나27호스팩,코스닥,10000.0,하나,10.0,3.0,0.0,3.0,-,대표,-,2000,100,3750000,2023-03-27,2023-03-30
9,2023-04-06,IBKS제22호스팩,코스닥,8000.0,IBK,8.0,3.0,0.0,3.0,-,대표,-,2000,100,3000000,2023-03-27,2023-03-30


In [110]:
df2

Unnamed: 0,청약일,회사명,대표주관회사,납입일,상장일,공모금액(천원),공모주수,기준가(평가가치),1차발행가액-1,1차발행가액-2,수요예측가중평균가,확정발행가액,결정비율,공모비율,구주매출비중,상장요건,코넥스여부,경쟁률,인수수수료(천원),수수료율
0,2023-02-27,엔에이치스팩28호,NH,2023-03-03,2023-03-09,6800000.0,3400000,,,,,2000,,,0.0,,,374.56 : 1,204000.0,3.0
1,2023-03-02,금양그린파워,삼성,2023-03-07,2023-03-13,30073200.0,3007320,,,,,10000,,,0.0,,,1312.1 : 1,929262.0,3.09
2,2023-03-06,미래에셋드림스팩1호,미래,2023-03-09,2023-03-15,70000000.0,7000000,,,,,10000,,,0.0,,,0.46 : 1,1400000.0,2.0
3,2023-03-07,유안타제12호스팩,유안타,2023-03-10,2023-03-17,9000000.0,4500000,,,,,2000,,,0.0,,,1.54 : 1,270000.0,3.0
4,2023-03-20,LB인베스트먼트,미래,2023-03-23,2023-03-29,23552039.7,4618047,,,,,5100,,,33.333333,,,1165.76 : 1,485171.976,2.06
5,2023-03-21,지아이이노베이션,"NH, 하나",2023-03-24,2023-03-30,26000000.0,2000000,,,,,13000,,,0.0,,,262 : 1,1040000.0,4.0
6,2023-03-27,IBKS제22호스팩,IBK,2023-03-30,2023-04-06,8000000.0,4000000,,,,,2000,,,0.0,,,51.76 : 1,300000.0,3.75
7,2023-03-27,하나27호스팩,하나,2023-03-30,2023-04-06,10000000.0,5000000,,,,,2000,,,0.0,,,2.45 : 1,300000.0,3.0
8,2023-03-28,미래에셋비전스팩3호,미래,2023-03-31,2023-04-07,9000000.0,4500000,,,,,2000,,,0.0,,,35.13 : 1,270000.0,3.0
9,2023-04-17,마이크로투나노,한국,2023-04-20,2023-04-26,15500000.0,1000000,,,,,15500,,,0.0,,,1636.6 : 1,478950.0,3.09


In [111]:
df3

Unnamed: 0,인수기관,청약일,회사명,대표주관회사,인수회사,납입일,상장일,공모금액(백만원),공모주수,확정발행가액,인수수수료(천원),인수비율
0,IBK,2023-03-27,IBKS제22호스팩,IBK,IBK,2023-03-30,2023-04-06,8000.0,4000000,2000,300000.0,100
1,NH,2023-02-27,엔에이치스팩28호,NH,NH,2023-03-03,2023-03-09,6800.0,3400000,2000,204000.0,100
2,NH,2023-03-21,지아이이노베이션,"NH, 하나","NH, 하나, 삼성",2023-03-24,2023-03-30,10400.0,800000,13000,416000.0,40
3,삼성,2023-03-21,지아이이노베이션,"NH, 하나","NH, 하나, 삼성",2023-03-24,2023-03-30,5200.0,400000,13000,208000.0,20
4,하나,2023-03-21,지아이이노베이션,"NH, 하나","NH, 하나, 삼성",2023-03-24,2023-03-30,10400.0,800000,13000,416000.0,40
5,SK,2023-05-09,씨유박스,신한,"신한, SK, 신영",2023-05-12,2023-05-19,2250.0,150000,15000,69750.0,10
6,신영,2023-05-09,씨유박스,신한,"신한, SK, 신영",2023-05-12,2023-05-19,2250.0,150000,15000,67500.0,10
7,신한,2023-05-09,씨유박스,신한,"신한, SK, 신영",2023-05-12,2023-05-19,18000.0,1200000,15000,1021500.0,80
8,미래,2023-03-06,미래에셋드림스팩1호,미래,미래,2023-03-09,2023-03-15,70000.0,7000000,10000,1400000.0,100
9,미래,2023-03-20,LB인베스트먼트,미래,미래,2023-03-23,2023-03-29,23552.0397,4618047,5100,485171.976,100


* 이 데이터에다가 리츠를 추가하면 됨 리츠 관련

#### 1. 인수 수수료

- 3. 집합투자증권 발행에 관한 사항 > 나. 집합투자증권 발행 비용

#### 2. 청약일정, 납입일, 경쟁률, 상장일, 공모 주식수, 발행 주식 수, 공모금액

- KIND, 종목 코드로 검색해야 함

#### 3. 청약일정, 납입일, 경쟁률, 상장일, 공모 주식수, 공모 금액

In [64]:
info_df

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,report_nm,rcept_no,flr_nm,rcept_dt,rm
2,1616808,키움제6호스팩,413600,코스닥,증권발행실적보고서,20220401000151,키움제6호스팩,20220401,
3,1601222,유진스팩8호,413630,코스닥,증권발행실적보고서,20220325001061,유진스팩8호,20220325,
4,1462605,지투파워,388050,코스닥,증권발행실적보고서,20220325000748,지투파워,20220325,
13,1307335,공구우먼,366030,코스닥,[기재정정]증권발행실적보고서,20220318000733,공구우먼,20220318,
14,534701,세아메카닉스,396300,코스닥,증권발행실적보고서,20220318000272,세아메카닉스,20220318,
15,1179617,한국비엔씨,256840,코스닥,증권발행실적보고서,20220317000639,한국비엔씨,20220317,
16,113207,대한전선,1440,코스피,증권발행실적보고서,20220317000512,대한전선,20220317,
18,307028,경남제약,53950,코스닥,증권발행실적보고서,20220316000321,경남제약,20220316,
22,1152586,유일로보틱스,388720,코스닥,증권발행실적보고서,20220311001114,유일로보틱스,20220311,
23,198697,일진디스플,20760,코스피,증권발행실적보고서,20220311000960,일진디스플,20220311,


In [156]:
from xbbg import blp

Skipped: could not import 'blpapi': No module named 'blpapi'

In [None]:
# Fetch the filing document for `rcept_no` and strip newlines; get_table()
# matches with a regex that has no DOTALL flag, so each table must sit on a
# single line.
xml_text = dart.document(rcept_no)
xml_text = xml_text.replace("\n", "")

# Tables are located by the text that precedes their </TITLE> tag.
dt_table = get_table(xml_text, '청약 및 납입일정')
ov_table = get_table(xml_text, '발행 개요')
corp_table = get_table(xml_text, '인수기관별 인수금액')


In [152]:
# C=발행공시, D=지분공시
market_dict = {"Y": "코스피","K": "코스닥", "N": "코넥스", "E": "기타"}
start_dt = '2022-05-01'
end_dt = '2022-07-01'
#start_dt = '2023-01-01'
#end_dt = '2023-03-01'

start_dt2 = datetime.datetime.strptime(start_dt, '%Y-%m-%d') - datetime.timedelta(30)
end_dt2 = end_dt

info_df = dart.list(start=start_dt, end=end_dt, kind_detail='C001')
info_df = pd.concat([info_df, dart.list(start=start_dt, end=end_dt, kind_detail='G002')])

info_df = info_df.loc[[True if "증권발행실적보고서" in x else False for x in info_df.report_nm]]
info_df = info_df.loc[info_df.corp_cls.isin(['Y', 'K'])]
info_df.corp_cls = info_df.corp_cls.map(market_dict)

In [153]:
info_df.tail()

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,report_nm,rcept_no,flr_nm,rcept_dt,rm
69,1364747,가온칩스,399720,코스닥,[기재정정]증권발행실적보고서,20220517000328,가온칩스,20220517,
70,1412822,솔루스첨단소재,336370,코스피,증권발행실적보고서,20220517000291,솔루스첨단소재,20220517,
76,140566,한탑,2680,코스닥,증권발행실적보고서,20220512000667,한탑,20220512,
79,1150515,대명에너지,389260,코스닥,증권발행실적보고서,20220509000566,대명에너지,20220509,
22,1442966,마스턴프리미어리츠,357430,코스피,증권발행실적보고서(집합투자증권)((주)마스턴프리미어제1호위탁관리부동산투자회사),20220518000288,마스턴프리미어리츠,20220518,


* 리츠의 경우 info_df 수집 완료 후, KIND-신규상장 리스트만 추가할 것

In [148]:
# Receipt number of the filing to inspect.
rcept_no = '20220518000288'

# Fetch the document text and remove newlines.
xml_text = dart.document(rcept_no)
xml_text = xml_text.replace("\n", "")

# Parse every <table> and keep those whose markup mentions both the
# underwriter ("인수기관") and remarks ("비고") headers.
soup = BeautifulSoup(xml_text, 'html.parser')
table_src = [str(x) for x in soup.find_all('table')]
tables = [pd.read_html(x) for x in table_src if "인수기관" in x and "비고" in x]
# pd.read_html returns a list per table; print the first frame of the first hit.
print(tables[0][0])

      인수기관         인수수량             인수금액    비율      비고
0  삼성증권(주)  11,860,000주  59,300,000,000원  100%  대표주관회사
1       합계  11,860,000주  59,300,000,000원  100%     NaN


In [139]:
check_new

[]

In [88]:


# Only the underwriter-amount table is requested; the other two lookups are
# disabled. get_table() indexes the first regex match, so it raises
# IndexError when the title is not found in xml_text.
#dt_table = get_table(xml_text, '청약 및 납입일정')
#ov_table = get_table(xml_text, '발행 개요')
corp_table = get_table(xml_text, '인수기관별 인수금액')


IndexError: list index out of range

In [64]:
# Collect the per-company registration-statement rows for every filing.
frames = []
for corp_name in info_df.corp_name:
    try:
        base_df = get_issuance3(info_df, corp_name)
    except IndexError:
        print(corp_name,"- 수집불가")
        # Skip this company: without `continue` the previous company's
        # base_df would be reused under the current company's name.
        continue

    if base_df.shape[0] != 0:
        frames.append(get_regstate3(base_df, corp_name))

# Single concat instead of growing the frame inside the loop; raises
# ValueError from pandas if no company could be collected.
output = pd.concat(frames)
output = output.sort_values(['인수기관', '청약일'], ascending = True)
# Renumber after sorting, then drop exact duplicate rows (same order of
# operations as before, so surviving rows keep their sorted positions).
output = output.reset_index(drop = True)
output = output.drop_duplicates()

신한알파리츠 - 수집불가
{'status': '013', 'message': '조회된 데이타가 없습니다.'}
삼성FN리츠 - 수집불가
{'status': '013', 'message': '조회된 데이타가 없습니다.'}
한화리츠 - 수집불가
{'status': '013', 'message': '조회된 데이타가 없습니다.'}


* 참고자료

In [None]:
# st.write('<p style="font-size:14px; color:red">'+'- 문서 '+info[i][:14]+'에서 오류 발생! 데이터솔루션부에 문의하세요.</p>',unsafe_allow_html=True)

In [355]:
df1 = dart.regstate('오토앤', '지분증권'); df1

Unnamed: 0,rcept_no,corp_cls,corp_code,corp_name,sbd,pymd,sband,asand,asstd,exstk,...,se,amt,hdr,rl_cmp,bfsl_hdstk,slstk,atsl_hdstk,grtrs,exavivr,grtcnt
0,20211206000109,K,1084294,오토앤,2022년 01월 11일 ~ 2022년 01월 12일,2022년 01월 14일,2022년 01월 11일,2022년 01월 14일,-,-,...,,,,,,,,,,
0,20211206000109,K,1084294,오토앤,,,,,,,...,,,,,,,,,,
0,20211206000109,K,1084294,오토앤,,,,,,,...,,,,,,,,,,
1,20211206000109,K,1084294,오토앤,,,,,,,...,,,,,,,,,,
0,20211206000109,K,1084294,오토앤,,,,,,,...,시설자금,-,,,,,,,,
1,20211206000109,K,1084294,오토앤,,,,,,,...,운영자금,11920254000,,,,,,,,
2,20211206000109,K,1084294,오토앤,,,,,,,...,채무상환,-,,,,,,,,
3,20211206000109,K,1084294,오토앤,,,,,,,...,발행제비용,528151000,,,,,,,,
0,20211206000109,K,1084294,오토앤,,,,,,,...,,,-,-,-,-,-,,,
0,20211206000109,K,1084294,오토앤,,,,,,,...,,,,,,,,-,-,-


In [376]:
from bs4 import BeautifulSoup
import re


In [372]:
# Fetch one filing document by receipt number, remove newlines, and parse
# it with BeautifulSoup's built-in HTML parser.
xml_text = dart.document('20211206000109')
xml_text = xml_text.replace("\n", "")

soup = BeautifulSoup(xml_text, 'html.parser')

In [None]:
'{}</TITLE>.*?</TABLE-GROUP>'

In [None]:
re.findall(".*?원 ~ .*?원", str(xml_text))

In [358]:
table_src = soup.select('table')

In [367]:
def check_table(x):
    """Return True when str(x) contains all of "공모", "지분율" and "IPO"."""
    text = str(x)
    return all(keyword in text for keyword in ("공모", "지분율", "IPO"))

In [368]:
tb_1 = [pd.read_html(str(x)) for x in table_src if check_table(str(x))]

In [371]:
tb_1[-1][0]

Unnamed: 0_level_0,구분,주주명,공모 후,공모 후,유통가능한 주식,유통가능한 주식,매각제한주식,매각제한주식,매각제한기간,매각제한사유
Unnamed: 0_level_1,구분,주주명,주식 수,지분율,주식 수,지분율,주식 수,지분율,(상장일 기준),매각제한사유
0,최대주주등,최찬욱,1431860,11.1%,-,-,1431860,11.1%,2년 6개월,주1)
1,최대주주등,이재엽,819500,6.4%,-,-,819500,6.4%,2년 6개월,주1)
2,최대주주등,이상용,792120,6.2%,-,-,792120,6.2%,2년 6개월,주1)
3,최대주주등,정재훈,68880,0.5%,-,-,68880,0.5%,2년 6개월,주1)
4,최대주주등,이창민,56280,0.4%,-,-,56280,0.4%,2년 6개월,주1)
5,최대주주등,이승훈,56280,0.4%,-,-,56280,0.4%,2년 6개월,주1)
6,최대주주등,전병철,42000,0.3%,-,-,42000,0.3%,2년 6개월,주1)
7,최대주주등,왕길항,123480,1.0%,-,-,123480,1.0%,2년 6개월,주1)
8,최대주주등,전은석,18900,0.1%,-,-,18900,0.1%,2년 6개월,주1)
9,최대주주등,구영준,31332,0.2%,-,-,31332,0.2%,2년 6개월,주1)


In [360]:
tb_1 = [pd.read_html(str(x)) for x in table_src if check_table(str(x))][-1][0]

IndexError: list index out of range

In [None]:
tb_1

In [343]:
pd.read_html(str(table_src[140]))[0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,구분,주주명,관계,공모전,공모전,공모후,공모후,유통가능물량,유통가능물량,매각제한물량,매각제한물량,매각제한물량
1,구분,주주명,관계,주식수,지분율,주식수,지분율,주식수,지분율,주식수,지분율,기간
2,최대주주 등,티에이어드바이저(유),최대주주,4686410,61.81%,4366410,49.01%,-,-,4366410,49.01%,상장 후 &cr2년 6개월
3,최대주주 등,도현순,대표이사,60000,0.79%,60000,0.67%,-,-,60000,0.67%,상장 후 &cr2년 6개월
4,최대주주 등,이정자,임원,41000,0.54%,41000,0.46%,-,-,41000,0.46%,상장 후 6개월
5,최대주주 등,도정희,특수관계인,50000,0.66%,50000,0.56%,-,-,50000,0.56%,상장 후 6개월
6,최대주주 등,소계,소계,4837410,63.81%,4517410,50.70%,-,-,4517410,50.70%,-
7,벤처금융 및 &cr금융투자자,㈜제니타스인베스트먼트,㈜제니타스인베스트먼트,100000,1.32%,100000,1.12%,-,-,100000,1.12%,상장 후 6개월
8,벤처금융 및 &cr금융투자자,티그리스투자조합34호,티그리스투자조합34호,476800,6.29%,476800,5.35%,-,-,476800,5.35%,상장 후 1개월
9,벤처금융 및 &cr금융투자자,티에스201812M&A투자조합,티에스201812M&A투자조합,150000,1.98%,150000,1.68%,-,-,150000,1.68%,상장 후 1개월


In [292]:
# All <table> elements of the parsed document.
table_src = soup.select('table')

## Original note: case where the finalized-terms filing contains the
## underwriting fee. The filter below keeps tables whose first parsed frame
## has a "공모후" column.
# NOTE(review): every table is parsed twice here (once for the test, once
# for the result).
tb_1 = [pd.read_html(str(x)) for x in table_src if "공모후" in pd.read_html(str(x))[0]]


IndexError: list index out of range

In [293]:
tb_1

[  인수인   인수인.1   증권의종류     인수수량         인수금액       인수대가  인수방법
 0  대표  미래에셋증권  기명식보통주  2302064  12200939200  422304916  총액인수
 1  인수   현대차증권  기명식보통주   575515   3050229500  100657573  총액인수]

In [114]:
# C=발행공시, D=지분공시
market_dict = {"Y": "코스피","K": "코스닥", "N": "코넥스", "E": "기타"}
start_dt = '2022-01-01'
end_dt = '2022-01-31'

start_dt2 = datetime.datetime.strptime(start_dt, '%Y-%m-%d') - datetime.timedelta(30)
end_dt2 = end_dt

info_df = dart.list(start=start_dt, end=end_dt, kind_detail='C001')

check_df = info_df.loc[[True if "발행조건확정" in x else False for x in info_df.report_nm]]
b_df = info_df.loc[[True if "증권발행실적보고서" in x else False for x in info_df.report_nm]]
append_list = [x for x in b_df.corp_name if x not in list(check_df.corp_name)]

for corp in append_list:
    temp_df = dart.list(corp, end=end_dt, kind_detail='C001') # ~ end_dt 까지의 공시
    temp_df = temp_df.loc[[True if "발행조건확정" in x else False for x in temp_df.report_nm]] # 발행조건확정 보고서
    temp_df = temp_df.loc[temp_df.rcept_dt == temp_df.rcept_dt.max()] # 가장 최근 발행조건확정 보고서 가져오기
    check_df = pd.concat([check_df.loc[check_df.corp_name.isin(b_df.corp_name)], temp_df])

df1 = dart.regstate('이오플로우', '지분증권')

#xml_text = dart.document('20220110000195')
xml_text = dart.document('20220104000023')
xml_text = xml_text.replace("\n", "")

from bs4 import BeautifulSoup

soup = BeautifulSoup(xml_text, 'html.parser')

table_src = soup.select('table')

## 발행조건확정에 인수대가가 존재하는 경우
tb_1 = [pd.read_html(str(x)) for x in table_src if "인수대가" in pd.read_html(str(x))[0]][-1]

## 발행조건확정에 인수대가가 존재하지 않는 경우는 불가
## 증권신고서에 인수대가가 텍스트로 적혀있는 경우 불가

402630.9936