In [4]:
# 라이브러리
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from streamlit_option_menu import option_menu
from datetime import datetime, timedelta, date
from selenium import webdriver
from utils import *
from io import BytesIO
from pyxlsb import open_workbook as open_xlsb

import streamlit as st
import pandas as pd
import numpy as np
import OpenDartReader
import warnings
import time
import re
import os
import copy

warnings.filterwarnings('ignore')

# 파일 불러오기
def read_data(file):
    """Load the first sheet of an Excel file into a DataFrame.

    ``pandas.read_excel`` is tried first. If it raises, the workbook is opened
    through an invisible xlwings application instead, and the table that
    starts at cell ``A1`` of the first sheet is returned.

    Args:
        file: Path of the Excel file (anything ``pd.read_excel`` accepts).

    Returns:
        pandas.DataFrame: The contents of the first sheet.

    Raises:
        Exception: Whatever the xlwings fallback raises when it also fails;
            the original pandas error stays attached as the exception context.
    """
    try:
        return pd.read_excel(file)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        # NOTE(review): `xw` is not imported by name in this file; presumably
        # it is provided by `from utils import *` -- confirm.
        instance = xw.App(visible=False)
        try:
            first_sheet = xw.Book(file).sheets[0]
            return first_sheet.range('A1').options(
                pd.DataFrame, index=False, expand='table'
            ).value
        finally:
            # Always shut the Excel process down, even when reading failed,
            # so no hidden Excel instance is left running.
            instance.quit()
            instance.kill()

# 셀레늄 실행
def get_driver():
    """Create a headless Chrome WebDriver.

    Returns:
        selenium.webdriver.Chrome: A driver started with the headless,
        no-sandbox and related switches listed below.
    """
    options = Options()
    chrome_switches = (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        # Previously added twice; once is enough.
        "--disable-gpu",
        "--disable-features=NetworkService",
        # Chrome expects "width,height"; the former "1920x1080" value is not
        # in that format.
        "--window-size=1920,1080",
    )
    for switch in chrome_switches:
        options.add_argument(switch)
    return webdriver.Chrome(options=options)

# RPA 코드 실행
def _run_with_driver(collector, *args):
    """Call ``collector(driver, *args)`` on a fresh driver and always quit it."""
    driver = get_driver()
    try:
        driver.set_window_size(1920, 1080)
        return collector(driver, *args)
    finally:
        # Release the browser process even when the collector raises.
        # NOTE(review): if the collector already quits the driver itself, this
        # second quit is expected to be harmless for Chrome -- confirm.
        driver.quit()


def _merge_with_saved(path, new_df, sort_key):
    """Return ``new_df`` merged with the rows saved at ``path``, if that file exists."""
    if not os.path.isfile(path):
        return new_df
    saved_df = read_data(path)
    return pd.concat([saved_df, new_df]).sort_values(sort_key).drop_duplicates()


def main():
    """Collect recent IPO data and refresh the Excel outputs in ``./datasets/``.

    The run covers the last 60 days up to today. It lists DART filings,
    enriches them through the collectors ``kind_main``, ``ipo_main``,
    ``get_38``, ``get_dd`` and ``get_d_tables``, reshapes the result with
    ``change_form`` into three tables, and writes them to ``form1.xlsx``,
    ``form2.xlsx``, ``form3.xlsx`` and the combined
    ``corporate-finance-data.xlsx``. When the combined workbook already
    exists, new rows are merged with the previously saved ones.

    NOTE(review): the collector/helper functions are not defined in this file;
    presumably they come from ``from utils import *`` -- confirm.

    Returns:
        None. The results are written to disk.
    """
    ### STEP1. Assign the collection start and end dates (last 60 days).
    today = date.today()
    end_dt = datetime.strftime(today, '%Y-%m-%d')
    diff_day = timedelta(days=60)
    start_dt = datetime.strftime(today - diff_day, '%Y-%m-%d')

    ### STEP2. Use OpenDartReader to list the filings in the period.
    # SECURITY NOTE: this API key is hard-coded in source. It can now be
    # overridden with the DART_API_KEY environment variable; the literal is
    # kept only as a backward-compatible fallback and should be rotated and
    # removed from the code.
    api_key = os.environ.get(
        'DART_API_KEY', '1b39652cef07f626c9d37375edf582ee51b1407f'
    )
    dart = OpenDartReader(api_key)
    opt = '기업금융1부'

    market_dict = {"Y": "코스피", "K": "코스닥", "N": "코넥스", "E": "기타"}

    info_df = pd.concat([
        dart.list(start=start_dt, end=end_dt, kind_detail='C001'),
        dart.list(start=start_dt, end=end_dt, kind_detail='G002'),
    ])
    # Keep only reports whose name contains the literal text below.
    is_issue_report = info_df.report_nm.str.contains(
        "증권발행실적보고서", regex=False, na=False
    )
    info_df = info_df.loc[is_issue_report]
    info_df = info_df.loc[info_df.corp_cls.isin(['Y', 'K'])]
    info_df = info_df.assign(corp_cls=info_df.corp_cls.map(market_dict))

    ### STEP3. Collect the KIND items (Selenium).
    first_df = _run_with_driver(kind_main, info_df, start_dt, end_dt)

    ### STEP4. Collect the ipo stock items (Selenium).
    ipo_df = _run_with_driver(ipo_main, info_df)
    first_df = pd.merge(first_df, ipo_df, on='corp_name', how='left')

    ### STEP5. Collect the 38 Communication items.
    outer_df = get_38(start_dt, end_dt)
    second_df = pd.merge(first_df, outer_df, on='stock_code', how='inner')

    ### STEP6. Reshape into the business forms; underwriter info via OpenDartReader.
    third_df = get_dd(dart, second_df)
    third_df, fourth_df = get_d_tables(dart, third_df)

    form_1 = change_form(fourth_df, opt, 1)
    form_2 = change_form(third_df, opt, 2)
    form_3 = change_form(fourth_df, opt, 3)

    # Keep only rows whose listing date falls on or after the start date.
    form_1 = form_1.loc[form_1['상장일'] >= start_dt]
    form_2 = form_2.loc[form_2['상장일'] >= start_dt]
    form_3 = form_3.loc[form_3['상장일'] >= start_dt]

    ### STEP7. Save and refresh the files.
    data_path = './datasets/'
    os.makedirs(data_path, exist_ok=True)

    workbook_path = data_path + "corporate-finance-data.xlsx"
    # As before, the combined workbook decides whether history is merged.
    has_history = os.path.isfile(workbook_path)

    # (per-form file, sheet name in the workbook, new rows, merge sort key)
    outputs = [
        ("form1.xlsx", "01_리그테이블", form_1, "상장일"),
        ("form2.xlsx", "02_통합집계_Rawdata", form_2, "청약일"),
        ("form3.xlsx", "03_IPO현황_Summary", form_3, "인수기관"),
    ]

    sheets = []
    for file_name, sheet_name, frame, sort_key in outputs:
        form_path = data_path + file_name
        if has_history:
            # A missing per-form file no longer aborts the run; the new rows
            # are then saved on their own.
            frame = _merge_with_saved(form_path, frame, sort_key)
        frame.to_excel(form_path, index=False)
        sheets.append((sheet_name, frame))

    with pd.ExcelWriter(workbook_path, engine='xlsxwriter') as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

# Run the RPA pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()

In [6]:
# Directory that holds the Excel outputs written by main().
data_path = './datasets/'

In [10]:
# Read the three sheets of the combined workbook in one call; the returned
# dict follows the order of the requested sheet names.
a1, a2, a3 = pd.read_excel(
    data_path + "corporate-finance-data.xlsx",
    sheet_name=['01_리그테이블', '02_통합집계_Rawdata', '03_IPO현황_Summary'],
).values()

In [14]:
# 60-day look-back window ending today, as 'YYYY-MM-DD' strings.
diff_day = timedelta(days=60)
today = date.today()
start_dt = (today - diff_day).strftime('%Y-%m-%d')
end_dt = today.strftime('%Y-%m-%d')

In [17]:
# Rows of a1 whose '상장일' lies within [start_dt, end_dt], both ends inclusive.
a1.loc[a1['상장일'].between(start_dt, end_dt)]

Unnamed: 0,상장일,업체,시장구분,발행금액,인수회사,인수금액,인수수수료,청약수수료추정,수수료합계,건수,주관형태,상장트랙,공모가,비율,청약일,납입일,총기관배정수량
40,2023-05-24,기가비스,코스닥,953.85094,삼성,953.85094,-,-,-,-,대표,-,43000,100.0,2023-05-15,2023-05-18,1531524
41,2023-06-01,진영,코스닥,212.5,하이,212.5,-,-,-,-,대표,-,5000,100.0,2023-05-22,2023-05-25,3187500
42,2023-06-02,나라셀라,코스닥,290.0,유진,14.5,-,-,-,-,인수,-,20000,5.0,2023-05-22,2023-05-25,2088000
43,2023-06-02,나라셀라,코스닥,290.0,신영,275.5,-,-,-,-,대표,-,20000,95.0,2023-05-22,2023-05-25,2088000
44,2023-06-08,마녀공장,코스닥,320.0,한국,288.0,-,-,-,-,대표,-,16000,90.0,2023-05-25,2023-05-31,3000000
45,2023-06-08,마녀공장,코스닥,320.0,유진,32.0,-,-,-,-,인수,-,16000,10.0,2023-05-25,2023-05-31,3000000
46,2023-06-15,큐라티스,코스닥,140.0,대신,91.0,-,-,-,-,공동대표,-,4000,65.0,2023-06-05,2023-06-09,5250000
47,2023-06-15,큐라티스,코스닥,140.0,신영,49.0,-,-,-,-,공동대표,-,4000,35.0,2023-06-05,2023-06-09,5250000
48,2023-06-16,프로테옴텍,코스닥,72.0,키움,72.0,-,-,-,-,대표,-,4500,100.0,2023-06-07,2023-06-12,1152600
49,2023-06-22,하이제8호스팩,코스닥,85.0,하이,85.0,-,-,-,-,대표,-,2000,100.0,2023-06-13,2023-06-16,3187500


In [18]:
# Display the '02_통합집계_Rawdata' sheet loaded into a2.
a2

Unnamed: 0,청약일,회사명,대표주관회사,납입일,상장일,공모금액(천원),공모주수,기준가(평가가치),1차발행가액(하단),1차발행가액(상단),수요예측가중평균가,확정발행가액,결정비율,공모비율,구주매출비중,상장요건,코넥스여부,경쟁률,인수수수료(천원),수수료율
0,2023-01-10,티이엠씨,한화,2023-01-13,2023-01-19,50400000.0,1800000,-,32000,38000,-,28000,-,-,0.0,-,-,0.81 : 1,-,-
1,2023-01-10,한주라이트메탈,"미래, 현대차",2023-01-13,2023-01-19,20150000.0,6500000,-,2700,3100,-,3100,-,-,35.384615,-,-,565.18 : 1,-,-
2,2023-01-16,오브젠,한국,2023-01-19,2023-01-30,13967208.0,775956,-,18000,24000,-,18000,-,-,0.0,-,-,5.97 : 1,-,-
3,2023-01-16,미래반도체,신한,2023-01-19,2023-01-27,21600000.0,3600000,-,5300,6000,-,6000,-,-,0.0,-,-,938.27 : 1,-,-
4,2023-01-25,삼기이브이,대신,2023-01-30,2023-02-03,39072407.0,3552037,-,13800,16500,-,11000,-,-,40.000006,-,-,117.63 : 1,-,-
5,2023-01-26,스튜디오미르,미래,2023-01-31,2023-02-07,19500000.0,1000000,-,15300,19500,-,19500,-,-,20.0,-,-,1592 : 1,-,-
6,2023-01-31,꿈비,유진,2023-02-03,2023-02-09,10000000.0,2000000,-,4000,4500,-,5000,-,-,12.5,-,-,1772.59 : 1,-,-
7,2023-02-06,샌즈랩,키움,2023-02-09,2023-02-15,38850000.0,3700000,-,8500,10500,-,10500,-,-,18.918919,-,-,868.07 : 1,-,-
8,2023-02-07,제이오,한국,2023-02-10,2023-02-16,52000000.0,4000000,-,10000,13000,-,13000,-,-,0.0,-,-,142.47 : 1,-,-
9,2023-02-09,이노진,IBK,2023-02-14,2023-02-20,7800000.0,2600000,-,2500,3000,-,3000,-,-,0.0,-,-,1643.88 : 1,-,-


In [15]:
# Rows of a3 whose '상장일' lies within [start_dt, end_dt], both ends inclusive.
a3.loc[a3['상장일'].between(start_dt, end_dt)]

Unnamed: 0,인수기관,청약일,회사명,대표주관회사,인수회사,납입일,상장일,공모금액(백만원),공모주수,공모가,인수수수료,인수비율
0,DB,2023-07-03,DB금융스팩11호,DB,DB,2023-07-06,2023-07-12,10000.0,5000000,2000,0,100.0
3,KB,2023-06-13,KB제25호스팩,KB,KB,2023-06-16,2023-06-23,8000.0,4000000,2000,0,100.0
8,NH,2023-06-13,엔에이치스팩29호,NH,NH,2023-06-16,2023-06-23,25500.0,12750000,2000,0,100.0
9,NH,2023-06-20,알멕,NH,NH,2023-06-23,2023-06-30,50000.0,1000000,50000,0,100.0
12,교보,2023-06-26,교보14호스팩,교보,교보,2023-06-29,2023-07-06,7700.0,3850000,2000,0,100.0
15,대신,2023-06-05,큐라티스,"대신, 신영","대신, 신영",2023-06-09,2023-06-15,9100.0,3500000,4000,0,65.0
16,미래,2023-07-05,필에너지,미래,"미래, 삼성",2023-07-10,2023-07-14,81281.25,2812500,34000,0,85.0
25,삼성,2023-07-10,센서뷰,삼성,삼성,2023-07-13,2023-07-19,17550.0,3900000,4500,0,100.0
26,삼성,2023-07-05,필에너지,미래,"미래, 삼성",2023-07-10,2023-07-14,14343.75,2812500,34000,0,15.0
27,삼성,2023-05-15,기가비스,삼성,삼성,2023-05-18,2023-05-24,95385.094,2218258,43000,0,100.0
