## Import

In [1]:
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementNotInteractableException
from selenium import webdriver
from tqdm import tqdm
from bs4 import BeautifulSoup
from IB_utils import *

import requests
import streamlit as st
import pandas as pd
import numpy as np
import OpenDartReader
import warnings
import time, datetime
import re, os

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation/FutureWarning messages from
# pandas, NumPy and Selenium, so API changes will not be announced.
warnings.filterwarnings('ignore')

In [11]:
# load data
def read_xlsx(name):
    """Read the first sheet of an Excel workbook into a DataFrame via a hidden Excel app.

    Parameters
    ----------
    name : str
        Path of the workbook to open.

    Returns
    -------
    pandas.DataFrame
        The table that starts at cell A1 of the first sheet, converted with
        ``index=False`` and ``expand='table'``.

    Notes
    -----
    The Excel instance started here is always shut down, even when opening
    or reading the workbook raises.
    """
    # NOTE(review): `xw` is not imported in the visible cells; presumably it is
    # xlwings made available through `from IB_utils import *` -- confirm.
    instance = xw.App(visible=False)
    try:
        xlsx_data = xw.Book(name).sheets[0]
        df = xlsx_data.range('A1').options(pd.DataFrame, index = False, expand = 'table').value
    finally:
        # Release the hidden Excel process on every path; otherwise a failed
        # read leaves an invisible Excel instance running.
        try:
            instance.quit()
        finally:
            instance.kill()
    return df

def read_data(file):
    """Load a spreadsheet into a DataFrame, falling back to ``read_xlsx``.

    Parameters
    ----------
    file : str
        Path of the spreadsheet to load.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_excel(file)``; if that raises an ordinary
        exception, the result of ``read_xlsx(file)`` instead.
    """
    try:
        return pd.read_excel(file)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare `except:`.
        return read_xlsx(file)

In [2]:
# Chrome options shared at module level; get_driver() passes them to Chrome
# when a non-visible browser is requested. The only flag set is --headless.
options = Options()
options.add_argument('--headless')

def get_driver(viz_opt = False):
    """Create a Chrome WebDriver.

    Parameters
    ----------
    viz_opt : bool, default False
        When truthy, Chrome is started with its default settings; otherwise
        it is started with the module-level ``options`` object.

    Returns
    -------
    selenium.webdriver.Chrome
        A newly started driver; the caller is responsible for quitting it.
    """
    # Alternative kept from the original for reference (webdriver_manager):
    # webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    if not viz_opt:
        return webdriver.Chrome(options=options)
    return webdriver.Chrome()

In [7]:
# Directory where the output workbook is stored.
data_path = './datasets/'

# Create the directory if it is missing. `exist_ok=True` avoids the
# check-then-create race of `isdir` + `mkdir`, and `makedirs` also creates
# missing parent directories should `data_path` ever be nested.
os.makedirs(data_path, exist_ok=True)

In [26]:
def set_kind(driver, start_dt, end_dt):
    """Search the KIND (kind.krx.co.kr) public-offering page and return the result table.

    The function loads the search page, fills in the date range, runs the
    search, switches the page size via the fourth option of the page-size
    selector (the "100 items" entry per the original comment) and parses the
    resulting HTML.

    Parameters
    ----------
    driver : selenium WebDriver
        An already started browser session; it is navigated by this function.
    start_dt : str
        Text typed into the ``fromDate`` input (the notebook passes
        'YYYY-MM-DD' strings).
    end_dt : str
        Text typed into the ``toDate`` input.

    Returns
    -------
    pandas.DataFrame
        The first parsed table that has a '수요예측일정' column, with an extra
        ``inner`` column holding the digits extracted from each table row's
        ``onclick`` attribute.

    Raises
    ------
    IndexError
        If an expected page element is missing or no parsed table has the
        '수요예측일정' column.
    """
    # Local import so the block stays self-contained.
    from io import StringIO

    driver.get('https://kind.krx.co.kr/listinvstg/pubofrprogcom.do?method=searchPubofrProgComMain')

    wait = WebDriverWait(driver, 10, poll_frequency=0.25)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ord-02')))
    
    # Start date
    start_date = driver.find_elements(By.ID, 'fromDate')[0]
    start_date.clear()
    time.sleep(0.25)
    start_date.send_keys(start_dt)
    # ESC after typing -- presumably dismisses the date-picker popup (TODO confirm)
    start_date.send_keys(Keys.ESCAPE)
    
    # End date
    end_date = driver.find_elements(By.ID, 'toDate')[0]
    end_date.clear()
    time.sleep(0.25)
    end_date.send_keys(end_dt)
    end_date.send_keys(Keys.ESCAPE)
    
    # Run the search
    search = driver.find_elements(By.CLASS_NAME, 'btn-sprite.type-00.vmiddle.search-btn')
    search[0].click()
    time.sleep(0.5)

    # Open the page-size selector (the second element with this id is used)
    pages = driver.find_elements(By.ID, 'currentPageSize')
    pages[1].click()
    time.sleep(0.5)

    # Choose the fourth option: 100 items per page
    driver.find_elements(By.XPATH, '/html/body/section[2]/section/article/section[2]/div[2]/select/option[4]')[0].click()
    time.sleep(0.25)

    # Apply the page-size choice
    driver.find_elements(By.CLASS_NAME, 'btn-sprite.btn-go.vmiddle')[0].click()
    time.sleep(1)

    # Parse the tables on the page. The HTML is wrapped in StringIO because
    # passing a literal HTML string to read_html is deprecated (pandas >= 2.1).
    table_src = pd.read_html(StringIO(driver.page_source))
    # Keep the first table that has the '수요예측일정' column
    table = [x for x in table_src if '수요예측일정' in x][0]
    # Detail-page number: the digits found in each row's onclick attribute
    rows = driver.find_elements(By.XPATH, '/html/body/section[2]/section/article/section[1]/table/tbody/tr')
    table['inner'] = [re.sub("[^0-9]", "", x.get_attribute('onclick')) for x in rows]
    
    return table

In [79]:
# Company names kept for checking; not referenced by the other visible cells.
check_corp = [
    '프로테옴텍',
    '큐라티스',
    '마녀공장',
    '나라셀라',
    '진영',
    '기가비스',
]

In [120]:
# Run configuration: output form option and the reporting window.
opt = 'IB전략'
start_dt = '2023-03-01'
end_dt = '2023-05-31'
# Alternative window kept from the original:
#start_dt = '2023-07-01'
#end_dt = '2023-07-20'

# Look-back start dates derived from end_dt: 80 days back (passed to
# initial_set) and 180 days back (passed to set_kind), as 'YYYY-MM-DD' strings.
start_dt2 = (
    datetime.datetime.strptime(end_dt, '%Y-%m-%d') - datetime.timedelta(days = 80)
).strftime('%Y-%m-%d')
start_dt3 = (
    datetime.datetime.strptime(end_dt, '%Y-%m-%d') - datetime.timedelta(days = 180)
).strftime('%Y-%m-%d')

# DART side: initial_set returns a frame plus a `dart` object reused further down.
dart_df, dart = initial_set(start_dt2, end_dt)

# kind: scrape the KIND listing with a visible browser.
driver = get_driver(viz_opt = True)
try:
    driver.set_window_size(1920, 1080)
    table = set_kind(driver, start_dt3, end_dt)
    kind_output = get_kind_inner(driver, table)
finally:
    # Close this browser before `driver` is rebound below; otherwise the
    # Chrome process is orphaned.
    driver.quit()
first_df = post_proc(dart_df, kind_output, start_dt)

# ipo stock: second pass with a headless browser.
driver = get_driver()
try:
    driver.set_window_size(1920, 1080)
    ipo_df = ipo_main(driver, first_df)
finally:
    # Nothing after this point in the cell uses the browser.
    driver.quit()
first_df = pd.merge(first_df, ipo_df, on = 'corp_name', how = 'left')
# np.nan instead of np.NaN: the upper-case alias was removed in NumPy 2.0.
first_df.replace(np.nan, 0, inplace = True)
# Join key: names containing "스팩" (SPAC) go through change_join, others are used as-is.
first_df['key'] = [change_join(x) if "스팩" in x else x for x in first_df.corp_name]

outer_df = get_38(start_dt, end_dt)
second_df = pd.merge(first_df, outer_df, left_on = 'key', right_on = '기업명', how = 'inner')
# Drop the join helpers and keep the right-hand stock code under its plain name.
del second_df['기업명'], second_df['key'], second_df['stock_code_x']
second_df.rename(columns = {'stock_code_y':'stock_code'}, inplace = True)

third_df = get_dd(dart, second_df)
third_df, fourth_df = get_d_tables(dart, third_df)
head_df = change_form(third_df, opt)

In [117]:
# Persist head_df. On the first run the workbook is created from head_df alone;
# afterwards the stored rows are loaded, head_df is appended, the rows are
# sorted by the "수요예측(시작일)" column, exact duplicate rows are dropped, and
# the workbook is overwritten.
if not os.path.isfile(data_path + "ib-strategy-data.xlsx"):
    head_df.to_excel(data_path + "ib-strategy-data.xlsx", index = False)
else:
    origin_df = read_data(data_path + "ib-strategy-data.xlsx")
    origin_df = pd.concat([origin_df, head_df])
    origin_df = origin_df.sort_values("수요예측(시작일)")
    origin_df = origin_df.drop_duplicates()
    origin_df.to_excel(data_path + "ib-strategy-data.xlsx", index = False)