In [1]:
import sys
from pathlib import Path
# Append the parent of the current working directory to the import search
# path. Presumably this is what lets the project-local imports used further
# down (`agents`, `schemas`) resolve when the notebook is run from a
# subdirectory -- confirm against the repository layout.
project_path = str(Path.cwd().parent)
sys.path.append(project_path)

import sys
import requests

from bs4 import BeautifulSoup
from agents import get_user_agents
from collections import ChainMap


def extract_data_from_table1(table):
    """Read the header fields from the first cell of *table*.

    The first ``tr > td`` cell is located, then three values are taken from
    it: the ``src`` attribute of its first ``img`` element (empty string when
    the attribute is absent) and the text of the ``.view_tit`` and
    ``.view_txt01`` elements.

    Args:
        table: Element exposing ``select_one`` (e.g. a BeautifulSoup tag).

    Returns:
        dict with the keys ``ci_market_separation``, ``ci_name`` and
        ``ci_code``.
    """
    cell = table.select_one("tr > td")
    image = cell.select_one("img")

    header = {}
    header["ci_market_separation"] = image.get("src", "")
    header["ci_name"] = cell.select_one(".view_tit").text
    header["ci_code"] = cell.select_one(".view_txt01").text
    return header


def extract_data_from_table2(table):
    """Map the ``td.txt`` cells of *table* onto a fixed list of field names.

    Every ``tr`` of the table is visited in order and the text of each of its
    ``td.txt`` cells is collected (``None`` text becomes ``""``). The
    collected texts are then paired positionally with the field names below;
    pairing stops at whichever of the two sequences is shorter.

    Args:
        table: Element exposing ``select`` (e.g. a BeautifulSoup tag).

    Returns:
        dict of field name -> cell text.
    """
    field_names = (
        "ci_ceo",
        "ci_establishment_date",
        "ci_company_separation",
        "ci_brn",
        "ci_tel",
        "ci_homepage",
        "ci_settlement_month",
        "ci_worker_cnt",
        "ci_industries",
        "ci_important_product",
        "ci_stocks_separation",
        "ci_lead_manager",
        "ci_address",
    )

    cell_texts = []
    for row in table.select("tr"):
        for cell in row.select("td.txt"):
            text = cell.text
            cell_texts.append("" if text is None else text)

    return dict(zip(field_names, cell_texts))


def extract_data_from_table3(table):
    """Map the 240-wide cells of *table* onto a fixed list of field names.

    Cells matching ``tr > td[width="240"]`` are read in document order
    (``None`` text becomes ``""``) and paired positionally with the field
    names below; pairing stops at whichever sequence is shorter.

    Args:
        table: Element exposing ``select`` (e.g. a BeautifulSoup tag).

    Returns:
        dict of field name -> cell text.
    """
    field_names = (
        "ci_review_c_date",
        "ci_review_a_date",
        "ci_turnover",
        "ci_before_corporate_tax",
        "ci_net_profit",
        "ci_capital",
        "ci_largest_shareholder",
        "ci_largest_shareholder_rate",
        "ci_po_expected_price",
        "ci_public_offering_stocks",
        "ci_po_expected_amount",
        "ci_listing_expected_stocks",
    )

    values = []
    for cell in table.select('tr > td[width="240"]'):
        text = cell.text
        values.append("" if text is None else text)

    return dict(zip(field_names, values))


def scrape_ipostock(code, timeout=10):
    """Download the ipostock.co.kr detail page for *code* and parse it.

    The page is fetched over HTTP, parsed with BeautifulSoup (lxml parser,
    content decoded as UTF-8) and three tables are handed to the
    ``extract_data_from_table1/2/3`` helpers; their results are merged into
    one dict (later tables win on duplicate keys).

    Args:
        code: Company code substituted into the ``code=`` query parameter.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        dict combining the fields extracted from the three tables.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
        ValueError: if the page does not contain exactly two
            ``table[width="780"][class="view_tb"]`` elements.
    """
    url = f"http://www.ipostock.co.kr/view_pg/view_01.asp?code={code}"

    # Without an explicit timeout requests may block forever on a stalled
    # server.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, "lxml", from_encoding="utf-8")

    table1 = soup.find("table", width="550", style="margin:0 auto;")
    # Exactly two matching tables are expected; the unpacking enforces that.
    table2, table3 = soup.select('table[width="780"][class="view_tb"]')

    # Each table is parsed once, with its own extractor.
    return {
        **extract_data_from_table1(table1),
        **extract_data_from_table2(table2),
        **extract_data_from_table3(table3),
    }


if __name__ == "__main__":
    # Manual smoke test: scrape one company page, load the result into the
    # project schema and pretty-print it.
    # Bionote (alternative sample code)
    # code = "B202206162"
    # Raemong Raein
    code = "B202010131"
    result = scrape_ipostock(code)

    # Imported here rather than at the top of the file; resolving this
    # project-local module presumably relies on the sys.path entry added at
    # the top of the notebook -- confirm.
    from schemas.general import GeneralCreateSchema

    # The scraped dict is passed as keyword arguments to the schema.
    g = GeneralCreateSchema(**result)
    from pprint import pprint as pp

    # NOTE(review): `.dict()` suggests a pydantic-style model -- confirm.
    pp(g.dict())


{'ci_address': '서울특별시 강남구 학동로25길 19',
 'ci_after_po_capital': 0.0,
 'ci_after_po_stocks': 0.0,
 'ci_appraised_price': 0,
 'ci_ask_tel': '',
 'ci_attractiveness': '',
 'ci_attractiveness_name': '',
 'ci_attractiveness_score': 0,
 'ci_before_corporate_tax': 0.0,
 'ci_before_po_capital': 0.0,
 'ci_before_po_stocks': 0.0,
 'ci_big_ir_plan': '',
 'ci_brn': '114-86-57714',
 'ci_capital': 0.0,
 'ci_ceo': '김동래',
 'ci_code': '200350',
 'ci_comment': '',
 'ci_company_separation': '벤처',
 'ci_competition_rate': '',
 'ci_confirm_po_amount': 0.0,
 'ci_confirm_po_price': 0,
 'ci_current_ratio': 0,
 'ci_datetime': datetime.datetime(2023, 1, 15, 14, 0, 43, 703258),
 'ci_demand_forecast_date': '',
 'ci_demand_result': 'Y',
 'ci_demand_result_datetime': datetime.datetime(2023, 1, 15, 14, 0, 43, 703257),
 'ci_demand_result_state': 'upload',
 'ci_demand_schedule': 'Y',
 'ci_demand_schedule_datetime': datetime.datetime(2023, 1, 15, 14, 0, 43, 703255),
 'ci_demand_schedule_state': 'upload',
 'ci_dislike': 0,

In [7]:
# Bionote
code = "B202206162"
# Raemong Raein
# code = "B202010131"
# Full page URL, kept for reference. It must be an f-string, otherwise the
# literal text "{code}" ends up in the query string.
url = f"http://www.ipostock.co.kr/view_pg/view_01.asp?code={code}&gmenu="
# scrape_ipostock builds the request URL itself and expects the bare company
# code; passing the full URL would embed a URL inside the `code=` parameter.
result = scrape_ipostock(code)

# from schemas.general import GeneralCreateSchema

# g = GeneralCreateSchema(**result)
# from pprint import pprint as pp

# pp(g)



