In [1]:
import sys
from pathlib import Path
project_path = str(Path.cwd().parent)
sys.path.append(project_path)
%autoawait asyncio

In [12]:
from typing import List, Dict, Union, Optional
import asyncio
import aiohttp
import re

from bs4 import BeautifulSoup
from async_retrying import retry

from apps.ipo.agents import get_user_agents


async def extract_data_from_table3(table: BeautifulSoup) -> Dict[str, str]:
    """
    Extracts data from the third table in the HTML page and returns a dictionary with keys as the data categories
    and values as the data for each category.

    Parameters:
    - table (BeautifulSoup): The BeautifulSoup object representing the third table in the HTML page.

    Returns:
    - Dict[str, str]: A dictionary containing the extracted data.
    """
    ci_public_offering_stocks , *rest_trs= table.select("tr")
    ci_public_offering_stocks[0].select('td:nth-child(2)')
    keys = [
        "ci_professional_investor_stock",
        "ci_professional_investor_rate",
        "ci_esa_stock",
        "ci_esa_rate",
        "ci_general_subscriber_stock",
        "ci_general_subscriber_rate",
        "ci_overseas_investor_stock",
        "ci_overseas_investor_rate",
    ]
    result = []
    trs = rest_trs.select("tr")[1:]
    for tr in rest_trs:
        tds = tr.select("td")
        for td in tds:
            if re.search(r"\d+[,\d+]*", td.text):
                result.append(td.text)
    result = {key: value for key, value in zip(keys, result)}
    return result





@retry(attempts=100)
async def scrape_ipostock(code: str) -> Dict[str, Union[str, Dict[str, str], List[Dict[str, str]]]]:
    """
    Scrapes financial data for a given company code from the website ipostock.co.kr.

    Parameters:
    - code (str): The company code to scrape financial data for.

    Returns:
    - Dict[str, Union[str, Dict[str, str], List[Dict[str, str]]]]: A dictionary containing the scraped financial data.
    """
    
    code = "B202010131"
    header = await get_user_agents()
    url = f"http://www.ipostock.co.kr/view_pg/view_04.asp?code={code}"

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=header) as resp:
                soup = BeautifulSoup(await resp.text(), "lxml")
            soup = BeautifulSoup(await resp.text(), "lxml")
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print("Request failed, retrying in 5 seconds...")
        print(e)
        await asyncio.sleep(1)

    table1, table2, table3, table4, *_ = soup.select("table.view_tb")

    
    
    await extract_data_from_table3(table3)





await scrape_ipostock(1)


TypeError: sleep() got an unexpected keyword argument 'loop'

In [14]:
import requests

code = "B202010131"
url = f"http://www.ipostock.co.kr/view_pg/view_04.asp?code={code}"
r  = requests.get(url)

In [19]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(r.content, "lxml")
table1, table2, table3, table4, *_ = soup.select("table.view_tb")

In [20]:
table3

<table class="view_tb" width="350">
<tr align="left" bgcolor="#FFFFFF">
<td align="center" bgcolor="F0F0F0" colspan="2" height="25" width="110"><font color="#666666"><b>공모주식수</b></font></td>
<td align="center" colspan="2" width="*"><b>1,200,000 주 (모집100%)</b></td>
</tr>
<tr align="left" bgcolor="#FFFFFF">
<td align="center" bgcolor="F0F0F0" rowspan="4" width="42"><font color="#666666">그룹별<br/><br/>배 정</font></td>
<td bgcolor="F0F0F0" height="26">  <font color="#666666">전문투자자</font></td>
<td align="center" width="125">900,000 주</td>
<td align="center" bgcolor="#FFFFFF" width="90">75 %</td>
</tr>
<tr align="center" bgcolor="#FFFFFF">
<td bgcolor="F0F0F0" height="26"><font color="#666666">우리사주조합</font></td>
<td>0 주</td>
<td bgcolor="#FFFFFF">0 %</td>
</tr>
<tr align="center" bgcolor="#FFFFFF">
<td bgcolor="F0F0F0" height="26"><font color="#666666"><b>일반청약자</b></font></td>
<td><b>300,000 주</b></td>
<td bgcolor="#FFFFFF"><b>25 %<b></b></b></td>
</tr>
<tr align="center" bgcolor="#FFFFFF">
<t

In [43]:
ci_public_offering_stocks, *rest_trs = table3.select("tr")
instance = ci_public_offering_stocks.select_one('td:nth-child(2) > b')
data = None
ci_public_offering_stocks = 0
if instance is None:
    pass
else : 
    data = instance.get_text().split('주')[0].strip().replace(',', '')
    if data is '' :
        pass
    ci_public_offering_stocks = int(data)
# ci_public_offering_stocks.select("td:nth-child(2)")
# keys = [
#     "ci_professional_investor_stock",
#     "ci_professional_investor_rate",
#     "ci_esa_stock",
#     "ci_esa_rate",
#     "ci_general_subscriber_stock",
#     "ci_general_subscriber_rate",
#     "ci_overseas_investor_stock",
#     "ci_overseas_investor_rate",
# ]
# result = []
# trs = rest_trs.select("tr")[1:]
# for tr in rest_trs:
#     tds = tr.select("td")
#     for td in tds:
#         if re.search(r"\d+[,\d+]*", td.text):
#             result.append(td.text)
# result = {key: value for key, value in zip(keys, result)}

1200000
