UsageError: Line magic function `%autowait` not found.


In [22]:
import sys
from pathlib import Path
project_path = str(Path.cwd().parent)
sys.path.append(project_path)

from typing import List, Dict, Union, Tuple
import asyncio
import aiohttp


from bs4 import BeautifulSoup
from apps.ipo.agents import get_user_agents
from config.config_log import logging
import requests

logger = logging.getLogger("info-logger")


async def extract_data_from_table1(table: BeautifulSoup) -> List[Dict[str, str]]:
    """
    Extracts data from the first table in the HTML page and returns a list of dictionaries, where each dictionary
    represents a row of data, with keys as the data categories and values as the data for each category.

    Parameters:
    - soup (BeautifulSoup): The BeautifulSoup object representing the HTML page.

    Returns:
    - List[Dict[str, str]]: A list of dictionaries containing the extracted data.
    """

    try:
        keys = [
            "ci_price",
            "ci_incidence",
            "ci_incidence_specific_gravity",
            "ci_participation",
            "ci_participation_specific_gravity",
        ]
        results = []
        trs = table.select("tr")[2:-1]
        for tr in trs:
            tds = tr.select("td")
            empty_string = tds[0].text.strip().replace(" ", "")
            if not empty_string:
                break
            temp = []
            for td in tds:
                temp.append(td.text)
            results.append(temp)

        result = [dict(zip(keys, result)) for result in results]

        return result
    except AttributeError as err:
        logger.error(err)
        result = [
            {
                "ci_price": "",
                "ci_incidence": 0,
                "ci_incidence_specific_gravity": 0.0,
                "ci_participation": 0,
                "ci_participation_specific_gravity": 0.0,
            }
        ]
        return result


async def extract_data_from_table2(table: BeautifulSoup) -> Dict[str, Union[str, float]]:
    """
    Extracts data from the second table in the HTML page and returns a dictionary with keys as the data categories
    and values as the data for each category.

    Parameters:
    - soup (BeautifulSoup): The BeautifulSoup object representing the HTML page.

    Returns:
    - Dict[str, Union[str, float]]: A dictionary containing the extracted data.
    """
    result = {"ci_competition_rate": "", "ci_promise_content": "", "ci_promise_rate": 0.0}
    if not table:
        return result
    try:
        result = {
            "ci_competition_rate": table.select_one("tr:nth-of-type(1) > td:nth-of-type(2) > font > strong").get_text().replace(" ", "").replace("\xa0", ""),
            "ci_promise_content": table.select_one("tr:nth-of-type(2) > td:nth-of-type(2)").get_text().strip(),
            "ci_promise_rate": table.select_one("tr:nth-of-type(3) > td:nth-of-type(2)").get_text().strip()
        }
        return result
    except (AttributeError, IndexError):
        return result



async def scrape_ipostock(code: str) -> Tuple[List[Dict[str, str]], Dict[str, Union[str, float]]]:
    """
    Scrapes data from the webpage for the given stock code and returns a tuple of the extracted data from the two tables on the page.

    Parameters:
    - code (str): The stock code of the company.

    Returns:
    - Tuple[List[Dict[str, str]], Dict[str, Union[str, float]]]: A tuple containing the extracted data.
    """
    url = f"http://www.ipostock.co.kr/view_pg/view_05.asp?code={code}"
    header = await get_user_agents()
    try:
        # async with aiohttp.ClientSession() as session:
        #     async with session.get(url, headers=header) as resp:
        #         soup = BeautifulSoup(await resp.text(), "lxml")
        # async with aiohttp.ClientSession() as session:
            # async with session.get(url, headers=header) as resp:
        resp = requests.get(url, headers=header)
        soup = BeautifulSoup(resp.content, "lxml")
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print("Request failed, retrying in 5 seconds...")
        print(e)
        await asyncio.sleep(0.3)
    table1 = soup.find("table", width="780", cellpadding="0", class_="view_tb")
    table2 = soup.find_all("table", width="780", cellspacing="1", class_="view_tb2")

    t1, t2 = await asyncio.gather(
        extract_data_from_table1(table1),
        extract_data_from_table2(table2),
    )
    print(t1, t2)
    return t1, t2


if __name__ == "__main__":
    
    #print > table > tbody > tr:nth-child(5) > td > table.view_tb > tbody > tr:nth-child(3) > td:nth-child(1)
    #"print > table > tr:nth-child(5) > td > table.view_tb > tr:nth-child(3) > td"
    async def main():

        #        code = "B202010131"
        # code = "B202010131"
        code = "B202105031"
        # code = '유안타제12호스팩'
        code = "B202211161"
        # 비트나인
        # code = "B202104292"
        prediction_result, general_result = await scrape_ipostock(code)

        from schemas.general import GeneralCreateSchema
        from schemas.prediction import PredictionCreateSchema

        g = GeneralCreateSchema(**general_result)
        s = [PredictionCreateSchema(**data) for data in prediction_result or []]

        # print(g)
        # print(s)
        # from pprint import pprint as pp

        # # pp(g.dict())
        # # pp(g.dict()["ci_competition_rate"])
        # # pp(g.dict()["ci_promise_rate"])
        # # pp(g.dict()["ci_promise_content"])
        # si1 = s[0].dict()
        # si2 = s[1].dict()
        # si3 = s[2].dict()
        # si4 = s[3].dict()
        # print(si1)
        # print(si2)
        # print(si3)
        # print(si4)
await main()

ERROR | 2023-01-26 17:09:26 | 835300370 | 55 | 'NoneType' object has no attribute 'select'
ERROR | 2023-01-26 17:09:27 | 835300370 | 105 | ResultSet object has no attribute 'select_one'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?


[{'ci_price': '', 'ci_incidence': 0, 'ci_incidence_specific_gravity': 0.0, 'ci_participation': 0, 'ci_participation_specific_gravity': 0.0}] {'ci_competition_rate': '', 'ci_promise_content': '', 'ci_promise_rate': 0.0}


In [21]:
code = "B202211161"
url = f"http://www.ipostock.co.kr/view_pg/view_05.asp?code={code}"
header = await get_user_agents()
try:
    # async with aiohttp.ClientSession() as session:
    #     async with session.get(url, headers=header) as resp:
    #         soup = BeautifulSoup(await resp.text(), "lxml")
    # async with aiohttp.ClientSession() as session:
        # async with session.get(url, headers=header) as resp:
    resp = requests.get(url, headers=header)
    soup = BeautifulSoup(resp.content, "lxml")
except (aiohttp.ClientError, asyncio.TimeoutError) as e:
    print("Request failed, retrying in 5 seconds...")
    print(e)
    await asyncio.sleep(0.3)
table1 = soup.find("table", width="780", cellpadding="0", class_="view_tb")
table2 = soup.find_all("table", width="780", cellspacing="1", class_="view_tb2")[-1]
table1, table2

(None,
 <table bgcolor="D2D2D2" bordercolor="D2D2D2" cellpadding="1" cellspacing="1" class="view_tb2" style="margin:10px 0px 0px 0px;" width="780">
 <tr align="left" bgcolor="#FFFFFF">
 <td bgcolor="F0F0F0" width="132">  <font color="#666666">참고사항</font></td>
 <td height="26" style="line-height:130%; padding:3px 3px 3px 3px;" width="*"><font color="red">*금번 수요예측에 참여하는 기관투자자는 15일, 1개월, 3개월, 6개월의 의무보유기간을 확약가능.</font></td>
 </tr>
 </table>)