In [2]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the environment,
# then read the OpenAI key; the value is None when the variable is unset.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [4]:
# Wikipedia page that holds the 2024 Summer Olympics medal table.
url = "https://en.wikipedia.org/wiki/2024_Summer_Olympics_medal_table"

In [5]:
from langchain_community.document_loaders import AsyncHtmlLoader
from bs4 import BeautifulSoup

# Download the page. Only one URL is requested, so the first document is used.
loader = AsyncHtmlLoader([url])
docs = loader.load()
raw_html = docs[0].page_content

# Create the BeautifulSoup object from the downloaded HTML.
soup = BeautifulSoup(raw_html, 'html.parser')

# Look up the <table> tag by its class attribute. A multi-class string passed
# to `class_` is compared against the exact attribute value, so this lookup
# stops matching if the page changes the order or the set of classes.
table = soup.find('table', class_='wikitable sortable notheme plainrowheaders jquery-tablesorter')

# Fail fast: find() returns None when nothing matches, and a None here would
# otherwise be handed to the LLM chain downstream instead of table HTML.
if table is None:
    raise ValueError(f"Medal table not found in the HTML fetched from {url}")

Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.12it/s]


In [6]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Chat model used for the conversion, configured with temperature 0.
llm = ChatOpenAI(
    model="gpt-4o-mini-2024-07-18",
    temperature=0,
    api_key=OPENAI_API_KEY,
)

# Two-message prompt: a fixed system instruction (Portuguese for "Transform
# into a markdown table:") followed by the human turn, filled via `{table}`.
messages = [
    ("system", "Transforme em uma table markdown:"),
    ("human", "{table}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

# Pipeline: fill the prompt -> call the model -> parse the reply to a string.
chain = prompt | llm | StrOutputParser()

In [8]:
# Run the chain, supplying the scraped table as the `table` input variable.
final_table = chain.invoke({"table": table})

In [9]:
# Show the chain's result as plain text.
print(final_table)

```markdown
| Rank | NOC       | Gold | Silver | Bronze | Total |
|------|-----------|------|--------|--------|-------|
| 1    | China     | 11   | 7      | 6      | 24    |
| 2    | United States | 9    | 15     | 13     | 37    |
| 3    | France    | 8    | 11     | 8      | 27    |
| 4    | Australia | 8    | 6      | 4      | 18    |
| 5    | Japan     | 8    | 3      | 5      | 16    |
| 6    | Great Britain | 6    | 7      | 7      | 20    |
| 7    | South Korea | 6    | 3      | 3      | 12    |
| 8    | Italy     | 5    | 7      | 4      | 16    |
| 9    | Canada    | 3    | 2      | 3      | 8     |
| 10   | Germany   | 2    | 2      | 2      | 6     |
| 10   | Netherlands | 2    | 2      | 2      | 6     |
| 12   | New Zealand | 2    | 2      | 1      | 5     |
| 13   | Romania   | 2    | 1      | 1      | 4     |
| 14   | Hong Kong | 2    | 0      | 2      | 4     |
| 15   | Azerbaijan | 2    | 0      | 0      | 2     |
| 16   | Hungary   | 1    | 2      | 1      | 4     |
|

## Testing API

In [26]:
import json

In [18]:
import requests

# Base URL of the API under test, served on the loopback address.
endpoint = "http://127.0.0.1:8000/"

# Call the root route. A timeout is set so that an unresponsive server raises
# an exception instead of blocking the kernel indefinitely.
response = requests.get(endpoint, timeout=10)

# Print the raw response body on HTTP 200; otherwise print an error message
# (Portuguese for "Error accessing the API").
if response.status_code == 200:
    print(response.content)
else:
    print("Erro ao acessar a API")

b'{"STATUS":"OK"}'


In [20]:
# Request the medal table route. Timeout is (connect, read) in seconds so the
# call cannot hang forever; the read limit is generous on the assumption that
# this route may be slow to answer -- TODO confirm against the server code.
response = requests.get(endpoint + 'medal_table', timeout=(5, 120))

In [29]:
# Handle the failure case first; otherwise decode the JSON body and print the
# value stored under its 'medal_table' key.
if response.status_code != 200:
    print("Erro ao acessar a API")
else:
    payload = json.loads(response.content)
    print(payload['medal_table'])

```markdown
| Rank | NOC       | Gold | Silver | Bronze | Total |
|------|-----------|------|--------|--------|-------|
| 1    | China     | 11   | 7      | 6      | 24    |
| 2    | United States | 9    | 15     | 13     | 37    |
| 3    | France    | 8    | 11     | 8      | 27    |
| 4    | Australia | 8    | 6      | 4      | 18    |
| 5    | Japan     | 8    | 3      | 5      | 16    |
| 6    | Great Britain | 6    | 7      | 7      | 20    |
| 7    | South Korea | 6    | 3      | 3      | 12    |
| 8    | Italy     | 5    | 7      | 4      | 16    |
| 9    | Canada    | 3    | 2      | 3      | 8     |
| 10   | Germany   | 2    | 2      | 2      | 6     |
| 10   | Netherlands | 2    | 2      | 2      | 6     |
| 12   | New Zealand | 2    | 2      | 1      | 5     |
| 13   | Romania   | 2    | 1      | 1      | 4     |
| 14   | Hong Kong | 2    | 0      | 2      | 4     |
| 15   | Azerbaijan | 2    | 0      | 0      | 2     |
| 16   | Hungary   | 1    | 2      | 1      | 4     |
|