# 🚀 Web Scraping do GLOBE com Selenium

Este notebook realiza web scraping no site do GLOBE, lidando com paginação e salvando os dados extraídos em JSON.

In [None]:

# Instalar as bibliotecas necessárias
!pip install selenium webdriver-manager
    

In [None]:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import json
import time

# WebDriver configuration
chrome_options = Options()
chrome_options.add_argument("--headless")  # run without opening a browser window
service = Service(ChromeDriverManager().install())  # fetch a ChromeDriver matching the installed Chrome

driver = webdriver.Chrome(service=service, options=chrome_options)

# GLOBE student research reports listing
URL = "https://www.globe.gov/do-globe/research-resources/student-research-reports"

# Fixed pause (seconds) after each navigation so the page can finish loading.
PAGE_LOAD_WAIT_SECONDS = 3


def _optional_text(parent, selector, default="N/A"):
    """Return the stripped text of the first CSS match under *parent*.

    Falls back to *default* when nothing matches. A single lookup is used
    so the element cannot disappear between an existence check and the read.
    """
    matches = parent.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text.strip() if matches else default


# Accumulates one dict per scraped report
reports = []

try:
    driver.get(URL)
    time.sleep(PAGE_LOAD_WAIT_SECONDS)

    while True:
        # NOTE(review): the CSS class names below are placeholders from the
        # original author ("adjust as needed") - confirm against the live page.
        elements = driver.find_elements(By.CSS_SELECTOR, ".project-item")

        for el in elements:
            try:
                title = el.find_element(By.CSS_SELECTOR, ".project-title").text.strip()
                country = el.find_element(By.CSS_SELECTOR, ".project-country").text.strip()
                educator = _optional_text(el, ".project-educator")
                protocols = ", ".join(
                    p.text.strip()
                    for p in el.find_elements(By.CSS_SELECTOR, ".project-protocol")
                )
                language = _optional_text(el, ".project-language")
                date_submitted = el.find_element(By.CSS_SELECTOR, ".project-date").text.strip()

                reports.append({
                    "Title": title,
                    "Country": country,
                    "Educator": educator,
                    "Protocols": protocols,
                    "Language": language,
                    "Date Submitted": date_submitted,
                })
            except Exception as e:
                # Best effort: a malformed item is reported and skipped so the
                # rest of the page is still collected.
                print(f"Erro ao coletar um item: {e}")

        # Try to move to the next page; any failure is treated as the end.
        # NOTE(review): the loop only stops when locating/clicking "Next"
        # raises - if the last page still shows a "Next" link this will not
        # terminate. Verify against the site's pagination markup.
        try:
            next_button = driver.find_element(By.LINK_TEXT, "Next")  # adjust as needed
            next_button.click()
            time.sleep(PAGE_LOAD_WAIT_SECONDS)
        except Exception:
            # `Exception` rather than a bare except, so Ctrl-C / SystemExit
            # still propagate instead of being mistaken for the last page.
            print("Fim da paginação ou erro ao mudar de página.")
            break
finally:
    # Always shut the browser down, even if scraping fails midway, so no
    # headless Chrome / ChromeDriver process is left running.
    driver.quit()

# Save the extracted data as JSON
with open("globe_data.json", "w", encoding="utf-8") as f:
    json.dump(reports, f, indent=4, ensure_ascii=False)

print(f"Extração concluída! {len(reports)} relatórios salvos.")
    

### ✅ **Conclusão**
Este notebook executa o web scraping do site do GLOBE, navegando automaticamente entre as páginas e salvando os dados em JSON.