# In [1]:
import pandas as pd
from tqdm import tqdm
import logging

from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    WebDriverException
)

# Logger setup: INFO-and-above records are appended to a UTF-8 log file,
# one "timestamp | level | message" line per record.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    filename="university_links.log",
    filemode="a",
    encoding="utf-8",
)

def find_university_link(university_name: str, driver_path: str) -> str | None:
    """Search 4icu.org for a university and return the first result's link.

    A new Chrome session is started for every call and is shut down before
    the function returns, whether the lookup succeeded or not.

    Args:
        university_name: Text typed into the site's search field.
        driver_path: Path to the chromedriver executable.

    Returns:
        The ``href`` attribute of the first ``<a>`` inside the first search
        result, or ``None`` if nothing was found or any error occurred.
        Errors are written to the log and never propagated to the caller.
    """
    # Bound before the ``try`` so that ``finally`` can tell whether a browser
    # was actually started: ``webdriver.Chrome`` itself may raise.
    driver = None
    try:
        driver = webdriver.Chrome(service=ChromeService(executable_path=driver_path))
        driver.get("https://www.4icu.org/")

        # Takes the first <input> in the DOM; presumably this is the search
        # box -- TODO confirm against the live page.
        search = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, 'input'))
        )
        search.clear()
        search.send_keys(university_name)

        # Submit button of the embedded search form (located by absolute XPath).
        button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="___gcse_0"]/div/div/form/table/tbody/tr/td[2]/button'))
        )
        button.click()

        # Block until at least one result node is present (or time out).
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.gsc-webResult.gsc-result'))
        )

        results = driver.find_elements(By.CSS_SELECTOR, '.gsc-webResult.gsc-result')
        if results:
            univer_ref = results[0]
            univer_page = univer_ref.find_element(By.TAG_NAME, 'a').get_attribute('href')
            logging.info(f"{university_name} | Успешно найден: {univer_page}")
            return univer_page
        else:
            logging.warning(f"{university_name} | Университет не найден")
            return None

    # TimeoutException and NoSuchElementException are subclasses of
    # WebDriverException, so they must be handled before the generic clause.
    except TimeoutException:
        logging.error(f"{university_name} | Ошибка: таймаут ожидания")
        return None
    except NoSuchElementException:
        logging.error(f"{university_name} | Ошибка: элемент не найден")
        return None
    except WebDriverException as e:
        logging.critical(f"{university_name} | WebDriver: {e}")
        return None
    except Exception as e:
        logging.critical(f"{university_name} | Непредвиденная ошибка: {e}")
        return None
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Cleanup stays best-effort, but the failure is recorded and
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                logging.warning(f"{university_name} | driver.quit() failed: {e}")
        
# Load the second sheet (sheet_name=1 is a zero-based position) of the workbook.
df = pd.read_excel('Profiles_Data_250425.xlsx', sheet_name=1)

# Main block: look up a website for every row that does not have one yet.
if __name__ == "__main__":
    driver_path = "C:/Users/Eugene/chromedriver-win64/chromedriver.exe"

    # Compute the "website missing" mask once and reuse it for both the row
    # selection and the progress-bar total.
    missing_website = df["Website"].isna()
    pending_rows = df[missing_website].iterrows()
    progress = tqdm(pending_rows, total=missing_website.sum(), desc="Обновление ссылок")

    for idx, row in progress:
        university = row["University Name"]
        # The lookup returns None on failure, which leaves the cell empty.
        link = find_university_link(university, driver_path)
        # Write back by index label so the original frame is updated in place.
        df.at[idx, "Website"] = link

# Save the (possibly updated) dataframe to a new file, Profiles_Data.xlsx.
df.to_excel("Profiles_Data.xlsx", index=False)