# In [None]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

# Initialize Selenium WebDriver with options to run in headless mode (without opening a browser window)
options = Options()
# The `Options.headless` setter was deprecated in Selenium 4.8 and removed in
# 4.13; on newer releases assigning to it only creates an unused attribute and
# Chrome opens a visible window. Passing the Chrome flag directly works on
# every Selenium 4 release.
options.add_argument("--headless=new")  # Run in the background (no browser window)

# Initialize WebDriver (webdriver_manager downloads a matching chromedriver)
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# Step 1: Open the webpage with Selenium
url = "https://www.czba.cz/en/map-of-biogas-plants.html"
driver.get(url)

# Step 2: Wait for the page to load completely
time.sleep(5)  # Adjust the sleep time based on the page load time

# List to store plant details (one dict per plant, filled by the scraping loop)
plant_details = []

# Function to extract plant details from the opened detail table
def extract_plant_details():
    """Scrape the plant detail page currently open in the module-level ``driver``.

    Reads the plant name from the page heading and then walks the rows of the
    detail table, keeping the values whose label is one of the known fields.

    Returns:
        dict | None: A dict that always contains ``'Plant Name'`` plus any of
        ``'KW Number'``, ``'Type'``, ``'Region'``, ``'District'``, ``'City'``,
        ``'Start Year'`` and ``'Heat Power'`` that were present on the page.
        ``None`` if the heading or the detail container could not be read
        (the error is printed, not raised).
    """
    # Maps the row label shown on the page to the output column name.
    label_to_field = {
        "Installed electric power:": 'KW Number',
        "Type:": 'Type',
        "Region:": 'Region',
        "District:": 'District',
        "City:": 'City',
        "Starting date of operation:": 'Start Year',
        "Installed heat power:": 'Heat Power',
    }

    try:
        # Extract plant name
        plant_name = driver.find_element(By.CSS_SELECTOR, 'body > div.layoutContent.mb-4 > div > div > div:nth-child(1) > h1').text.strip()

        # Extract the plant detail table
        detail_div = driver.find_element(By.CSS_SELECTOR, 'body > div.layoutContent.mb-4 > div:nth-child(1) > div > div.col-12.col-lg-5')

        plant_info = {'Plant Name': plant_name}  # Initialize with the plant name
        for row in detail_div.find_elements(By.TAG_NAME, 'tr'):
            # find_elements returns an empty list instead of raising, so a row
            # that lacks either a label (<th>) or a value (<td>) is skipped
            # rather than aborting the extraction of the whole plant.
            headers = row.find_elements(By.TAG_NAME, 'th')
            cells = row.find_elements(By.TAG_NAME, 'td')
            if not headers or not cells:
                continue

            field = label_to_field.get(headers[0].text.strip())
            if field is not None:
                plant_info[field] = cells[0].text.strip()

        return plant_info
    except Exception as e:
        # Best-effort scraper: report the failure and let the caller skip this plant.
        print(f"Error extracting details: {e}")
        return None

# Step 3: Loop through and extract details for all plants across all pages
DETAIL_LINK_SELECTOR = 'table > tbody > tr > td > a'
CURRENT_PAGE_SELECTOR = 'body > div.layoutContent.mb-4 > div > div > div > nav > ul > li.active > a'
# NOTE(review): the "next" link is assumed to be the 6th pagination item, as in
# the original script — confirm against the live page markup.
NEXT_PAGE_SELECTOR = 'body > div.layoutContent.mb-4 > div > div > div > nav > ul > li:nth-child(6) > a'
MAX_PAGE_ERRORS = 3  # Give up after this many consecutive page-level failures

page_number = 1
last_page = None  # Track the last visited page to avoid loops
page_errors = 0  # Consecutive failures on the current list page

try:
    while True:
        print(f"Scraping page {page_number}...")

        try:
            # Wait for the page to load
            time.sleep(3)

            # Check if stuck on the same page
            current_page_element = driver.find_element(By.CSS_SELECTOR, CURRENT_PAGE_SELECTOR)
            current_page_number = current_page_element.text.strip()

            if current_page_number == last_page:
                print("Stuck on the same page, exiting loop.")
                break
            last_page = current_page_number

            # Count the "+ Detail" links once; the elements themselves are
            # re-located before every click because navigating to a detail page
            # and back reloads the list and invalidates earlier references.
            detail_count = len(driver.find_elements(By.CSS_SELECTOR, DETAIL_LINK_SELECTOR))

            for index in range(detail_count):
                try:
                    detail_buttons = driver.find_elements(By.CSS_SELECTOR, DETAIL_LINK_SELECTOR)
                    if index >= len(detail_buttons):
                        # The list shrank or we are no longer on the list page.
                        print(f"Detail link {index + 1} is no longer available, skipping the rest of the page.")
                        break

                    detail_buttons[index].click()
                    time.sleep(2)

                    # Extract the plant details
                    plant_info = extract_plant_details()
                    if plant_info:
                        plant_details.append(plant_info)

                    # Go back to the previous page
                    driver.back()
                    time.sleep(1)
                except Exception as e:
                    print(f"Error with the detail button: {e}")

            # Move to the next page
            next_button = driver.find_elements(By.CSS_SELECTOR, NEXT_PAGE_SELECTOR)
            if next_button:
                next_button[0].click()
                page_number += 1
                page_errors = 0
                time.sleep(5)
            else:
                break
        except Exception as e:
            print(f"Error on page {page_number}: {e}")
            # The browser has not moved, so bumping the page counter would not
            # skip anything; retry a bounded number of times instead of looping
            # forever on a persistent failure.
            page_errors += 1
            if page_errors >= MAX_PAGE_ERRORS:
                print(f"Giving up after {MAX_PAGE_ERRORS} consecutive errors.")
                break

    # Step 4: Save the collected data to a CSV file
    if plant_details:
        df = pd.DataFrame(plant_details)
        df.to_csv('biogas_plants_details.csv', index=False, encoding='utf-8-sig')
        print("Scraping complete, data saved to 'biogas_plants_details.csv'")
    else:
        print("No data scraped.")
finally:
    # Close the browser even if scraping was interrupted or failed
    driver.quit()
