# Web Scraper Wasserstände Kanton Zürich

## Libraries and settings

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Web Scraper Hydrologische Daten

In [2]:
# Download the page of current values, parse its first HTML table, split the
# combined time/date cell into two columns, and store the result as a
# DataFrame (`df`) and as a CSV file.

# Number of <td> cells a data row must have before the time/date split.
EXPECTED_CELL_COUNT = 9
# The combined cell starts with the time as 'HH:MM' (5 characters); the rest is the date.
TIME_TEXT_LENGTH = 5

# Send an HTTP request to the URL
url = 'https://hydroproweb.zh.ch/Listen/AktuelleWerte/aktuelle_werte.html'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Stop on 4xx/5xx responses instead of parsing an error page as if it were data.
response.raise_for_status()
html_content = response.content

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Locate the table; fail with a clear message if the page layout changed
table = soup.find('table')
if table is None:
    raise ValueError(f'No <table> element found at {url}')

# Extract table headers (kept for inspection; the final column names are defined below)
headers = [header.get_text().replace('\xa0', '') for header in table.find_all('th')]

# Extract table rows, skipping the first <tr>; non-breaking spaces are removed from every cell
rows = []
for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    row_data = [cell.get_text().replace('\xa0', '').strip() for cell in cells]
    rows.append(row_data)

# Split the combined "ZeitDatum" column (third cell) into separate "Zeit" and "Datum" columns
clean_rows = []
for row in rows:
    # Rows with a different cell count are skipped
    if len(row) == EXPECTED_CELL_COUNT:
        zeit_datum = row[2]
        zeit, datum = zeit_datum[:TIME_TEXT_LENGTH], zeit_datum[TIME_TEXT_LENGTH:]
        clean_row = row[:2] + [zeit, datum] + row[3:]
        clean_rows.append(clean_row)

# Define the final columns (one more than the cell count because of the split)
columns = ['Gewaesser', 'Einheit', 'Zeit', 'Datum', 'Wert_Aktuell', '24h_vorher', 'Differenz', 'Mittel_24h', 'Maximum_24h', 'Minimum_24h']

# Create DataFrame; all values stay as the strings scraped from the page
df = pd.DataFrame(clean_rows, columns=columns)

# Save the DataFrame to a CSV file in the working directory
df.to_csv('extracted_table.csv', index=False)

# Display the DataFrame as the cell output
df

Unnamed: 0,Gewaesser,Einheit,Zeit,Datum,Wert_Aktuell,24h_vorher,Differenz,Mittel_24h,Maximum_24h,Minimum_24h
0,Aa-Stegen-Wetzikon,l/s,14:35,07.09.2024,52,1'846,-1'794.4,241,1'862,47
1,Aabach-Käpfnach,l/s,14:30,07.09.2024,48,499,-451.0,162,499,48
2,Aabach-Mönchaltorf,m3/s,14:30,07.09.2024,0.34,0.37,-0.036,0.33,0.37,0.28
3,Aabach-Niederuster,m3/s,14:30,07.09.2024,0.37,2.19,-1.826,0.71,2.26,0.29
4,Abistbach-Marthalen HW-RB,müM,14:30,07.09.2024,398.86,398.86,-0.003,398.86,398.86,398.86
...,...,...,...,...,...,...,...,...,...,...
61,Türlersee,müM,13:30,07.09.2024,644.24,644.25,-0.004,644.25,644.25,644.24
62,Türlersee Abfluss,l/s,13:00,07.09.2024,48,55,-7.1,52,56,48
63,"Wildbach-Grosswies, Wetzikon",müM,14:30,07.09.2024,532.98,532.99,-0.006,532.99,532.99,532.98
64,Wildbach-Wetzikon,m3/s,14:30,07.09.2024,0.13,0.15,-0.020,0.13,0.17,0.08


### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [3]:
# Footer cell: prints the OS name, platform system and release, the current
# local date/time, and the Python version, framed by two separator lines.
import os
import platform
import socket
from platform import python_version
from datetime import datetime

separator = '-----------------------------------'
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(separator)
print(os.name.upper())
print(f"{platform.system()} | {platform.release()}")
print(f"Datetime: {timestamp}")
print(f"Python Version: {python_version()}")
print(separator)

-----------------------------------
NT
Windows | 10
Datetime: 2024-09-07 14:45:32
Python Version: 3.11.9
-----------------------------------
