# Web scraping the locations of U-Bahn stations in Berlin

# 1. Setup

In [1]:
import re

import folium  # renders the interactive map
import matplotlib.pyplot as plt
import pandas as pd
import requests  # fetches the data from the website
from bs4 import BeautifulSoup  # parses the html

# 2. Fetch content from URL

In [2]:
# Download the Wikipedia page that lists the Berlin U-Bahn stations.
# The request is abandoned if the server does not answer within 5 seconds.
page_link = "https://de.wikipedia.org/wiki/Liste_der_Berliner_U-Bahnhöfe"
page_response = requests.get(page_link, timeout=5)
# Fail loudly on an HTTP error status (4xx/5xx) instead of handing an
# error page to the HTML parser.
page_response.raise_for_status()

## 2.1 Parse html

The result should be a list with one entry per station: the HTML element that carries the `plainlinks-print` class.

In [3]:
# Build a parse tree from the raw response body using Python's built-in
# HTML parser, then collect every element that has the CSS class
# 'plainlinks-print'.
wiki_page = BeautifulSoup(markup=page_response.content, features="html.parser")
stations = wiki_page.find_all(class_='plainlinks-print')

## 2.2 Find and save coordinates of each station

In [4]:
# Three independent, initially empty accumulators that are filled in parallel:
#   lat           -> latitude of each station
#   long          -> longitude of each station
#   station_names -> name of each station
lat, long, station_names = [], [], []

In [5]:
# The coordinates are read from the first link inside each element: the text
# between "params=" and "_E_region" in its href, which is then split on "_N_"
# into latitude and longitude.
coord_pattern = re.compile(r'(?<=params=).*?(?=_E_region)')

for station in stations:
    station_name = station.get("id")
    link = station.find('a', href=True)
    if station_name is None or link is None:
        # Element has no id or no link, so it cannot be used as a station
        # entry; skip it rather than abort the whole scrape.
        continue

    match = coord_pattern.search(link['href'])
    if match is None:
        # The link does not contain coordinates in the expected form.
        continue

    parts = match.group(0).split("_N_")
    if len(parts) < 2:
        # No "_N_" separator, so latitude and longitude cannot be told apart.
        continue

    # Append to all three lists together so they stay aligned row by row.
    lat.append(parts[0])
    long.append(parts[1])
    station_names.append(station_name)

In [6]:
# Combine the three parallel lists into one table with one row per scraped
# element. The column order is name, latitude, longitude.
station_columns = dict(
    station_name=station_names,
    latitude=lat,
    longtitude=long,
)
stat_geo = pd.DataFrame(station_columns)

In [7]:
# Keep only the first occurrence of rows that are identical in every column.
# The result is reassigned rather than mutated with inplace=True; the original
# row labels are kept, so the index may contain gaps afterwards.
stat_geo = stat_geo.drop_duplicates()

In [8]:
# Convert both coordinate columns to numeric values. With errors='coerce',
# entries that cannot be parsed become NaN instead of raising an error.
for coord_column in ('latitude', 'longtitude'):
    stat_geo[coord_column] = pd.to_numeric(stat_geo[coord_column], errors='coerce')

In [9]:
# Display the last five rows as a quick visual check of the table.
stat_geo.tail(n=5)

Unnamed: 0,station_name,latitude,longtitude
194,Wutzkyallee,52.423333,13.474722
195,Yorckstraße,52.493056,13.370833
196,Zitadelle,52.537778,13.217778
197,Zoologischer_Garten,52.507222,13.3325
199,Zwickauer_Damm,52.423333,13.483889


In [10]:
# Rows whose latitude or longitude is NaN (a value that could not be parsed)
# cannot be placed on a map. They are dropped once here, so the coordinate
# list and the popup names are taken from the same rows and stay aligned.
mappable_stations = stat_geo.dropna(subset=['latitude', 'longtitude'])

# Coordinates as a plain list of [latitude, longitude] pairs.
locations = mappable_stations[['latitude', 'longtitude']]
locationlist = locations.values.tolist()

# Single-column frame with the station names, in the same row order as
# locationlist.
popup = mappable_stations[['station_name']]

# 3. Visualize

In [11]:
# Centre of the initial map view: Alexanderplatz as [latitude, longitude].
ALEXANDERPLATZ = [52.521347, 13.413407]

map_osm = folium.Map(location=ALEXANDERPLATZ, zoom_start=11)

# One marker per station. locationlist and popup hold the same rows in the
# same order, so they can be walked in parallel; the station name is shown
# as the marker's popup text.
for location, name in zip(locationlist, popup['station_name']):
    folium.Marker(location, popup=name).add_to(map_osm)

# Last expression of the cell: renders the interactive map.
map_osm