# Web Scraping German Cities and Their Populations

In [57]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook
from tqdm.auto import tqdm

In [58]:
def extract_info(table):
    """Collect city, status, administrative unit and population from a table.

    Parameters
    ----------
    table
        Parsed HTML element exposing ``find_all``; every ``<tr>`` carrying an
        ``itemscope`` attribute is treated as one data row.

    Returns
    -------
    pandas.DataFrame
        One record per matched row with the columns ``city``, ``status``,
        ``adm`` and ``population``. Values are the cells' text exactly as
        found (population is not converted to a number here). A row that
        lacks any of the four expected cells becomes an all-NaN record so
        callers can remove it with ``dropna``.
    """
    # Output column -> CSS class of the <td> that holds its value.
    cell_classes = {
        'city': 'rname',
        'status': 'rstatus',
        'adm': 'radm',
        'population': 'rpop prio1',
    }

    records = []
    for row in tqdm(table.find_all('tr', attrs={'itemscope': ''})):
        # Look up every cell before storing anything: appending column by
        # column would leave the columns with different lengths whenever a
        # later cell is missing.
        cells = {
            column: row.find('td', attrs={'class': css_class})
            for column, css_class in cell_classes.items()
        }
        if any(cell is None for cell in cells.values()):
            # Incomplete row (e.g. a header): keep an explicit placeholder.
            records.append(dict.fromkeys(cell_classes, np.nan))
        else:
            records.append({column: cell.text for column, cell in cells.items()})

    return pd.DataFrame(records, columns=list(cell_classes))

In [59]:
# Download the city listing page and extract the census rows from its table.
url = 'https://www.citypopulation.de/en/germany/cities/'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
content = requests.get(url, timeout=30)
# Stop on an HTTP error instead of silently parsing an error page.
content.raise_for_status()
soup = BeautifulSoup(content.text, 'html.parser')
census_table = soup.find(id='ts')
if census_table is None:
    # Without this check the failure surfaces later as an obscure
    # AttributeError on None inside extract_info.
    raise ValueError(f"no element with id='ts' found at {url}")
results = extract_info(census_table)

In [60]:
# Summarise the scraped frame: column dtypes and non-null counts per column.
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192 entries, 0 to 191
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   city        191 non-null    object
 1   status      191 non-null    object
 2   adm         191 non-null    object
 3   population  191 non-null    object
dtypes: object(4)
memory usage: 6.1+ KB


In [61]:
# Preview the first rows; in the recorded output row 0 is entirely NaN.
results.head()

Unnamed: 0,city,status,adm,population
0,,,,
1,Aachen [Aix-la-Chapelle],City,NW,248960.0
2,Aalen,City,BW,68393.0
3,Ahlen,City,NW,52503.0
4,Arnsberg,City,NW,73456.0


In [62]:
# Discard rows with any missing value, then renumber the index from zero.
results = results.dropna().reset_index(drop=True)
results

Unnamed: 0,city,status,adm,population
0,Aachen [Aix-la-Chapelle],City,NW,248960
1,Aalen,City,BW,68393
2,Ahlen,City,NW,52503
3,Arnsberg,City,NW,73456
4,Aschaffenburg,CityCR,BY,71002
...,...,...,...,...
186,Wolfsburg,CityCR,NI,124371
187,Worms,CityCR,RP,83542
188,Wuppertal,CityCR,NW,355100
189,Würzburg,CityCR,BY,127934


In [64]:
# Write the frame to a CSV file in the working directory, without the index column.
results.to_csv('cities_and_population.csv', index=False)