In [1]:
# 1. import libraries
import os
import re
from urllib.parse import quote_plus

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [5]:
# Matches Wikipedia's degrees/minutes/seconds notation, e.g. "52°31′12″N".
# Minutes and seconds are optional so "48°08′N" and "52.52°N" also parse.
_DMS_RE = re.compile(
    r"(\d+(?:\.\d+)?)°\s*"
    r"(?:(\d+(?:\.\d+)?)′\s*)?"
    r"(?:(\d+(?:\.\d+)?)″\s*)?"
    r"([NSEW])"
)


def _dms_to_decimal(dms_text):
    """Convert a coordinate such as ``"52°31′12″N"`` to signed decimal degrees.

    Southern and western hemispheres yield negative values. The result is
    rounded to six decimal places.

    Raises:
        ValueError: if ``dms_text`` is not in degrees/minutes/seconds notation.
    """
    match = _DMS_RE.fullmatch(dms_text.strip())
    if match is None:
        raise ValueError(f"Unrecognised coordinate format: {dms_text!r}")
    degrees, minutes, seconds, hemisphere = match.groups()
    decimal = float(degrees) + float(minutes or 0) / 60 + float(seconds or 0) / 3600
    if hemisphere in "SW":
        decimal = -decimal
    return round(decimal, 6)


def _infobox_coordinate(table, css_class):
    """Return the decimal coordinate held in ``span.<css_class>``, or None if absent."""
    span = table.select_one(f"span.{css_class}")
    return None if span is None else _dms_to_decimal(span.text)


def wiki_data(cities):
    """Scrape basic facts about each city from its English Wikipedia infobox.

    Args:
        cities: iterable of Wikipedia article titles (e.g. ``"Berlin"``).

    Returns:
        pandas.DataFrame with one row per city and the columns ``city``,
        ``country``, ``population`` (text of the first figure found under the
        "Population" infobox header, or None), ``latitude`` and ``longitude``
        (signed decimal degrees as floats, or None when the infobox has no
        coordinates).

    Raises:
        requests.HTTPError: if Wikipedia answers with an error status.
        ValueError: if the page has no settlement infobox, or a coordinate
            cannot be parsed.
    """
    headers = {"Accept-Language": "en-US,en;q=0.8"}
    records = []
    for city in cities:
        url = f"https://en.wikipedia.org/wiki/{city}"

        # Fail fast on HTTP errors and never hang forever on a stalled request.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        # The settlement infobox holds every field extracted below.
        table = soup.find("table", {"class": "infobox ib-settlement vcard"})
        if table is None:
            raise ValueError(f"No settlement infobox found on {url}")

        # The figure lives in the row that follows the "Population" header row.
        population = None
        header = soup.select_one('th.infobox-header:-soup-contains("Population")')
        if header is not None:
            next_row = header.parent.find_next_sibling()
            if next_row is not None:
                figure = next_row.find(string=re.compile(r"\d+"))
                if figure is not None:
                    # Plain str so the DataFrame does not keep the parse tree alive.
                    population = str(figure)

        # Every record carries the same keys so the DataFrame columns are stable.
        records.append(
            {
                "city": table.select("div.fn.org")[0].text,
                "country": table.select("td.infobox-data")[0].text,
                "population": population,
                "latitude": _infobox_coordinate(table, "latitude"),
                "longitude": _infobox_coordinate(table, "longitude"),
            }
        )
    return pd.DataFrame(records)


In [6]:
# Cities to scrape; each name is used as the Wikipedia article title.
list_of_cities = [
    "Berlin",
    "London",
    "Paris",
    "Madrid",
    "Milan",
    "Munich",
]
df = wiki_data(list_of_cities)

In [None]:
# Connection settings for the MySQL database. Each value can be overridden
# through a CITIES_DB_* environment variable so real credentials never have to
# be saved in the notebook; the literals are placeholders used when unset.
schema = os.environ.get("CITIES_DB_SCHEMA", "db_weather_flights")  # database name
host = os.environ.get("CITIES_DB_HOST", "Your Host ID")            # MySQL server address
user = os.environ.get("CITIES_DB_USER", "root")
password = os.environ.get("CITIES_DB_PASSWORD", "PASSWORD")        # prefer the env var
port = int(os.environ.get("CITIES_DB_PORT", "3306"))

# URL-encode the credentials: characters such as "@", ":" or "/" in a password
# would otherwise be parsed as part of the URL structure.
con = f"mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{schema}"

In [None]:
# Write the scraped rows to the `cities` table through the connection string.
# if_exists="append" inserts into the existing table (pandas creates the table
# first when it is missing); index=False leaves the DataFrame index out.
df.to_sql(name="cities", con=con, if_exists="append", index=False)