Imports & Constants

In [2]:
import requests
import csv
import json
from typing import Any, Dict, List, Tuple

# Constants
DEFAULT_ENDPOINT = "https://overpass-api.de/api/interpreter"
USER_AGENT = "BerlinDentistsFetcher/1.0 (jaywindie1@gmail.com)" 
OVERPASS_QUERY = """
[out:json][timeout:60];
area["name"="Berlin"]["admin_level"="4"]->.berlin;
(
  node["healthcare"="dentist"](area.berlin);
  way["healthcare"="dentist"](area.berlin);
  relation["healthcare"="dentist"](area.berlin);
);
out center;
"""


Fetch Data from Overpass API

In [3]:
def fetch_overpass(query: str, endpoint: str = DEFAULT_ENDPOINT) -> Dict[str, Any]:
    headers = {"User-Agent": USER_AGENT}
    response = requests.post(endpoint, data={"data": query}, headers=headers)
    response.raise_for_status()
    return response.json()

# Test fetch (small run)
data = fetch_overpass(OVERPASS_QUERY)
print(f"Retrieved {len(data.get('elements', []))} elements.")


Retrieved 780 elements.


Normalize Elements (Flatten Into Rows)

In [9]:
def normalize_element(e: Dict[str, Any]) -> Dict[str, Any]:
    tags = e.get("tags", {})
    # Some elements are 'node', some are 'way' or 'relation' with a 'center'
    lat = e.get("lat") or (e.get("center", {}).get("lat") if "center" in e else None)
    lon = e.get("lon") or (e.get("center", {}).get("lon") if "center" in e else None)
    
    return {
        "osm_type": e.get("type"),
        "osm_id": e.get("id"),
        "name": tags.get("name"),
        "addr_street": tags.get("addr:street"),
        "addr_housenumber": tags.get("addr:housenumber"),
        "addr_postcode": tags.get("addr:postcode"),
        "addr_city": tags.get("addr:city"),
        "opening_hours": tags.get("opening_hours"),
        "wheelchair": tags.get("wheelchair"),
        "phone": tags.get("phone"),
        "email": tags.get("email"),
        "website": tags.get("website"),
        "lat": lat,
        "lon": lon,
    }

# Normalize some elements
elements = data.get("elements", [])
rows = [normalize_element(e) for e in elements]
print(f"Normalized {len(rows)} elements. Example:\n", rows[0] if rows else "No data")


Normalized 780 elements. Example:
 {'osm_type': 'node', 'osm_id': 304183504, 'name': None, 'addr_street': 'Hönower Straße', 'addr_housenumber': '75', 'addr_postcode': '12623', 'addr_city': 'Berlin', 'opening_hours': None, 'wheelchair': None, 'phone': None, 'email': None, 'website': None, 'lat': 52.5114112, 'lon': 13.612096}


Convert to GeoJSON

In [10]:
def elements_to_geojson(elements: List[Dict[str, Any]]) -> Dict[str, Any]:
    features = []
    for e in elements:
        tags = e.get("tags", {})
        lat = e.get("lat") or (e.get("center", {}).get("lat") if "center" in e else None)
        lon = e.get("lon") or (e.get("center", {}).get("lon") if "center" in e else None)
        if lat is None or lon is None:
            continue
        features.append({
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [lon, lat]},
            "properties": {
                "osm_type": e.get("type"),
                "osm_id": e.get("id"),
                **tags
            }
        })
    return {"type": "FeatureCollection", "features": features}

geojson = elements_to_geojson(elements)
print(f"GeoJSON with {len(geojson['features'])} features created.")


GeoJSON with 780 features created.


Save to CSV & GeoJSON

In [11]:
def write_csv(rows: List[Dict[str, Any]], path: str) -> None:
    fieldnames = [
        "osm_type", "osm_id", "name", "addr_street", "addr_housenumber",
        "addr_postcode", "addr_city", "opening_hours", "wheelchair",
        "phone", "email", "website", "lat", "lon"
    ]
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for el in rows:
            writer.writerow({k: el.get(k) for k in fieldnames})

def write_geojson(geo: Dict[str, Any], path: str) -> None:
    with open(path, "w", encoding="utf-8") as f:
        json.dump(geo, f, ensure_ascii=False, indent=2)

# Save both files
csv_path = "berlin_dentists.csv"
geojson_path = "berlin_dentists.geojson"

write_csv(rows, csv_path)
write_geojson(geojson, geojson_path)

print(f"Saved CSV to {csv_path}")
print(f"Saved GeoJSON to {geojson_path}")


Saved CSV to berlin_dentists.csv
Saved GeoJSON to berlin_dentists.geojson


Quick Data Preview

In [12]:
import pandas as pd

df = pd.DataFrame(rows)
df.head(10)  # show first 10 dentists


Unnamed: 0,osm_type,osm_id,name,addr_street,addr_housenumber,addr_postcode,addr_city,opening_hours,wheelchair,phone,email,website,lat,lon
0,node,304183504,,Hönower Straße,75,12623.0,Berlin,,,,,,52.511411,13.612096
1,node,313539258,Zahnzentrum Wedding,Müllerstraße,34a,13353.0,Berlin,Mo 09:00-19:00; Tu 09:00-18:00; We 09:00-17:00...,yes,,,,52.548838,13.355305
2,node,325161442,A. Nejad,,,,,Mo-Tu 09:00-19:00; We 09:00-14:00; Th 09:00-19...,yes,+49 30 361 91 06,,,52.508843,13.180477
3,node,345236220,Dr. Beate Lengert,Kurfürstendamm,218,10719.0,Berlin,,,,,http://www.dr-beate-lengert.de/,52.502722,13.328137
4,node,391394177,Serpil Hartfiel,Kollwitzstraße,77,10435.0,Berlin,Mo-Th 08:00-19:00; Fr 08:00-12:00,no,,,,52.537547,13.418994
5,node,420517053,"Zahnärzte Nicolas Weiss, Volker Landmann",,,,,"Mo-Fr ""nach Vereinbarung""",,,,,52.384968,13.40487
6,node,430545835,DentZ,Tempelhofer Damm,143,12099.0,Berlin,"Mo,Tu,Fr 09:00-16:00; We,Th 11:00-19:00",yes,+49 30 2647972600,,,52.466306,13.385948
7,node,442391661,Zahnklinik Medeco,,,,,"Mo-Fr 07:00-21:00; Sa,Su,PH 09:00-18:00",limited,,,,52.451063,13.385178
8,node,484267657,Mund-Kiefer-Gesichtschirugie,,,,Berlin,,,,,,52.525158,13.310129
9,node,552149348,Zahnärztliche Gemeinschaftspraxis,,,,,Mo-Th 08:00-19:00; Fr 08:00-14:00,,+4930 4542013,Kontakt@Zahnarztpraxis-Speda.de,https://www.zahnarztpraxis-speda.de/,52.541379,13.35379


Imports & File Setup

In [13]:
import pandas as pd
import os

# Path to the raw CSV produced from Overpass
input_file = "/Users/jamie/berlin_dentists.csv"

# Load the CSV
df = pd.read_csv(input_file)
df.head()


Unnamed: 0,osm_type,osm_id,name,addr_street,addr_housenumber,addr_postcode,addr_city,opening_hours,wheelchair,phone,email,website,lat,lon
0,node,304183504,,Hönower Straße,75,12623.0,Berlin,,,,,,52.511411,13.612096
1,node,313539258,Zahnzentrum Wedding,Müllerstraße,34a,13353.0,Berlin,Mo 09:00-19:00; Tu 09:00-18:00; We 09:00-17:00...,yes,,,,52.548838,13.355305
2,node,325161442,A. Nejad,,,,,Mo-Tu 09:00-19:00; We 09:00-14:00; Th 09:00-19...,yes,+49 30 361 91 06,,,52.508843,13.180477
3,node,345236220,Dr. Beate Lengert,Kurfürstendamm,218,10719.0,Berlin,,,,,http://www.dr-beate-lengert.de/,52.502722,13.328137
4,node,391394177,Serpil Hartfiel,Kollwitzstraße,77,10435.0,Berlin,Mo-Th 08:00-19:00; Fr 08:00-12:00,no,,,,52.537547,13.418994


Inspect Raw Data

In [14]:
print("Initial Data Info:")
print(df.info())
df.head(10)  # preview first 10 rows


Initial Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   osm_type          780 non-null    object 
 1   osm_id            780 non-null    int64  
 2   name              754 non-null    object 
 3   addr_street       571 non-null    object 
 4   addr_housenumber  571 non-null    object 
 5   addr_postcode     531 non-null    float64
 6   addr_city         523 non-null    object 
 7   opening_hours     588 non-null    object 
 8   wheelchair        288 non-null    object 
 9   phone             287 non-null    object 
 10  email             82 non-null     object 
 11  website           299 non-null    object 
 12  lat               780 non-null    float64
 13  lon               780 non-null    float64
dtypes: float64(3), int64(1), object(10)
memory usage: 85.4+ KB
None


Unnamed: 0,osm_type,osm_id,name,addr_street,addr_housenumber,addr_postcode,addr_city,opening_hours,wheelchair,phone,email,website,lat,lon
0,node,304183504,,Hönower Straße,75,12623.0,Berlin,,,,,,52.511411,13.612096
1,node,313539258,Zahnzentrum Wedding,Müllerstraße,34a,13353.0,Berlin,Mo 09:00-19:00; Tu 09:00-18:00; We 09:00-17:00...,yes,,,,52.548838,13.355305
2,node,325161442,A. Nejad,,,,,Mo-Tu 09:00-19:00; We 09:00-14:00; Th 09:00-19...,yes,+49 30 361 91 06,,,52.508843,13.180477
3,node,345236220,Dr. Beate Lengert,Kurfürstendamm,218,10719.0,Berlin,,,,,http://www.dr-beate-lengert.de/,52.502722,13.328137
4,node,391394177,Serpil Hartfiel,Kollwitzstraße,77,10435.0,Berlin,Mo-Th 08:00-19:00; Fr 08:00-12:00,no,,,,52.537547,13.418994
5,node,420517053,"Zahnärzte Nicolas Weiss, Volker Landmann",,,,,"Mo-Fr ""nach Vereinbarung""",,,,,52.384968,13.40487
6,node,430545835,DentZ,Tempelhofer Damm,143,12099.0,Berlin,"Mo,Tu,Fr 09:00-16:00; We,Th 11:00-19:00",yes,+49 30 2647972600,,,52.466306,13.385948
7,node,442391661,Zahnklinik Medeco,,,,,"Mo-Fr 07:00-21:00; Sa,Su,PH 09:00-18:00",limited,,,,52.451063,13.385178
8,node,484267657,Mund-Kiefer-Gesichtschirugie,,,,Berlin,,,,,,52.525158,13.310129
9,node,552149348,Zahnärztliche Gemeinschaftspraxis,,,,,Mo-Th 08:00-19:00; Fr 08:00-14:00,,+4930 4542013,Kontakt@Zahnarztpraxis-Speda.de,https://www.zahnarztpraxis-speda.de/,52.541379,13.35379


Normalize Text Fields

In [15]:
df['name'] = df['name'].fillna('Unknown').str.strip().str.title()
df['addr_street'] = df['addr_street'].fillna('').str.strip().str.title()
df['addr_housenumber'] = df['addr_housenumber'].fillna('').astype(str)
df['addr_city'] = df['addr_city'].fillna('Berlin').str.strip().str.title()
df['addr_postcode'] = df['addr_postcode'].fillna('').astype(str)

# Combine into a single full address string
df['address_full'] = df['addr_street'].str.strip() + " " + df['addr_housenumber'].str.strip()

df.head(10)


Unnamed: 0,osm_type,osm_id,name,addr_street,addr_housenumber,addr_postcode,addr_city,opening_hours,wheelchair,phone,email,website,lat,lon,address_full
0,node,304183504,Unknown,Hönower Straße,75,12623.0,Berlin,,,,,,52.511411,13.612096,Hönower Straße 75
1,node,313539258,Zahnzentrum Wedding,Müllerstraße,34a,13353.0,Berlin,Mo 09:00-19:00; Tu 09:00-18:00; We 09:00-17:00...,yes,,,,52.548838,13.355305,Müllerstraße 34a
2,node,325161442,A. Nejad,,,,Berlin,Mo-Tu 09:00-19:00; We 09:00-14:00; Th 09:00-19...,yes,+49 30 361 91 06,,,52.508843,13.180477,
3,node,345236220,Dr. Beate Lengert,Kurfürstendamm,218,10719.0,Berlin,,,,,http://www.dr-beate-lengert.de/,52.502722,13.328137,Kurfürstendamm 218
4,node,391394177,Serpil Hartfiel,Kollwitzstraße,77,10435.0,Berlin,Mo-Th 08:00-19:00; Fr 08:00-12:00,no,,,,52.537547,13.418994,Kollwitzstraße 77
5,node,420517053,"Zahnärzte Nicolas Weiss, Volker Landmann",,,,Berlin,"Mo-Fr ""nach Vereinbarung""",,,,,52.384968,13.40487,
6,node,430545835,Dentz,Tempelhofer Damm,143,12099.0,Berlin,"Mo,Tu,Fr 09:00-16:00; We,Th 11:00-19:00",yes,+49 30 2647972600,,,52.466306,13.385948,Tempelhofer Damm 143
7,node,442391661,Zahnklinik Medeco,,,,Berlin,"Mo-Fr 07:00-21:00; Sa,Su,PH 09:00-18:00",limited,,,,52.451063,13.385178,
8,node,484267657,Mund-Kiefer-Gesichtschirugie,,,,Berlin,,,,,,52.525158,13.310129,
9,node,552149348,Zahnärztliche Gemeinschaftspraxis,,,,Berlin,Mo-Th 08:00-19:00; Fr 08:00-14:00,,+4930 4542013,Kontakt@Zahnarztpraxis-Speda.de,https://www.zahnarztpraxis-speda.de/,52.541379,13.35379,


Remove Duplicates

In [16]:
before = len(df)
df = df.drop_duplicates(subset=['name', 'address_full'])
after = len(df)

print(f"Removed {before - after} duplicates, {after} rows remain.")


Removed 17 duplicates, 763 rows remain.


Normalize Phone Numbers

In [17]:
df['phone'] = (
    df['phone']
    .fillna('')
    .astype(str)
    .str.replace(r"\s+", "", regex=True)   # remove spaces
    .str.replace("-", "")                  # remove hyphens
)

df[['name', 'phone']].head(10)


Unnamed: 0,name,phone
0,Unknown,
1,Zahnzentrum Wedding,
2,A. Nejad,49303619106.0
3,Dr. Beate Lengert,
4,Serpil Hartfiel,
5,"Zahnärzte Nicolas Weiss, Volker Landmann",
6,Dentz,49302647972600.0
7,Zahnklinik Medeco,
8,Mund-Kiefer-Gesichtschirugie,
9,Zahnärztliche Gemeinschaftspraxis,49304542013.0


Standardize Wheelchair Field

In [18]:
df['wheelchair'] = df['wheelchair'].fillna('unknown').str.lower()
df['wheelchair'].value_counts()


wheelchair
unknown    479
no         152
yes         91
limited     41
Name: count, dtype: int64

Ensure Coordinates Are Valid

In [19]:
for col in ['lat', 'lon']:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop rows without valid coordinates
before = len(df)
df = df.dropna(subset=['lat', 'lon'])
after = len(df)

print(f"Removed {before - after} rows due to missing/invalid coordinates.")


Removed 0 rows due to missing/invalid coordinates.


Save Clean Data

In [20]:
output_file = os.path.join(os.path.dirname(input_file), "berlin_dentists_clean.csv")
df.to_csv(output_file, index=False)
print(f"✅ Transformed data saved to {output_file}")


✅ Transformed data saved to /Users/jamie/berlin_dentists_clean.csv


Preview Clean Dataset

In [21]:
df.sample(10, random_state=42)  # preview 10 random dentists


Unnamed: 0,osm_type,osm_id,name,addr_street,addr_housenumber,addr_postcode,addr_city,opening_hours,wheelchair,phone,email,website,lat,lon,address_full
362,node,7280430677,Dr. Ingrid Ispas,,,,Berlin,"Mo-We,Fr 09:00-12:00; Mo,Th 14:00-18:00",no,49307858022.0,,,52.492766,13.381717,
263,node,5479064923,Soraya Jalali,Wilmersdorfer Straße,55,10627.0,Berlin,"Mo-Tu 09:00-14:00,15:00-18:00; Th 09:00-14:00,...",unknown,,,,52.508034,13.305837,Wilmersdorfer Straße 55
766,node,12926260443,Dr. Leuenberg Zahnarztpraxis,Charlottenburger Straße,1,13086.0,Berlin,Mo 08:00-14:30; Tu 13:00-19:30; We 13:00-19:30...,unknown,49309253416.0,mail@leuenberg-zahnarztpraxis.de,https://www.leuenberg-zahnarztpraxis.de,52.550052,13.453648,Charlottenburger Straße 1
197,node,4665154493,Adentics Kieferorthopäden,,,,Berlin,,unknown,,,,52.387085,13.409332,
338,node,6915035485,Claudia Ruppert-Münnich,,,,Berlin,"Mo 08:00-18:00; Tu 08:00-14:00; We,Fr 08:00-12...",unknown,,,,52.386868,13.401874,
593,node,9756756767,Zahnarztpraxis Dr. Kristina Latt,Sabinensteig,14,13053.0,Berlin,"Mo 07:30-10:30,11:30-14:30; Tu 09:30-12:30,13:...",unknown,,,,52.549031,13.493374,Sabinensteig 14
772,way,68730451,Zahnärztin Dr. Med. Michaela Kirchner,Brunsbütteler Damm,274,13591.0,Berlin,"Mo,We 12:00-19:00; Tu,Th 08:00-14:00; Fr 08:00...",unknown,,,,52.533245,13.15568,Brunsbütteler Damm 274
139,node,3712880326,Frank Mertens,Goebenstraße,25,10783.0,Berlin,"Mo,Tu,Th 09:00-12:00,15:00-18:00; Fr 08:00-14:00",unknown,,,,52.494302,13.363198,Goebenstraße 25
68,node,2410196747,Dr. Roman Kogan,Schönhauser Allee,48,10437.0,Berlin,Mo-Th 08:00-20:00; Fr 08:00-19:00; Sa 09:00-14...,limited,,,,52.542128,13.412711,Schönhauser Allee 48
224,node,4989289053,Zahnarztpraxis Hawar Kadro,Alt-Moabit,84a,10555.0,Berlin,"Mo,Tu 09:00-18:00; We 09:00-16:00; Th 11:00-20...",no,49303917424.0,,http://zahnarzt-kadro.de/,52.525608,13.339835,Alt-Moabit 84a


download and install geopy so it can be used for geocoding etc

In [22]:
!pip install geopy




In [23]:
!pip install geopandas shapely pyproj fiona rtree



downloading the GeoJSON file with Berlin district boundaries

In [24]:
import requests

url = "https://tsb-opendata.s3.eu-central-1.amazonaws.com/bezirksgrenzen/bezirksgrenzen.geojson"
resp = requests.get(url)
with open("berlin_bezirksgeo.json", "wb") as f:
    f.write(resp.content)
print("GeoJSON downloaded as berlin_bezirksgeo.json")


GeoJSON downloaded as berlin_bezirksgeo.json


Imports and Read Files

In [25]:
import geopandas as gpd
from shapely.geometry import Point

# Load the Berlin districts GeoJSON
districts = gpd.read_file("berlin_bezirksgeo.json")

# Load your dentists cleaned CSV with lat/lon
dentists_df = gpd.read_file("berlin_dentists_clean.csv")  # We'll convert to GeoDataFrame later

print(f"Districts count: {len(districts)}")
print(f"Dentists count: {len(dentists_df)}")


Districts count: 12
Dentists count: 763


Prepare Dentists GeoDataFrame

In [26]:
# Create Point geometry from lat/lon
geometry = [Point(xy) for xy in zip(dentists_df['lon'], dentists_df['lat'])]

# Create GeoDataFrame of dentists with proper CRS (Coordinate Reference System)
dentists = gpd.GeoDataFrame(dentists_df, geometry=geometry, crs="EPSG:4326")

print(dentists.head())


  osm_type     osm_id                 name     addr_street addr_housenumber  \
0     node  304183504              Unknown  Hönower Straße               75   
1     node  313539258  Zahnzentrum Wedding    Müllerstraße              34a   
2     node  325161442             A. Nejad                                    
3     node  345236220    Dr. Beate Lengert  Kurfürstendamm              218   
4     node  391394177      Serpil Hartfiel  Kollwitzstraße               77   

  addr_postcode addr_city                                      opening_hours  \
0       12623.0    Berlin                                                      
1       13353.0    Berlin  Mo 09:00-19:00; Tu 09:00-18:00; We 09:00-17:00...   
2                  Berlin  Mo-Tu 09:00-19:00; We 09:00-14:00; Th 09:00-19...   
3       10719.0    Berlin                                                      
4       10435.0    Berlin                  Mo-Th 08:00-19:00; Fr 08:00-12:00   

  wheelchair         phone email            

Ensuring Both GeoDataFrames Use Same CRS

In [27]:
# Check CRS of districts and dentists
print("Districts CRS:", districts.crs)
print("Dentists CRS:", dentists.crs)

# Reproject dentists to district CRS if needed
if dentists.crs != districts.crs:
    dentists = dentists.to_crs(districts.crs)
    print("Dentists reprojected to match districts CRS")

print("After reprojection, dentists CRS:", dentists.crs)


Districts CRS: EPSG:4326
Dentists CRS: EPSG:4326
After reprojection, dentists CRS: EPSG:4326


Performing Spatial Join to Add District Info to Dentists

In [28]:
# Spatial join: assign each dentist a district polygon it falls within
dentists_with_districts = gpd.sjoin(dentists, districts, how="left", predicate="within")

# Inspect new columns from districts GeoDataFrame, e.g., 'Gemeinde_name' for district name
print(dentists_with_districts.columns)
print(dentists_with_districts[['name', 'Gemeinde_name']].head())


Index(['osm_type', 'osm_id', 'name', 'addr_street', 'addr_housenumber',
       'addr_postcode', 'addr_city', 'opening_hours', 'wheelchair', 'phone',
       'email', 'website', 'lat', 'lon', 'address_full', 'geometry',
       'index_right', 'gml_id', 'Gemeinde_name', 'Gemeinde_schluessel',
       'Land_name', 'Land_schluessel', 'Schluessel_gesamt'],
      dtype='object')
                  name               Gemeinde_name
0              Unknown         Marzahn-Hellersdorf
1  Zahnzentrum Wedding                       Mitte
2             A. Nejad                     Spandau
3    Dr. Beate Lengert  Charlottenburg-Wilmersdorf
4      Serpil Hartfiel                      Pankow


Dropping columns

In [29]:
dentists_with_districts.drop(columns=['gml_id'], inplace=True)


Renaming columns with German Heads to English

In [30]:
dentists_with_districts.rename(columns={
    'Gemeinde_name': 'district_name',
    'Gemeinde_schluessel': 'district_id',
    'Land_name': 'state_name',
    'Land_schluessel': 'state_code',
    'Schluessel_gesamt': 'full_district_id'
}, inplace=True)


Save Joined Result to CSV

In [31]:
output_file = "berlin_dentists_with_districts.csv"
dentists_with_districts.to_csv(output_file, index=False)
print(f"Saved dentists with districts info to {output_file}")


Saved dentists with districts info to berlin_dentists_with_districts.csv


Importing libraries and reading CSV with pandas

In [32]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Read CSV with pandas
df = pd.read_csv("berlin_dentists_with_districts.csv")
df.head()


Unnamed: 0,osm_type,osm_id,name,addr_street,addr_housenumber,addr_postcode,addr_city,opening_hours,wheelchair,phone,...,lat,lon,address_full,geometry,index_right,district_name,district_id,state_name,state_code,full_district_id
0,node,304183504,Unknown,Hönower Straße,75,12623.0,Berlin,,unknown,,...,52.511411,13.612096,Hönower Straße 75,POINT (13.612096 52.5114112),6,Marzahn-Hellersdorf,10,Berlin,11,11000010
1,node,313539258,Zahnzentrum Wedding,Müllerstraße,34a,13353.0,Berlin,Mo 09:00-19:00; Tu 09:00-18:00; We 09:00-17:00...,yes,,...,52.548838,13.355305,Müllerstraße 34a,POINT (13.3553052 52.5488382),9,Mitte,1,Berlin,11,11000001
2,node,325161442,A. Nejad,,,,Berlin,Mo-Tu 09:00-19:00; We 09:00-14:00; Th 09:00-19...,yes,49303619106.0,...,52.508843,13.180477,,POINT (13.1804772 52.5088434),7,Spandau,5,Berlin,11,11000005
3,node,345236220,Dr. Beate Lengert,Kurfürstendamm,218,10719.0,Berlin,,unknown,,...,52.502722,13.328137,Kurfürstendamm 218,POINT (13.3281367 52.5027217),1,Charlottenburg-Wilmersdorf,4,Berlin,11,11000004
4,node,391394177,Serpil Hartfiel,Kollwitzstraße,77,10435.0,Berlin,Mo-Th 08:00-19:00; Fr 08:00-12:00,no,,...,52.537547,13.418994,Kollwitzstraße 77,POINT (13.4189939 52.5375469),3,Pankow,3,Berlin,11,11000003


Converting to GeoDataFrame with Point geometry

In [33]:
# Create Point geometries from lon/lat columns
geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]

# Convert pandas DataFrame to GeoDataFrame, set CRS to WGS84 (EPSG:4326)
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

gdf.head()


Unnamed: 0,osm_type,osm_id,name,addr_street,addr_housenumber,addr_postcode,addr_city,opening_hours,wheelchair,phone,...,lat,lon,address_full,geometry,index_right,district_name,district_id,state_name,state_code,full_district_id
0,node,304183504,Unknown,Hönower Straße,75,12623.0,Berlin,,unknown,,...,52.511411,13.612096,Hönower Straße 75,POINT (13.6121 52.51141),6,Marzahn-Hellersdorf,10,Berlin,11,11000010
1,node,313539258,Zahnzentrum Wedding,Müllerstraße,34a,13353.0,Berlin,Mo 09:00-19:00; Tu 09:00-18:00; We 09:00-17:00...,yes,,...,52.548838,13.355305,Müllerstraße 34a,POINT (13.35531 52.54884),9,Mitte,1,Berlin,11,11000001
2,node,325161442,A. Nejad,,,,Berlin,Mo-Tu 09:00-19:00; We 09:00-14:00; Th 09:00-19...,yes,49303619106.0,...,52.508843,13.180477,,POINT (13.18048 52.50884),7,Spandau,5,Berlin,11,11000005
3,node,345236220,Dr. Beate Lengert,Kurfürstendamm,218,10719.0,Berlin,,unknown,,...,52.502722,13.328137,Kurfürstendamm 218,POINT (13.32814 52.50272),1,Charlottenburg-Wilmersdorf,4,Berlin,11,11000004
4,node,391394177,Serpil Hartfiel,Kollwitzstraße,77,10435.0,Berlin,Mo-Th 08:00-19:00; Fr 08:00-12:00,no,,...,52.537547,13.418994,Kollwitzstraße 77,POINT (13.41899 52.53755),3,Pankow,3,Berlin,11,11000003
