# Geocoding apartment addresses using the geoadmin API

## Libraries and settings

In [1]:
# Libraries
import os
import requests
import json
import urllib
import fnmatch
import folium
import pandas as pd
from IPython.display import clear_output

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

## Geocoding a single address

### Define base url for address search

In [2]:
# Endpoint of the address search; the trailing "?" opens the query string
base_url = "https://api3.geo.admin.ch/rest/services/api/SearchServer?"

# Search parameters: address text, origins and type
parameters = dict(
    searchText="Theaterstrasse 17, 8400 Winterthur",
    origins="address",
    type="locations",
)

# urllib.parse.urlencode serialises the parameters into the query string
query_string = urllib.parse.urlencode(parameters)
print(base_url + query_string)

https://api3.geo.admin.ch/rest/services/api/SearchServer?searchText=Theaterstrasse+17%2C+8400+Winterthur&origins=address&type=locations


### Server request & response

In [3]:
# Server request: requests encodes the parameters into the query string.
# The timeout (seconds) keeps the cell from hanging on an unresponsive server,
# and raise_for_status turns an HTTP error into an exception right here.
r = requests.get(base_url, params=parameters, timeout=10)
r.raise_for_status()

# Get data in json-format
data = r.json()

# The hits are listed under "results"; fail with a readable message if empty
results = data.get("results", [])
if not results:
    raise ValueError(f"No geocoding result for: {parameters['searchText']}")

# Take only the first server response, convert to data frame with relevant infos
df = pd.DataFrame.from_dict(results[0], orient='columns')

# Select the attributes by label instead of by position, so the output does
# not depend on the order in which the attributes are returned
df.loc[['featureId', 'label', 'lat', 'lon', 'x', 'y'], ['attrs']]

Unnamed: 0,attrs
featureId,2323240_0
label,Theaterstrasse 17 <b>8400 Winterthur</b>
lat,47.503517
lon,8.727852
x,262216.125
y,697128.5625


## Geocoding multiple addresses

### Importing apartment data

In [4]:
# Print the directory the notebook is running in
print(os.getcwd())

# List every .csv file found in that directory
flist = fnmatch.filter(os.listdir('.'), '*.csv')
for csv_name in flist:
    print(csv_name)

# Columns that are kept from the apartment data
selected_columns = ['web-scraper-order',
                    'address_raw',
                    'datetime',
                    'rooms',
                    'area',
                    'luxurious',
                    'price_per_m2']

# Read the file, keep the selected columns and the first 100 apartment addresses
df = (
    pd.read_csv('apartments_data_prepared.csv', sep=',', encoding='utf-8')
    .loc[:, selected_columns]
    .iloc[:100]
)

# Number of rows and columns
print(df.shape)

# First records
df.head(10)

/workspaces/data_analytics/Week_05
apartments_data_geocoded.csv
apartments_data_geocoded_all.csv
supermarkets_data_prepared.csv
apartments_data_prepared.csv
(100, 7)


Unnamed: 0,web-scraper-order,address_raw,datetime,rooms,area,luxurious,price_per_m2
0,1662023695-433,"Sunnenbergstrasse 15, 8633 Wolfhausen, ZH",2022-09-07 09:00:00,3.5,122,1,26.07
1,1662023745-820,"Lavaterstr. 63, 8002 Zürich, ZH",2022-09-07 09:00:00,2.5,78,0,48.21
2,1662023742-807,"Langfurrenstrasse 5c, 8623 Wetzikon ZH, ZH",2022-09-07 09:00:00,5.5,115,0,24.87
3,1662023804-1290,"Sandbuckweg 5A, 8157 Dielsdorf, ZH",2022-09-07 09:00:00,3.5,74,0,29.26
4,1662023739-771,"Parkring 59, 8002 Zürich, ZH",2022-09-07 09:00:00,5.5,195,1,35.38
5,1662023707-544,"Ifangweg 1, 8610 Uster, ZH",2022-09-07 09:00:00,2.0,47,0,29.79
6,1662023720-640,"Flobotstrasse 2, 8044 Zürich, ZH",2022-09-07 09:00:00,2.5,59,0,49.49
7,1662023686-384,"Hinterbergstrasse 57, 8044 Zürich, ZH",2022-09-07 09:00:00,3.0,75,0,40.04
8,1662023758-929,"Frohbergstr. 7, 8162 Steinmaur, ZH",2022-09-07 09:00:00,4.5,97,0,18.25
9,1662023727-685,"8306 Brüttisellen, ZH",2022-09-07 09:00:00,4.5,124,0,35.97


### Geocoding multiple apartment addresses using the geoadmin API

In [5]:
# Define base url
base_url = "https://api3.geo.admin.ch/rest/services/api/SearchServer?"

# Seconds to wait for the server before giving up on a single address
REQUEST_TIMEOUT = 10


def geocode_address(address, url=base_url, timeout=REQUEST_TIMEOUT):
    """Geocode one address with the geoadmin search service.

    Parameters
    ----------
    address : str
        Free-text address that is sent as ``searchText``.
    url : str
        Endpoint of the search service.
    timeout : float
        Seconds to wait for the server before the request is abandoned.

    Returns
    -------
    dict
        ``{'lat': float, 'lon': float}`` taken from the first hit, or NaN for
        both keys when the request fails or yields no usable hit.
    """
    missing = {'lat': float('nan'), 'lon': float('nan')}

    # Set up search parameters - address, origins and type
    parameters = {"searchText": address,
                  "origins": "address",
                  "type": "locations"}

    try:
        # Server request; requests encodes the parameters into the query string
        r = requests.get(url, params=parameters, timeout=timeout)
        r.raise_for_status()

        # The hits are listed under "results"; an empty list means no match
        results = r.json()["results"]
        if not results:
            return missing

        # Read the coordinates by name rather than by position
        attrs = results[0]["attrs"]
        return {'lat': float(attrs['lat']), 'lon': float(attrs['lon'])}

    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network/HTTP problem or unexpected payload: leave this address
        # without coordinates instead of aborting the whole run
        return missing


# Geocode list of addresses
addresses = df['address_raw']
geolocation = []
for n, address in enumerate(addresses, start=1):

    print('Geocoding address', n, 'out of', len(addresses), ':', address)
    clear_output(wait=True)

    if pd.isna(address):
        # Nothing to look up; avoids sending the literal text "nan"
        geolocation.append({'lat': float('nan'), 'lon': float('nan')})
    else:
        geolocation.append(geocode_address(str(address)))

# Write lat and lon to df; reusing df.index keeps the rows aligned even if
# df does not carry a plain 0..n-1 index
df_loc = pd.DataFrame(geolocation, columns=["lat", "lon"], index=df.index)
df['lat'] = df_loc['lat']
df['lon'] = df_loc['lon']

# Report how many addresses ended up without coordinates
print(df['lat'].isna().sum(), 'out of', len(df), 'addresses could not be geocoded')

df.head(5)

Unnamed: 0,web-scraper-order,address_raw,datetime,rooms,area,luxurious,price_per_m2,lat,lon
0,1662023695-433,"Sunnenbergstrasse 15, 8633 Wolfhausen, ZH",2022-09-07 09:00:00,3.5,122,1,26.07,47.255714,8.804976
1,1662023745-820,"Lavaterstr. 63, 8002 Zürich, ZH",2022-09-07 09:00:00,2.5,78,0,48.21,47.361378,8.533339
2,1662023742-807,"Langfurrenstrasse 5c, 8623 Wetzikon ZH, ZH",2022-09-07 09:00:00,5.5,115,0,24.87,47.328693,8.810411
3,1662023804-1290,"Sandbuckweg 5A, 8157 Dielsdorf, ZH",2022-09-07 09:00:00,3.5,74,0,29.26,47.477493,8.456285
4,1662023739-771,"Parkring 59, 8002 Zürich, ZH",2022-09-07 09:00:00,5.5,195,1,35.38,47.366898,8.528817


### Plot addresses on map

In [6]:
# Initialise the map with a fixed centre and zoom level
m = folium.Map(location=[47.44, 8.65], zoom_start=10)

# Keep only rows that have coordinates. Restricting dropna to lat/lon avoids
# discarding geocoded apartments that merely lack a value in another column.
df_sub = df.dropna(subset=['lat', 'lon'])

# Add one marker per address, with the raw address as popup text
for lat, lon, address in zip(df_sub['lat'], df_sub['lon'], df_sub['address_raw']):
    folium.Marker(location=(lat, lon), popup=str(address)).add_to(m)

# Layer control
folium.LayerControl().add_to(m)

# Plot map
m

### Save data to file

In [7]:
# Make sure the target folder exists; to_csv does not create missing
# directories and would raise an OSError otherwise
os.makedirs('Geodata', exist_ok=True)

# Write the geocoded apartments (without the index column) to a csv file
df.to_csv('Geodata/apartments_data_geocoded.csv',
          sep=",",
          encoding='utf-8',
          index=False)

### Jupyter notebook --footer info-- (please always provide this at the end of each submitted notebook)

In [8]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Horizontal rule that frames the footer
rule = '-----------------------------------'

# Collect the values first, then print them line by line
os_family = os.name.upper()
system_line = f"{platform.system()} | {platform.release()}"
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(rule)
print(os_family)
print(system_line)
print(f"Datetime: {timestamp}")
print(f"Python Version: {python_version()}")
print(rule)

-----------------------------------
POSIX
Linux | 5.15.0-1041-azure
Datetime: 2023-08-21 09:41:18
Python Version: 3.10.8
-----------------------------------
