# In [1]:
import logging
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import requests
from tqdm import tqdm


# Endpoint of the geo.admin.ch SearchServer that every lookup is sent to.
API_URL = "https://api3.geo.admin.ch/rest/services/api/SearchServer"

# Input CSV, loaded once into the DataFrame the rest of the script works on.
input_file_path = 'data_immo_202405151452.csv'
df = pd.read_csv(filepath_or_buffer=input_file_path)


def geocode_address(row, timeout=10):
    """Look up coordinates for one address row via the SearchServer API.

    The search text is built from the row's ``Street``, ``Zip`` and ``City``
    entries and sent to ``API_URL`` as a ``locations`` search limited to a
    single hit.

    Args:
        row: A mapping-like row (e.g. a ``pandas.Series`` produced by
            ``DataFrame.iterrows``) with ``Street``, ``Zip`` and ``City``
            entries and a ``name`` attribute identifying the row.
        timeout: Seconds to wait for the HTTP request. Without a timeout a
            stalled connection would block the calling worker indefinitely.

    Returns:
        A ``(row.name, lat, lon)`` tuple. ``lat`` and ``lon`` are ``None``
        when the request fails, the response status is not 200, the result
        list is empty, or the response does not have the expected layout.
    """
    logger = logging.getLogger(__name__)
    try:
        address = f"{row['Street']}, {row['Zip']}, {row['City']}"
        params = {
            'searchText': address,
            'type': 'locations',
            # NOTE(review): 'sr' is the spatial reference sent to the API
            # (presumably EPSG:2056); the code below reads the 'lat'/'lon'
            # attributes as-is - confirm they are what downstream expects.
            'sr': '2056',
            'limit': 1,
        }
        response = requests.get(API_URL, params=params, timeout=timeout)
        if response.status_code == 200:
            data = response.json()
            if data['results']:
                location = data['results'][0]['attrs']
                return (row.name, location['lat'], location['lon'])
        else:
            logger.warning(
                "Geocoding row %s returned HTTP %s",
                row.name, response.status_code,
            )
    except (requests.RequestException, KeyError, IndexError,
            TypeError, ValueError) as exc:
        # Stay best-effort: one bad row must not abort the whole batch,
        # but the failure is recorded instead of being dropped silently.
        logger.warning("Geocoding row %s failed: %r", row.name, exc)
    return (row.name, None, None)


def parallel_geocode(df, max_workers=10, checkpoint_every=10,
                     checkpoint_pause=1, output_path=None):
    """Geocode every row of ``df`` in a thread pool, checkpointing to CSV.

    Each row is passed to ``geocode_address`` on a worker thread. Results
    are collected in completion order; after every ``checkpoint_every``
    results the new coordinates are written into the ``Latitude`` and
    ``Longitude`` columns of ``df`` (in place) and ``df`` is saved as CSV.
    Results left over after the last full batch are written once at the end.

    Args:
        df: DataFrame whose rows are geocoded. It is modified in place.
        max_workers: Number of worker threads in the pool.
        checkpoint_every: Number of completed results between two CSV
            writes. Must be at least 1.
        checkpoint_pause: Seconds to sleep in the collecting loop after each
            checkpoint write. All tasks are already submitted to the pool
            when the loop runs, so this pause does not slow down the
            requests themselves; pass ``0`` to skip it.
        output_path: Destination of the checkpoint CSV. When ``None`` the
            module-level ``output_file_path`` is used.

    Returns:
        A list of ``(index, lat, lon)`` tuples in completion order.

    Raises:
        ValueError: If ``checkpoint_every`` is smaller than 1.
    """
    if checkpoint_every < 1:
        raise ValueError("checkpoint_every must be at least 1")

    def write_checkpoint(batch):
        # Copy the batch into df, then persist the whole frame.
        for idx, lat, lon in batch:
            df.at[idx, 'Latitude'] = lat
            df.at[idx, 'Longitude'] = lon
        # Resolve the fallback lazily so the module-level name is only
        # needed when a write actually happens.
        target = output_file_path if output_path is None else output_path
        df.to_csv(target, index=False)

    results = []
    pending = []  # results not yet written to df / the CSV
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(geocode_address, row) for _, row in df.iterrows()
        ]
        for future in tqdm(as_completed(futures), total=len(futures)):
            result = future.result()
            results.append(result)
            pending.append(result)
            if len(pending) >= checkpoint_every:
                write_checkpoint(pending)
                pending = []
                if checkpoint_pause:
                    time.sleep(checkpoint_pause)  # pause after each file write
    if pending:
        # Flush the trailing partial batch so df and the CSV are complete.
        write_checkpoint(pending)
    return results


# Destination CSV; parallel_geocode also reads this module-level name for
# its intermediate saves, so it must be set before the call below.
output_file_path = 'data_immo_geocoded.csv'
results = parallel_geocode(df)

# Write every returned coordinate pair into the frame, including results
# that arrived after the last intermediate save.
for record in results:
    row_label, latitude, longitude = record
    df.at[row_label, 'Latitude'] = latitude
    df.at[row_label, 'Longitude'] = longitude

# Final save of the fully populated frame.
df.to_csv(output_file_path, index=False)


# Output: 100%|██████████| 12247/12247 [21:32<00:00,  9.47it/s]
