In [8]:
pip install exifread pandas numpy scikit-learn folium

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [9]:
import os
import exifread
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import folium

In [10]:
# Helper: Convert GPS to decimal
def _convert_to_degrees(value):
    d = float(value[0].num) / float(value[0].den)
    m = float(value[1].num) / float(value[1].den)
    s = float(value[2].num) / float(value[2].den)
    return d + (m / 60.0) + (s / 3600.0)


In [11]:
# Extract GPS from image
def get_lat_lon(img_path):
    with open(img_path, 'rb') as f:
        tags = exifread.process_file(f, stop_tag='GPS GPSLongitude')
        
        if 'GPS GPSLatitude' in tags and 'GPS GPSLongitude' in tags:
            lat = _convert_to_degrees(tags['GPS GPSLatitude'].values)
            lon = _convert_to_degrees(tags['GPS GPSLongitude'].values)

            # Check direction (N/S, E/W)
            lat_ref = tags['GPS GPSLatitudeRef'].printable
            lon_ref = tags['GPS GPSLongitudeRef'].printable

            if lat_ref != 'N':
                lat = -lat
            if lon_ref != 'E':
                lon = -lon
            return lat, lon
    return None, None

In [12]:
# Process all images
image_folder = 'archive'
data = []
for filename in os.listdir(image_folder):
    if filename.lower().endswith('.jpg') or filename.lower().endswith('.jpeg'):
        path = os.path.join(image_folder, filename)
        lat, lon = get_lat_lon(path)
        if lat and lon:
            data.append({'filename': filename, 'Latitude': lat, 'Longitude': lon})

In [13]:
# Convert to DataFrame
df = pd.DataFrame(data)

if df.empty:
    print("No valid geotagged images found.")
    exit()

In [14]:
# DBSCAN Clustering
coords = df[['Latitude', 'Longitude']].values
scaler = StandardScaler()
coords_scaled = scaler.fit_transform(coords)
db = DBSCAN(eps=0.3, min_samples=3).fit(coords_scaled)
df['Cluster'] = db.labels_

In [15]:
# Save and Show Results
# ---------------------
print(df.head())
df.to_csv('clustered_locations.csv', index=False)

       filename   Latitude  Longitude  Cluster
0  IMG_7747.JPG  46.594915   6.566144        0
1  IMG_7753.JPG  46.593656   6.567699       -1
2  IMG_7752.JPG  46.593672   6.565396       -1
3  IMG_7746.JPG  46.595520   6.567692       -1
4  IMG_7750.JPG  46.594612   6.565440       -1


In [17]:
# Print all clustered results
print("\n📌 Clustering Results:")
for idx, row in df.iterrows():
    print(f"Image: {row['filename']}, Latitude: {row['Latitude']:.6f}, Longitude: {row['Longitude']:.6f}, Cluster: {row['Cluster']}")
    


📌 Clustering Results:
Image: IMG_7747.JPG, Latitude: 46.594915, Longitude: 6.566144, Cluster: 0
Image: IMG_7753.JPG, Latitude: 46.593656, Longitude: 6.567699, Cluster: -1
Image: IMG_7752.JPG, Latitude: 46.593672, Longitude: 6.565396, Cluster: -1
Image: IMG_7746.JPG, Latitude: 46.595520, Longitude: 6.567692, Cluster: -1
Image: IMG_7750.JPG, Latitude: 46.594612, Longitude: 6.565440, Cluster: -1
Image: IMG_7744.JPG, Latitude: 46.594793, Longitude: 6.568963, Cluster: -1
Image: IMG_7745.JPG, Latitude: 46.595473, Longitude: 6.568057, Cluster: -1
Image: IMG_7751.JPG, Latitude: 46.593992, Longitude: 6.564960, Cluster: -1
Image: IMG_7755.JPG, Latitude: 46.593541, Longitude: 6.568142, Cluster: 1
Image: IMG_7741.JPG, Latitude: 46.593817, Longitude: 6.568697, Cluster: -1
Image: IMG_7740.JPG, Latitude: 46.593201, Longitude: 6.567214, Cluster: -1
Image: IMG_7754.JPG, Latitude: 46.593567, Longitude: 6.568101, Cluster: 1
Image: IMG_7742.JPG, Latitude: 46.594180, Longitude: 6.568982, Cluster: -1
Image

In [18]:
# Visualize with Folium
m = folium.Map(location=[df['Latitude'].mean(), df['Longitude'].mean()], zoom_start=13)

colors = ['blue', 'green', 'purple', 'orange', 'darkred', 'cadetblue']
for _, row in df.iterrows():
    cluster_color = 'red' if row['Cluster'] == -1 else colors[row['Cluster'] % len(colors)]
    folium.CircleMarker(
        location=[row['Latitude'], row['Longitude']],
        radius=5,
        color=cluster_color,
        fill=True,
        fill_opacity=0.6,
        tooltip=f"Image: {row['filename']} | Cluster: {row['Cluster']}"
    ).add_to(m)

m.save('geotagged_clusters_map.html')
print("Map saved as 'geotagged_clusters_map.html'")

Map saved as 'geotagged_clusters_map.html'
