# GeoAI-VLM Demo

This notebook demonstrates the core features of the **GeoAI-VLM** package:

1. **Image Download**: Download street-level imagery from Mapillary API
2. **VLM Analysis**: Analyze images with Vision-Language Models
3. **GeoParquet I/O**: Read and write results
4. **Query Types**: Point, Line, BBox, Place queries

---
## 1. Setup

Configure the Mapillary API key and the output directory, then import the package.

In [None]:
import os
from pathlib import Path

# Folder that receives everything this demo writes; created if missing
OUTPUT_DIR = Path("./demo_output")
OUTPUT_DIR.mkdir(exist_ok=True)

# Mapillary API key, taken from the environment. When the variable is unset
# the placeholder below is used, and the download cells check for it to
# decide whether to run.
MLY_API_KEY = os.getenv("MLY_API_KEY", "YOUR_MAPILLARY_API_KEY")

print(f"Output directory: {OUTPUT_DIR.absolute()}")

In [None]:
# Import GeoAI-VLM package
from geoai_vlm import (
    # Pipeline functions (main API)
    describe_place,
    describe_point,
    describe_line,
    describe_bbox,
    
    # Query classes
    PointQuery,
    LineQuery,
    PlaceQuery,
    BBoxQuery,
    
    # Low-level classes
    ImageDescriber,
    MapillaryDownloader,
    
    # I/O functions
    load_geoparquet,
    load_results,
    list_results,
    summarize_results,
    export_formats,
    download_images,
)

from shapely.geometry import LineString
import geopandas as gpd

print("GeoAI-VLM loaded successfully!")

---
## 2. Loading Existing Results

Load previously generated GeoParquet files.

In [None]:
# Directory expected to hold previously generated result files
data_dir = Path("../sultanahmet_istanbul")

if not data_dir.exists():
    print(f"Data directory not found: {data_dir}")
else:
    # Report how many files list_results returns, then name each one
    parquet_files = list_results(data_dir)
    print(f"Found parquet files: {len(parquet_files)}")
    for f in parquet_files:
        print(f"  - {f.name}")

In [None]:
# Results file inside the data directory; later cells reuse this path
results_path = data_dir / "results.parquet"

if not results_path.exists():
    print(f"File not found: {results_path}")
else:
    # Keep the returned summary for later inspection
    summary = summarize_results(results_path)

In [None]:
# Load the results, then report the row count and, for every column,
# its dtype and how many values are non-null
if results_path.exists():
    gdf = load_results(results_path)
    print(f"\nTotal records: {len(gdf)}")
    print("\nColumns:")
    for col in gdf.columns:
        column_values = gdf[col]
        dtype = column_values.dtype
        non_null = column_values.notna().sum()
        print(f"  {col}: {dtype} ({non_null} values)")

    # Rich preview of the first rows
    display(gdf.head())

In [None]:
# Request only a few columns instead of the full table (for memory efficiency)
if results_path.exists():
    columns_to_load = [
        "image_id",
        "lat",
        "lon",
        "land_use_primary",
        "street_type",
    ]
    gdf_subset = load_results(results_path, columns=columns_to_load)

    # Show which columns actually came back, then preview the rows
    print(f"Loaded columns: {list(gdf_subset.columns)}")
    display(gdf_subset.head())

---
## 3. Point Query

Download and analyze images around a specific coordinate.

In [None]:
# Query centre: Hagia Sophia
HAGIA_SOPHIA_LAT = 41.0086
HAGIA_SOPHIA_LON = 28.9802

# A valid MLY_API_KEY is required; with the placeholder key the cell only
# prints a warning and does nothing else.
if MLY_API_KEY == "YOUR_MAPILLARY_API_KEY":
    print("⚠️ MLY_API_KEY not set. Please enter a valid Mapillary API key.")
else:
    # Download and describe images within a 100 m buffer of the point
    results_point = describe_point(
        lat=HAGIA_SOPHIA_LAT,
        lon=HAGIA_SOPHIA_LON,
        buffer_m=100,
        mly_api_key=MLY_API_KEY,
        output_dir=OUTPUT_DIR / "point_query",
        model_name="Qwen/Qwen3-VL-2B-Instruct",
        batch_size=4,
        max_images=10,  # keep the demo small
        verbosity=1,
    )
    print(f"\nProcessed images: {len(results_point)}")
    display(results_point.head())

---
## 4. Line Query

Query images along a street or route.

In [None]:
# Street segment in Sultanahmet, given as (lon, lat) vertices
street_coords = [
    (28.9760, 41.0070),  # start
    (28.9780, 41.0080),  # midpoint
    (28.9800, 41.0086),  # end
]

# Geometry passed to the line query
street_line = LineString(street_coords)

# With the placeholder key the cell only prints a warning
if MLY_API_KEY == "YOUR_MAPILLARY_API_KEY":
    print("⚠️ MLY_API_KEY not set.")
else:
    # Describe images within a 25 m buffer of the line
    results_line = describe_line(
        geometry=street_line,
        buffer_m=25,
        mly_api_key=MLY_API_KEY,
        output_dir=OUTPUT_DIR / "line_query",
        model_name="Qwen/Qwen3-VL-2B-Instruct",
        batch_size=4,
        max_images=10,
    )
    print(f"\nProcessed images along street: {len(results_line)}")

---
## 5. Metadata Only (No VLM)

Fast metadata retrieval - no GPU required.

In [None]:
# With the placeholder key the cell only prints a warning
if MLY_API_KEY == "YOUR_MAPILLARY_API_KEY":
    print("⚠️ MLY_API_KEY not set.")
else:
    # Point query with a 200 m buffer
    query = PointQuery(lat=41.0082, lon=28.9784, buffer_m=200)

    # metadata_only=True is passed so this call is limited to metadata
    metadata_gdf = download_images(
        query=query,
        mly_api_key=MLY_API_KEY,
        output_dir=OUTPUT_DIR / "metadata_only",
        metadata_only=True,
    )

    print(f"Found images: {len(metadata_gdf)}")
    # Preview a few identifying columns for the first ten rows
    display(metadata_gdf[["image_id", "captured_at", "distance_to_query_m"]].head(10))

---
## 6. Low-Level API Usage

Use the classes directly for more control.

In [None]:
# Step 1: build the point query (50 m buffer) that the next steps reuse
query = PointQuery(
    lat=41.0082,
    lon=28.9784,
    buffer_m=50,
)

# Echo the query and its buffer as a sanity check
print(f"Query: {query}")
print(f"Buffer: {query.buffer_m}m")

In [None]:
# With the placeholder key the cell only prints a warning
if MLY_API_KEY == "YOUR_MAPILLARY_API_KEY":
    print("⚠️ MLY_API_KEY not set.")
else:
    # Step 2: download up to five images for the query built in step 1
    downloader = MapillaryDownloader(mly_api_key=MLY_API_KEY, verbosity=1)
    metadata_gdf = downloader.download(
        query=query,
        output_dir=OUTPUT_DIR / "low_level_api",
        resolution=1024,
        max_images=5,
    )
    print(f"Downloaded images: {len(metadata_gdf)}")

In [None]:
# Step 3: describe the downloaded images with the VLM.
# Preconditions: a real API key, a `metadata_gdf` variable defined by an
# earlier cell, and at least one row in it.
images_ready = (
    MLY_API_KEY != "YOUR_MAPILLARY_API_KEY"
    and "metadata_gdf" in globals()
    and len(metadata_gdf) > 0
)

if not images_ready:
    print("Download images first.")
else:
    describer = ImageDescriber(
        model_name="Qwen/Qwen3-VL-2B-Instruct",
        backend="auto",  # Uses VLLM if available
        prompt_template="geoai",
    )

    # Run over the directory the previous step downloaded into
    descriptions_df = describer.describe(
        image_dir=OUTPUT_DIR / "low_level_api",
        batch_size=4,
        resume=True,
    )
    print(f"Generated descriptions: {len(descriptions_df)}")

---
## 7. Visualize Results

In [None]:
# Load results and display them on an interactive map, falling back to a
# plain-text preview when the map cannot be built.
if results_path.exists():
    gdf = load_geoparquet(results_path)

    # Colour by land use only when that column is present
    color_column = "land_use_primary" if "land_use_primary" in gdf.columns else None

    # Only request tooltip fields that actually exist; asking for a missing
    # column (e.g. street_type absent while land_use_primary is present)
    # would make the map call fail.
    tooltip_columns = [
        c for c in ("image_id", "land_use_primary", "street_type") if c in gdf.columns
    ]

    try:
        # Interactive map (requires folium)
        m = gdf.explore(
            column=color_column,
            cmap="Set2",
            # With no known column available, fall back to explore's default
            tooltip=tooltip_columns if tooltip_columns else True,
            marker_kwds={"radius": 5},
        )
        display(m)
    except Exception as e:
        # Deliberate best-effort: any failure degrades to a text preview
        print(f"Interactive map not available: {e}")
        print("\nResults preview:")
        # Filter the preview columns too, so the fallback itself cannot
        # raise KeyError from inside the handler
        preview_columns = [c for c in ("image_id", "lat", "lon") if c in gdf.columns]
        print(gdf[preview_columns].head() if preview_columns else gdf.head())

---
## 8. Analyze Results

In [None]:
if results_path.exists():
    gdf = load_geoparquet(results_path)

    # (column, heading) pairs; each is reported, in this order, only when
    # the column is present in the loaded frame
    distributions = (
        ("land_use_primary", "Land Use Distribution:"),
        ("street_type", "\nStreet Type Distribution:"),
        ("place_character", "\nPlace Character Distribution:"),
    )

    for column_name, heading in distributions:
        if column_name in gdf.columns:
            print(heading)
            print(gdf[column_name].value_counts())

In [None]:
# Semantic tags analysis: count how often each tag occurs across all rows.
# Relies on `gdf` having been loaded by an earlier cell.
if results_path.exists() and "semantic_tags" in gdf.columns:
    from collections import Counter

    # Only string values are parsed (as comma-separated tags); each piece is
    # stripped, and empty pieces from trailing/doubled commas or empty strings
    # are dropped so they are not counted as a tag named "".
    all_tags = [
        tag
        for tags in gdf["semantic_tags"].dropna()
        if isinstance(tags, str)
        for tag in (piece.strip() for piece in tags.split(","))
        if tag
    ]

    tag_counts = Counter(all_tags)
    print("Top 15 Semantic Tags:")
    for tag, count in tag_counts.most_common(15):
        print(f"  {tag}: {count}")

---
## 9. Export to Other Formats

In [None]:
if results_path.exists():
    gdf = load_geoparquet(results_path)

    # Write the results under OUTPUT_DIR/exports in each requested format
    export_dir = OUTPUT_DIR / "exports"
    output_paths = export_formats(
        gdf,
        output_dir=export_dir,
        basename="analysis_results",
        formats="geoparquet geojson csv",
    )

    # List each format with the path returned for it
    print("\nExported files:")
    for fmt, path in output_paths.items():
        print(f"  {fmt}: {path}")