In [39]:
import arrow
import requests
import re
from pathlib import Path
import pandas as pd
from geojson import Feature, FeatureCollection
import geojson
import json

In [40]:
# Ask the Allmaps annotations API for the maps whose manifests are hosted on
# rosetta.slv.vic.gov.au (the `manifestdomain` query parameter).
response = requests.get(
    "https://annotations.allmaps.org/maps?manifestdomain=rosetta.slv.vic.gov.au",
    timeout=60,  # without a timeout a stalled connection would hang the notebook
)
# Stop here on an HTTP error instead of trying to parse an error page later.
response.raise_for_status()

In [41]:
# Decode the JSON body of the listing response; the harvesting loop further
# down iterates over its "items" list.
data = response.json()

In [42]:
# Decide whether this is the first harvest. When a previous CSV exists, only
# maps modified after its most recent "modified" value are processed;
# otherwise everything modified after 2025-01-01 is harvested.
first_run = not Path("georeferenced_maps.csv").exists()

if first_run:
    start_date = arrow.get("2025-01-01")
else:
    df_old = pd.read_csv("georeferenced_maps.csv")
    latest_modified = df_old["modified"].max()
    start_date = arrow.get(latest_modified)

In [43]:
def get_label(field):
    """Return the first entry of a label mapping as a single string.

    Args:
        field: A mapping whose values are lists of strings. Only the first
            value (in the mapping's iteration order) is used.

    Returns:
        The strings of the first value joined with " / ", or None when the
        mapping is empty.
    """
    label_lists = list(field.values())
    if not label_lists:
        return None
    return " / ".join(label_lists[0])
    

# Collect one record per map modified since `start_date`.
# `maps` holds flat records (properties plus geometry) for the CSV;
# `features` holds the matching GeoJSON features.
maps = []
features = []

# The output directories do not depend on the item, so create them once.
allmaps_maps_dir = Path("maps")
allmaps_geojson_dir = Path("geojson")
allmaps_maps_dir.mkdir(exist_ok=True, parents=True)
allmaps_geojson_dir.mkdir(exist_ok=True, parents=True)

for item in data["items"]:
    # Skip anything that has not changed since the previous harvest.
    if arrow.get(item["modified"]) <= start_date:
        continue

    map_url = item["id"]
    map_id = map_url.strip("/").split("/")[-1]
    source = item["target"]["source"]
    image_id = source["id"]
    parent = source["partOf"][0]

    properties = {
        "allmaps_map_id": map_url,
        "allmaps_manifest_id": item["body"]["_allmaps"]["image"]["canvases"][0]["manifests"][0]["id"],
        "image_id": image_id,
        # The IE and FL identifiers are embedded in the image id; an id that
        # does not match raises AttributeError here rather than being skipped.
        "ie_id": re.search(r"\/(IE\d+):", image_id).group(1),
        "fl_id": re.search(r":(FL\d+)\.", image_id).group(1),
        "map_title": get_label(parent["label"]),
        "manifest_title": get_label(parent["partOf"][0]["label"]),
        "area": item["body"]["_allmaps"]["area"],
        "modified": item["modified"],
        "created": item["created"],
    }

    # Use dedicated names for the per-map responses so the notebook-level
    # `response` (the listing request) is not overwritten by this loop.
    map_response = requests.get(map_url, timeout=60)
    map_response.raise_for_status()  # never save an HTTP error body as a map
    Path(allmaps_maps_dir, f"{map_id}.json").write_text(map_response.text, encoding="utf-8")

    # Built outside an f-string: nesting the same quote type inside an
    # f-string is a syntax error before Python 3.12.
    geojson_url = map_url.strip("/") + ".geojson"
    geojson_response = requests.get(geojson_url, timeout=60)
    geojson_response.raise_for_status()
    Path(allmaps_geojson_dir, f"{map_id}.geojson").write_text(geojson_response.text, encoding="utf-8")

    geom = geojson_response.json()["geometry"]
    geomap = properties.copy()
    geomap["geometry"] = geom
    maps.append(geomap)
    features.append(Feature(geometry=geom, properties=properties))

In [47]:
# Persist the harvest: a GeoJSON FeatureCollection plus a CSV of map records.
# `df` is bound in both branches so that code run after this block can use
# the combined table on a first run as well.
df_new = pd.DataFrame(maps)

if first_run:
    Path("georeferenced_maps.geojson").write_text(geojson.dumps(FeatureCollection(features=features)))
    df = df_new
else:
    geodata = geojson.loads(Path("georeferenced_maps.geojson").read_text())
    # Drop stored features for maps that were harvested again, so the GeoJSON
    # file keeps one feature per map, matching the CSV de-duplication below.
    refreshed_ids = {feature["properties"]["allmaps_map_id"] for feature in features}
    retained = [
        feature
        for feature in geodata["features"]
        if (feature.get("properties") or {}).get("allmaps_map_id") not in refreshed_ids
    ]
    geodata["features"] = retained + features
    Path("georeferenced_maps.geojson").write_text(geojson.dumps(geodata))
    # Sort newest first so drop_duplicates keeps the latest row for each map.
    df = (
        pd.concat([df_old, df_new])
        .sort_values("modified", ascending=False)
        .drop_duplicates("allmaps_map_id")
    )

df.to_csv("georeferenced_maps.csv", index=False)

In [38]:
# Add JSON-encoded link and thumbnail columns to `df`, then write a reduced
# CSV containing only the display columns.
# NOTE(review): the {"href", "label"} / {"img_src"} shapes are presumably for
# a Datasette rendering plugin, going by the output filename - confirm.
df["allmaps_link"] = df["allmaps_map_id"].apply(
    lambda map_url: json.dumps(
        {"href": f"https://viewer.allmaps.org/?url={map_url}", "label": "View in the AllMaps viewer"}
    )
)
# Single quotes inside the f-string: reusing the enclosing double quotes is a
# syntax error on Python versions before 3.12.
df["slv_link"] = df.apply(
    lambda row: json.dumps(
        {
            "href": f"https://viewer.slv.vic.gov.au/?entity={row['ie_id']}&file={row['fl_id']}&mode=browse",
            "label": "View in the SLV viewer",
        }
    ),
    axis=1,
)
df["thumbnail"] = df["image_id"].apply(
    lambda image_id: json.dumps({"img_src": f"{image_id}/full/!100,100/0/default.jpg"})
)

datasette_columns = ["thumbnail", "manifest_title", "map_title", "modified", "allmaps_link", "slv_link"]
df[datasette_columns].to_csv("georeferenced_maps_datasette.csv", index=False)