In [1]:
import codecs
import pandas as pd
import geopandas
import unicodedata
from shapely.geometry import LineString, Point

# Load the raw export into a DataFrame. The file is opened in a `with` block so
# the handle is closed as soon as pandas has finished parsing it.
with open('data/rawdata.json', 'r', encoding='utf-8') as rawFile:
    hikes = pd.read_json(rawFile)

### Functions
The geometry data nests coordinate lists inside further lists. Reduce the nesting until only a single list of coordinates remains.

In [2]:
def reduceCordsToOneLevelList(coordinates):
    """Strip redundant outer nesting from a coordinate structure.

    While the first element of ``coordinates`` is itself a sequence whose
    first element is a list, descend into that first element. The result is
    either a flat coordinate (no list elements) or a list whose first element
    is a single coordinate.

    Only the first branch is followed at every level: when an outer list
    holds several nested parts, the parts after the first are discarded.

    Args:
        coordinates: A (possibly nested) list of coordinates.

    Returns:
        The innermost reachable list as described above. Input that cannot be
        reduced further (empty list, empty first element, or a first element
        that is not a sequence) is returned unchanged instead of raising.
    """
    while any(isinstance(item, list) for item in coordinates):
        head = coordinates[0]
        # Check the first element before indexing into it: it may be empty or
        # a plain number even though a later element is a list.
        wrapsAnotherList = (
            isinstance(head, (list, tuple))
            and len(head) > 0
            and isinstance(head[0], list)
        )
        if not wrapsAnotherList:
            break
        coordinates = head
    return coordinates

### Get data from JSON
Traverse each trip/point in the loaded JSON data and build two lists of records: `points` and `lines`.

In [3]:
# Flattened records, split by geometry type.
points = []
lines = []

# iterrows() hands over each row once, so the row (and its nested dicts) is
# looked up a single time per facility instead of once per field.
for facilityId, hike in hikes.iterrows():
    geom = hike["the_geom"]
    subcategory = hike["subcategory"]
    organisation = hike["organisation"]
    recordType = geom["type"]

    record = {
        "facilityId":             facilityId,
        "coordinates":            reduceCordsToOneLevelList(geom["coordinates"]),
        "name":                   hike["name"],
        "shortdescription":       hike["shortdescription"],
        "longdescription":        hike["longdescription"],
        "subcategoryname":        hike["subcategoryname"],
        "lastedited":             hike["lastedited"],
        "maincategory_name":      subcategory["maincategory_name"],
        "organisation_name":      organisation["name"],
        "organisation_email":     organisation["email"],
        "organisation_telephone": organisation["telephone"],
        "organisation_url":       organisation["url"],
        "organisation_resource":  organisation["resource_uri"],
    }

    # Each attribute present on the facility becomes a True-valued flag key;
    # records without that attribute simply lack the key.
    for attribute in hike["attributes"]:
        attributeName = attribute["attributename"]
        record["attributes_" + attributeName.strip()] = True

    # Same for the attributes attached to the facility's subcategory.
    for attribute in subcategory["attributes"]:
        attributeName = attribute["attributename"]
        record["subcategory_" + attributeName.strip()] = True

    if recordType == "Point":  # Single point coordinate
        points.append(record)
    else:  # Every other geometry type is treated as a line (route)
        lines.append(record)

### Save the data to GPKG
Create a geodataframe with the geometry taken from coordinates.
Save the lines and the points as two separate layers in the same GPKG file.

In [4]:
GPKG_PATH = "data/Ud_I_Naturen.gpkg"
# NOTE(review): confirm that this CRS code matches the coordinate system of the
# coordinates in data/rawdata.json; it is taken over unchanged.
LAYER_CRS = "ESRI:103532"


def _writeRecordsAsLayer(records, geometryType, layerName):
    """Turn a list of records into a GeoDataFrame and write it as a GPKG layer.

    Args:
        records: List of dicts, each with a "coordinates" and a "facilityId" key.
        geometryType: Callable applied to every "coordinates" value to build the
            geometry (e.g. ``LineString`` or ``Point``).
        layerName: Name of the layer written to ``GPKG_PATH``.

    Returns:
        Tuple ``(df, gdf)``: the plain DataFrame built from ``records`` and the
        GeoDataFrame that was written to the file.
    """
    df = pd.DataFrame(records)
    gdf = geopandas.GeoDataFrame(df, geometry=df.coordinates.apply(geometryType), crs=LAYER_CRS)
    # The raw coordinate lists are now represented by the geometry column.
    gdf.drop("coordinates", axis=1, inplace=True)
    # Flag keys exist only on records that have the attribute; the resulting
    # NaN cells become False. Note that this fills every column, not only flags.
    gdf.fillna(False, inplace=True)
    gdf.set_index("facilityId", inplace=True)
    gdf.to_file(GPKG_PATH, driver="GPKG", layer=layerName)
    return df, gdf


linesDf, linesGdf = _writeRecordsAsLayer(lines, LineString, "lines")
pointsDf, pointsGdf = _writeRecordsAsLayer(points, Point, "points")