In [1]:
import pandas as pd
import lancedb
import json

In [2]:
# Accumulator for every layer definition: the cells below extend this list, and
# it is later JSON-serialised and loaded into a DataFrame.
layers = []

In [3]:
# Done
# SBTN Natural Lands (2020): a binary natural / non-natural layer and the
# detailed land-cover classification layer. Each legend is written once as
# (pixel value, colour, description) rows; the palette, the min/max stretch and
# the per-value metadata are all derived from those rows.
_SBTN_NATURAL_LEGEND = [
    (0, "#969696", "Non-natural land"),
    (1, "#a8ddb5", "Natural land"),
]

_SBTN_NON_NATURAL_GREY = "#D3D3D3"  # every non-natural class is drawn in the same grey
_SBTN_CLASSIFICATION_LEGEND = [
    (2, "#246E24", "natural forests"),
    (3, "#B9B91E", "natural short vegetation"),
    (4, "#6BAED6", "natural water"),
    (5, "#06A285", "mangroves"),
    (6, "#FEFECC", "bare"),
    (7, "#ACD1E8", "snow"),
    (8, "#589558", "wet natural forests"),
    (9, "#093D09", "natural peat forests"),
    (10, "#DBDB7B", "wet natural short vegetation"),
    (11, "#99991A", "natural peat short vegetation"),
    (12, _SBTN_NON_NATURAL_GREY, "crop"),
    (13, _SBTN_NON_NATURAL_GREY, "built"),
    (14, _SBTN_NON_NATURAL_GREY, "non-natural tree cover"),
    (15, _SBTN_NON_NATURAL_GREY, "non-natural short vegetation"),
    (16, _SBTN_NON_NATURAL_GREY, "non-natural water"),
    (17, _SBTN_NON_NATURAL_GREY, "wet non-natural tree cover"),
    (18, _SBTN_NON_NATURAL_GREY, "non-natural peat tree cover"),
    (19, _SBTN_NON_NATURAL_GREY, "wet non-natural short vegetation"),
    (20, _SBTN_NON_NATURAL_GREY, "non-natural peat short vegetation"),
    (21, _SBTN_NON_NATURAL_GREY, "non-natural bare"),
]


def _sbtn_layer(name, band, description, legend):
    """Build one categorical SBTN Natural Lands layer record from its legend rows.

    `legend` is a list of (value, colour, description) tuples ordered by pixel
    value; the visualisation stretch spans its smallest to largest value.
    """
    pixel_values = [value for value, _colour, _text in legend]
    return {
        "name": name,
        "dataset": "WRI/SBTN/naturalLands/v1/2020",
        "resolution": 30,
        "description": description,
        "year": 2020,
        "band": band,
        "type": "Image",
        "visualization_parameters": {
            "min": min(pixel_values),
            "max": max(pixel_values),
            "palette": [colour for _value, colour, _text in legend],
        },
        "metadata": {
            "layer_type": "categorical",
            "value_mappings": [
                {"value": value, "color_hexcode": colour, "description": text}
                for value, colour, text in legend
            ],
        },
    }


natural_lands_map = [
    _sbtn_layer(
        "Natural Lands Map",
        "natural",
        "The SBTN Natural Lands Map v1 is a 2020 baseline map of natural and non-natural land covers intended for use by companies setting science-based targets for nature, specifically the SBTN Land target #1: no conversion of natural ecosystems. 'Natural' and 'non-natural' definitions were adapted from the Accountability Framework initiative's definition of a natural ecosystem as \"one that substantially resembles - in terms of species composition, structure, and ecological function - what would be found in a given area in the absence of major human impacts\" and can include managed ecosystems as well as degraded ecosystems that are expected to regenerate either naturally or through management (AFi 2024). The SBTN Natural Lands Map operationalizes this definition by using proxies based on available data that align with AFi guidance to the extent possible.",
        _SBTN_NATURAL_LEGEND,
    ),
    _sbtn_layer(
        "Natural Lands - Classification",
        "classification",
        "The 'Natural Lands - Classification' layer shows the natural areas labeled by land cover. The SBTN Natural Lands Map v1 is a 2020 baseline map of natural and non-natural land covers intended for use by companies setting science-based targets for nature, specifically the SBTN Land target #1: no conversion of natural ecosystems.",
        _SBTN_CLASSIFICATION_LEGEND,
    ),
]
layers.extend(natural_lands_map)

In [4]:
# Done
import csv

# Legend for the annual GLAD land-cover classes: one CSV row per pixel value.
# The file is handed straight to DictReader and opened with newline="", which is
# how the csv module expects files to be opened so quoted fields are parsed
# correctly.
with open("../data/global_tree_cover_mappings_annual_land_class.csv", "r", newline="") as f:
    value_mappings = [
        {
            "value": int(row["Map value"]),
            "color_hexcode": row["Color code"],
            # The three class columns are joined with single spaces.
            "description": f"{row['General class']} {row['General-Sub-class']} {row['Sub-class']}"
        }
        for row in csv.DictReader(f)
    ]

# Colour ramp applied with the 0-255 stretch below (hex codes without '#').
_glad_land_cover_palette = [
    "FEFECC","FAFAC3","F7F7BB","F4F4B3","F1F1AB","EDEDA2","EAEA9A","E7E792","E4E48A",
    "E0E081","DDDD79","DADA71","D7D769","D3D360","D0D058","CDCD50","CACA48","C6C63F","C3C337","C0C02F","BDBD27","B9B91E","B6B616",
    "B3B30E","B0B006","609C60","5C985C","589558","549254","508E50","4C8B4C","488848","448544","408140","3C7E3C","387B38","347834",
    "317431","2D712D","296E29","256B25","216721","1D641D","196119","155E15","115A11","0D570D","095409","065106","643700","643a00",
    "643d00","644000","644300","644600","644900","654c00","654f00","655200","655500","655800","655a00","655d00","656000","656300",
    "666600","666900","666c00","666f00","667200","667500","667800","667b00","ff99ff","FC92FC","F98BF9","F685F6","F37EF3","F077F0",
    "ED71ED","EA6AEA","E763E7","E45DE4","E156E1","DE4FDE","DB49DB","D842D8","D53BD5","D235D2","CF2ECF","CC27CC","C921C9","C61AC6",
    "C313C3","C00DC0","BD06BD","bb00bb","000003","000004","000005","BFC0C0","B7BDC2","AFBBC4","A8B8C6","A0B6C9","99B3CB","91B1CD",
    "89AFD0","82ACD2","7AAAD4","73A7D6","6BA5D9","64A3DB","5CA0DD","549EE0","4D9BE2","4599E4","3E96E6","3694E9","2E92EB","278FED",
    "1F8DF0","188AF2","1088F4","0986F7","55A5A5","53A1A2","519E9F","4F9B9C","4D989A","4B9597","499294","478F91","458B8F","43888C",
    "418589","3F8286","3D7F84","3B7C81","39797E","37767B","357279","336F76","316C73","2F6970","2D666E","2B636B","296068","285D66",
    "bb93b0","B78FAC","B48CA9","B189A6","AE85A2","AA829F","A77F9C","A47B99","A17895","9E7592","9A718F","976E8C","946B88","916885",
    "8D6482","8A617F","875E7B","845A78","815775","7D5472","7A506E","774D6B","744A68","714765","de7cbb","DA77B7","D772B3","D46EAF",
    "D169AB","CE64A8","CB60A4","C85BA0","C4579C","C15298","BE4D95","BB4991","B8448D","B54089","B23B86","AF3682","AB327E","A82D7A",
    "A52976","A22473","9F1F6F","9C1B6B","991667","961264","000000","000000","000000",
    "1964EB","1555E4","1147DD","0E39D6","0A2ACF","071CC8","030EC1","0000BA",
    "0000BA","040464","0000FF","3051cf","000000","000000","000000","000000",
    "000000","000000","000000","000000","000000","000000","000000","000000",
    "000000","000000","000000","000000","000000","000000","000000","000000",
    "547FC4","4D77BA","466FB1","4067A7","395F9E","335895","335896","335897","ff2828","ffffff","d0ffff","ffe0d0","ff7d00","fac800","c86400",
    "fff000","afcd96","afcd96","64dcdc","00ffff","00ffff","00ffff","111133","000000",
]

# One layer record per published GLAD land-cover epoch.
glad_global_land_cover = [
    {
        "name": f"Annual global land cover and land use {year}",
        "dataset": f"projects/glad/GLCLU2020/v2/LCLUC_{year}",
        "resolution": 30,
        "description": "Global map with continuous measures of bare ground and tree height inside and outside of wetlands, seasonal water percent, and binary labels of built-up, permanent snow/ice, and cropland.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "visualization_parameters": {"min": 0, "max": 255, "palette": _glad_land_cover_palette},
        "metadata": {
            # Spelled "categorical" to match the other categorical layers
            # (this entry previously read "categorial").
            "layer_type": "categorical",
            "value_mappings": value_mappings
        }
    } for year in [2000,2005,2010,2015,2020]
]
# NOTE(review): registration is intentionally left disabled, as in the original cell.
#layers.extend(glad_global_land_cover)

In [5]:
# Done
# TODO: review other datasets in Confluence: 
# - probability cultivated grassland
# - dominant grass class
# - probability natural/semi-natural grassland
def _dominant_grassland_layer(year):
    """Return the layer record for the dominant-grassland-class image of `year`."""
    legend = [
        {"value": 0, "color_hexcode": "#ffffff", "description": "Other"},
        {"value": 1, "color_hexcode": "#ff9916", "description": "Cultivated grassland"},
        {"value": 2, "color_hexcode": "#ffcd73", "description": "Natural/Semi-natural grassland"},
    ]
    # The stretch starts at 1, so the palette only carries the two grassland colours.
    stretch = {"opacity": 1, "min": 1, "max": 2, "palette": ["ff9916", "ffcd73"]}
    return {
        "name": "Dominant Grasslands",
        "dataset": f"projects/global-pasture-watch/assets/ggc-30m/v1/grassland_c/{year}",
        "resolution": 30,
        "description": "This dataset provides global annual dominant class maps of grasslands (cultivated and natural/semi-natural) from 2000 to 2022 at 30-m spatial resolution. Produced by Land & Carbon Lab Global Pasture Watch initiative, the mapped grassland extent includes any land cover type, which contains at least 30% of dry or wet low vegetation, dominated by grasses and forbs (less than 3 meters) and a: maximum of 50% tree canopy cover (greater than 5 meters), maximum of 70% of other woody vegetation (scrubs and open shrubland), and a maximum of 50% active cropland cover in mosaic landscapes of cropland & other vegetation.",
        "year": year,
        "band": "dominant_class",
        "type": "Image",
        "metadata": {"layer_type": "categorical", "value_mappings": legend},
        "visualization_parameters": stretch,
    }


# NOTE(review): the description says 2000-2022 but the range starts at 2001 —
# confirm whether the 2000 image should be included.
dominant_grasslands = list(map(_dominant_grassland_layer, range(2001, 2023)))
layers.extend(dominant_grasslands)

In [6]:
# Done
def rgb(r, g, b):
    """Format three integer colour channels as a lowercase '#rrggbb' hex string.

    Each channel is rendered as at least two hex digits; no range check is made.
    """
    channels = (r, g, b)
    return "#%02x%02x%02x" % channels

# Legend of the forest-type codes carried by the GFT band.
_forest_type_legend = [
    {"value": 1, "color_hexcode": "#78c679", "description": "Naturally regenerating forest"},
    {"value": 10, "color_hexcode": "#006837", "description": "Primary forest"},
    {"value": 20, "color_hexcode": "#cc6600", "description": "Planted/Plantation forest"},
]
_forest_type_colour = {entry["value"]: entry["color_hexcode"] for entry in _forest_type_legend}

# 21-entry palette for the 0-20 stretch: white at 0, the legend colour at each
# class code, and black for every code that has no class.
_forest_type_palette = ["#ffffff"] + [
    _forest_type_colour.get(code, "#000000") for code in range(1, 21)
]

global_map_of_forest_types = [
    {
        "name": "Global map of forest types 2020",
        "dataset": "JRC/GFC2020_subtypes/V0",
        "resolution": 10,
        # The previous text was a copy of the above-ground-biomass description;
        # this field is part of the text embedded for search, so it must
        # describe this dataset.
        "description": "The global map of forest types provides a spatially explicit representation of primary forest, naturally regenerating forest and planted forest (including plantation forest) for the year 2020 at 10 m spatial resolution. The forest types are mapped within the forest extent of the JRC Global Forest Cover map for the year 2020 (JRC GFC 2020).",
        "year": 2020,
        "band": "GFT",
        "type": "ImageCollection",
        "metadata": {
            # Spelled "categorical" to match the other categorical layers.
            "layer_type": "categorical",
            "value_mappings": _forest_type_legend,
        },
        "visualization_parameters": {"min": 0, "max": 20, "palette": _forest_type_palette},
    }
]
layers.extend(global_map_of_forest_types)

In [7]:
# Done
# Colours are declared once so the palette and the legend stay in sync.
# TODO: validate "lightgray" used for value 0
_cropland_extent_colours = [rgb(211, 211, 211), rgb(74, 216, 74)]

# One layer per four-year cropland epoch.
glad_cropland_extent = [
    {
        # Space added before the parenthesis, matching the gain/loss layer names.
        "name": "Global cropland extent (2003-2019)",
        "dataset": f"projects/glad/GLCLU2020/Cropland_{year}",
        "resolution": 30,
        "description": "The 2000-2019 globally consistent cropland extent time-series at 30-m spatial resolution was derived from the Landsat satellite data archive. Cropland is defined as land used for annual and perennial herbaceous crops for human consumption, forage (including hay), and biofuel. The crop mapping was performed in four-year intervals.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "visualization_parameters": {"min": 0, "max": 1, "palette": _cropland_extent_colours},
        "metadata": {
            # Spelled "categorical" to match the other categorical layers.
            "layer_type": "categorical",
            # color_hexcode mirrors the palette, as in the other categorical legends.
            "value_mappings": [
                {"value": 0, "color_hexcode": _cropland_extent_colours[0], "description": "no croplands or no data"},
                {"value": 1, "color_hexcode": _cropland_extent_colours[1], "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_extent)

In [8]:
# Done
# 25 viridis colours, listed from the yellow end to the purple end.
_canopy_height_palette = (
    "#fde725 #e5e419 #c8e020 #addc30 #90d743 "
    "#75d054 #5ec962 #48c16e #35b779 #28ae80 "
    "#20a486 #1f9a8a #21918c #24868e #287c8e "
    "#2c728e #31688e #365d8d #3b528b #404688 "
    "#443983 #472d7b #481f70 #471063 #440154"
).split()

_canopy_height_layer = dict(
    name="Global Canopy Height Maps",
    dataset="projects/meta-forest-monitoring-okw37/assets/CanopyHeight",
    resolution=1,
    description="The Global Canopy Height Maps dataset offers comprehensive insights into tree canopy heights worldwide, providing an overview of tree canopy presence and height for the analysed period (2009-2020), with eighty per cent of the data obtained from imagery acquired between 2018 and 2020. The sub-meter resolution canopy height maps using self-supervised learning and a vision transformer trained on Aerial and GEDI Lidar.",
    year=2020,
    band="cover_code",
    type="ImageCollection",
    metadata={},  # no legend metadata is recorded for this layer
    visualization_parameters={"min": 0, "max": 25, "palette": _canopy_height_palette},
)

global_tree_canopy_height = [_canopy_height_layer]
layers.extend(global_tree_canopy_height)

In [9]:
# Done
# Legend for the 2000-2020 change classes: one CSV row per pixel value.
# The file is handed straight to DictReader and opened with newline="", which is
# how the csv module expects files to be opened so quoted fields are parsed
# correctly. Relies on `csv` having been imported by an earlier cell.
with open("../data/global_tree_cover_mappings_2000_2020_change.csv", "r", newline="") as f:
    value_mappings = [
        {
            "value": int(row["Map value"]),
            "color_hexcode": row["Color code"],
            # The three class columns are joined with single spaces.
            "description": f"{row['General class']} {row['General-Sub-class']} {row['Sub-class']}"
        }
        for row in csv.DictReader(f)
    ]

# Colour ramp applied with the 0-255 stretch below (hex codes without '#').
_glad_land_cover_change_palette = [
    "FEFECC","FAFAC3","F7F7BB","F4F4B3","F1F1AB","EDEDA2","EAEA9A","E7E792","E4E48A",
    "E0E081","DDDD79","DADA71","D7D769","D3D360","D0D058","CDCD50","CACA48","C6C63F","C3C337","C0C02F","BDBD27","B9B91E","B6B616",
    "B3B30E","B0B006","609C60","5C985C","589558","549254","508E50","4C8B4C","488848","448544","408140","3C7E3C","387B38","347834",
    "317431","2D712D","296E29","256B25","216721","1D641D","196119","155E15","115A11","0D570D","095409","065106","643700","643a00",
    "643d00","644000","644300","644600","644900","654c00","654f00","655200","655500","655800","655a00","655d00","656000","656300",
    "666600","666900","666c00","666f00","667200","667500","667800","667b00","ff99ff","FC92FC","F98BF9","F685F6","F37EF3","F077F0",
    "ED71ED","EA6AEA","E763E7","E45DE4","E156E1","DE4FDE","DB49DB","D842D8","D53BD5","D235D2","CF2ECF","CC27CC","C921C9","C61AC6",
    "C313C3","C00DC0","BD06BD","bb00bb","000003","000004","000005","BFC0C0","B7BDC2","AFBBC4","A8B8C6","A0B6C9","99B3CB","91B1CD",
    "89AFD0","82ACD2","7AAAD4","73A7D6","6BA5D9","64A3DB","5CA0DD","549EE0","4D9BE2","4599E4","3E96E6","3694E9","2E92EB","278FED",
    "1F8DF0","188AF2","1088F4","0986F7","55A5A5","53A1A2","519E9F","4F9B9C","4D989A","4B9597","499294","478F91","458B8F","43888C",
    "418589","3F8286","3D7F84","3B7C81","39797E","37767B","357279","336F76","316C73","2F6970","2D666E","2B636B","296068","285D66",
    "bb93b0","B78FAC","B48CA9","B189A6","AE85A2","AA829F","A77F9C","A47B99","A17895","9E7592","9A718F","976E8C","946B88","916885",
    "8D6482","8A617F","875E7B","845A78","815775","7D5472","7A506E","774D6B","744A68","714765","de7cbb","DA77B7","D772B3","D46EAF",
    "D169AB","CE64A8","CB60A4","C85BA0","C4579C","C15298","BE4D95","BB4991","B8448D","B54089","B23B86","AF3682","AB327E","A82D7A",
    "A52976","A22473","9F1F6F","9C1B6B","991667","961264","000000","000000","000000",
    "1964EB","1555E4","1147DD","0E39D6","0A2ACF","071CC8","030EC1","0000BA",
    "0000BA","040464","0000FF","3051cf","000000","000000","000000","000000",
    "000000","000000","000000","000000","000000","000000","000000","000000",
    "000000","000000","000000","000000","000000","000000","000000","000000",
    "547FC4","4D77BA","466FB1","4067A7","395F9E","335895","335896","335897","ff2828","ffffff","d0ffff","ffe0d0","ff7d00","fac800","c86400",
    "fff000","afcd96","afcd96","64dcdc","00ffff","00ffff","00ffff","111133","000000",
]

glad_global_land_cover_change = [
    {
        "name": "Global land cover and land use change (2000-2020)",
        "dataset": "projects/glad/GLCLU2020/v2/LCLUC",
        "resolution": 30,
        # "transitions" was misspelled; this text is part of what gets embedded for search.
        "description": "The GLAD Global Land Cover and Land Use Change dataset quantifies changes in forest extent and height, cropland, built-up lands, surface water, and perennial snow and ice extent from the year 2000 to 2020 at 30-m spatial resolution. Land cover and land use states of 2020 with transitions relative to 2000 labeled.",
        "year": 2020,
        "band": "b1",
        "type": "Image",
        "metadata": {
            # Spelled "categorical" to match the other categorical layers.
            "layer_type": "categorical",
            "value_mappings": value_mappings
        },
        "visualization_parameters": {"min": 0, "max": 255, "palette": _glad_land_cover_change_palette}
    }
]
layers.extend(glad_global_land_cover_change)

In [10]:
# Done
# Colours are declared once so the palette and the legend stay in sync.
# TODO: validate "lightgray" used for value 0
_cropland_gain_colours = [rgb(211, 211, 211), rgb(74, 216, 74)]

# NOTE(review): every entry points at the same Cropland_gain asset and differs
# only in "year", so this registers five otherwise identical layers — confirm
# whether a single entry is intended. The legend wording ("croplands") also
# looks copied from the extent layer; verify what values 0/1 mean for gain.
glad_cropland_gain = [
    {
        "name": "Global cropland gain (2003-2019)",
        "dataset": "projects/glad/GLCLU2020/Cropland_gain",
        "resolution": 30,
        "description": "Global cropland gain between 2000 and 2019.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "visualization_parameters": {"min": 0, "max": 1, "palette": _cropland_gain_colours},
        "metadata": {
            # Spelled "categorical" to match the other categorical layers.
            "layer_type": "categorical",
            # color_hexcode mirrors the palette, as in the other categorical legends.
            "value_mappings": [
                {"value": 0, "color_hexcode": _cropland_gain_colours[0], "description": "no croplands or no data"},
                {"value": 1, "color_hexcode": _cropland_gain_colours[1], "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_gain)

In [11]:
# Done
# Colours are declared once so the palette and the legend stay in sync.
# TODO: validate "lightgray" used for value 0
_cropland_loss_colours = [rgb(211, 211, 211), rgb(74, 216, 74)]

# NOTE(review): every entry points at the same Cropland_loss asset and differs
# only in "year", so this registers five otherwise identical layers — confirm
# whether a single entry is intended. The legend wording ("croplands") also
# looks copied from the extent layer; verify what values 0/1 mean for loss.
glad_cropland_loss = [
    {
        "name": "Global cropland loss (2003-2019)",
        "dataset": "projects/glad/GLCLU2020/Cropland_loss",
        "resolution": 30,
        "description": "Global cropland loss between 2000 and 2019.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "visualization_parameters": {"min": 0, "max": 1, "palette": _cropland_loss_colours},
        "metadata": {
            # Spelled "categorical" to match the other categorical layers.
            "layer_type": "categorical",
            # color_hexcode mirrors the palette, as in the other categorical legends.
            "value_mappings": [
                {"value": 0, "color_hexcode": _cropland_loss_colours[0], "description": "no croplands or no data"},
                {"value": 1, "color_hexcode": _cropland_loss_colours[1], "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_loss)

In [12]:
import matplotlib as mpl

# Palette: the first five entries of 'Greens' resampled to ten colours, scaled
# to 0-255 and truncated to integers before hex formatting.
# NOTE(review): resampling to 10 but taking only 5 keeps just the first half of
# the ramp — confirm this is intended.
greens = mpl.colormaps['Greens'].resampled(10)
scaled_rgb = (greens(range(5)) * 255)[:, :3]  # drop the alpha column
palette = [rgb(int(red), int(green), int(blue)) for red, green, blue in scaled_rgb]

_tropical_tree_cover_layer = {
    "name": "Tropical tree cover",
    "dataset": "projects/wri-datalab/TropicalTreeCover",
    "resolution": 10,
    "description": "Land & Carbon Lab’s Tropical Tree Cover data set, developed in collaboration with World Resource Institutes Global Restoration Initiative, uses globally-consistent satellite data at a 10-meter resolution to map tree cover across the tropics with greater granularity, improving our ability to quantify tree cover on non-forest lands like urban areas and cropland, and monitor trees at small spatial scales. This allows decision makers to better understand trees outside of dense forests, supports local communities who protect and restore these ecosystems to monitor their work and encourages greater investment in their projects.",
    "year": 2020,
    "band": "b1",
    "type": "ImageCollection",
    "visualization_parameters": {"min": 0, "max": 100, "palette": palette},
    "metadata": {
        "layer_type": "continuous",
        # TODO: should we include an additional text field to further describe what the min/max values represent?
        "value_mappings": {"min": 0, "max": 100},
    },
}
tropical_tree_cover = [_tropical_tree_cover_layer]
# Registration is left disabled, as in the original cell.
#layers.extend(tropical_tree_cover)

In [13]:
# Done
# (pixel value, colour, class name) for the Dynamic World `label` band, in value order.
_dynamic_world_legend = [
    (0, "#419bdf", "water"),
    (1, "#397d49", "trees"),
    (2, "#88b053", "grass"),
    (3, "#7a87c6", "flooded_vegetation"),
    (4, "#e49635", "crops"),
    (5, "#dfc35a", "shrub_and_scrub"),
    (6, "#c4281b", "built"),
    (7, "#a59b8f", "bare"),
    (8, "#b39fe1", "snow_and_ice"),
]

dynamic_world_landcover = [
    {
        "name": "Dynamic World",
        "dataset": "GOOGLE/DYNAMICWORLD/V1",
        "resolution": 10,
        "description": "Dynamic World is a 10m near-real-time (NRT) Land Use/Land Cover (LULC) dataset that includes class probabilities and label information for nine classes. Dynamic World predictions are available for the Sentinel-2 L1C collection from 2015-06-27 to present. The revisit frequency of Sentinel-2 is between 2-5 days depending on latitude. Dynamic World predictions are generated for Sentinel-2 L1C images with CLOUDY_PIXEL_PERCENTAGE <= 35%. Predictions are masked to remove clouds and cloud shadows using a combination of S2 Cloud Probability, Cloud Displacement Index, and Directional Distance Transform.",
        "year": 2024,
        "band": "label",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "categorical",
            "value_mappings": [
                {"value": value, "color_hexcode": colour, "description": label}
                for value, colour, label in _dynamic_world_legend
            ]
        },
        # The stretch must start at 0: the palette has nine colours for class
        # values 0..8, so min=0/max=8 puts each colour on its own class. With
        # the previous min=1 the nine colours were spread over 1..8 and no
        # longer lined up with the legend above.
        "visualization_parameters": {
            "min": 0,
            "max": 8,
            # Same colours as the legend, without the leading '#'.
            "palette": [colour.lstrip("#") for _value, colour, _label in _dynamic_world_legend],
        }
    }
]
layers.extend(dynamic_world_landcover)

In [14]:
# Done
def _agb_layer(year, band):
    """Return the ESA CCI biomass layer record for one (year, band) pair.

    The "SD" band is labelled as the uncertainty layer; any other band keeps
    the plain name.
    """
    suffix = " Uncertainty" if band == "SD" else ""
    green_ramp = [
        "#C6ECAE", "#A1D490", "#7CB970", "#57A751", "#348E32",
        "#267A29", "#176520", "#0C4E15", "#07320D", "#031807",
    ]
    return {
        "name": f"Global Forest Above Ground Biomass{suffix}",
        "dataset": f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_{band}_{year}_v4",
        "resolution": 100,
        "description": "This dataset provides estimates of forest above-ground biomass for the years 2010 and 2020 in tonnes per hectare (Mg/ha). These estimates are derived from a combination of Earth observation data, depending on the year, obtained from the Copernicus Sentinel-1 mission, Envisat's ASAR instrument, and JAXA's Advanced Land Observing Satellite (ALOS-1 and ALOS-2), along with additional information from other Earth observation sources.",
        "year": year,
        "band": band,
        "type": "ImageCollection",
        "metadata": {
            "value_mappings": {},  # TODO
        },
        "visualization_parameters": {"min": 1, "max": 450, "palette": green_ramp},
    }


# Year varies slowest: (2010, AGB), (2010, SD), (2020, AGB), (2020, SD).
global_forest_above_ground_biomass = [
    _agb_layer(year, band)
    for year in (2010, 2020)
    for band in ("AGB", "SD")
]
# Registration is left disabled, as in the original cell.
#layers.extend(global_forest_above_ground_biomass)

In [15]:
import pathlib

import matplotlib as mpl  # imported here so the cell does not depend on an earlier cell's import
import pandas as pd

# SPAM layer name -> matplotlib colormap used for its palette. The layer name is
# also the substring looked for in each CSV path.
_SPAM_COLORMAPS = {
    "yield": "YlGn",
    "physical_area": "Blues",
    "harvest_area": "YlGn",
    "production": "RdPu",
}

# SPAM layer name -> units recorded in the layer metadata.
# NOTE(review): "Mt" is kept as-is; confirm whether production is in metric
# tons or megatonnes.
_SPAM_UNITS = [
    ("yield", "kg/ha"), ("physical_area", "ha"), ("harvest_area", "ha"), ("production", "Mt"),
]

# SPAM crop code -> display name.
_SPAM_CROPS = {
    "WHEA":"Wheat", "RICE":"Rice","MAIZ":"Maize",
    "BARL":"Barley","MILL":"Small Millet","PMIL":"Pearl Millet",
    "SORG":"Sorghum","OCER":"Other Cereals","POTA":"Potato",
    "SWPO":"Sweet Potato", "YAMS":"Yams", "CASS":"Cassava",
    "ORTS":"Other Roots","BEAN":"Bean","CHIC":"Chickpea",
    "COWP":"Cowpea","PIGE":"Pigeon Pea","LENT":"Lentil",
    "OPUL":"Other Pulses","SOYB":"Soybean","GROU":"Groundnut",
    "CNUT":"Coconut","OILP":"Oilpalm","SUNF":"Sunflower",
    "RAPE":"Rapeseed","SESA":"Sesame Seed","OOIL":"Other Oil Crops",
    "SUGC":"Sugarcane","SUGB":"Sugarbeet","COTT":"Cotton",
    # Coffee names corrected from "Arabic" / "Robust".
    "OFIB":"Other Fibre Crops","COFF":"Arabica Coffee","RCOF":"Robusta Coffee",
    "COCO":"Cocoa", "TEAS":"Tea", "TOBA":"Tobacco",
    "BANA":"Banana","PLNT":"Plantain","CITR":"Citrus",
    "TROF":"Other Tropical Fruit", "TEMF":"Temperate Fruit", "TOMA":"Tomato",
    "ONIO":"Onion", "VEGE":"Other Vegetables", "RUBB":"Rubber",
    "REST":"Rest Of Crops",
}


def _colormap_palette(colormap_name, n_colors=5):
    """Return the `n_colors` entries of a resampled matplotlib colormap as '#rrggbb' strings.

    Channels are scaled to 0-255 and truncated with int() before formatting.
    Uses the notebook's `rgb` helper.
    """
    sampled = mpl.colormaps[colormap_name].resampled(n_colors)(range(n_colors)) * 255
    return [rgb(int(r), int(g), int(b)) for r, g, b, _alpha in sampled]


# Value stretch per layer: the min/max over every "*_A" column of the layer's CSV.
visualization_parameters = {}
for csv_path in pathlib.Path("../data").glob("**/*_TA.csv"):
    matched_layers = [name for name in _SPAM_COLORMAPS if name in str(csv_path)]
    if not matched_layers:
        continue  # not a SPAM layer file; skip before paying for the read
    crop_table = pd.read_csv(csv_path)
    crop_columns = [c for c in crop_table.columns if c.endswith("_A")]
    # float() turns the numpy scalars into plain floats so they print and
    # JSON-serialise cleanly.
    lowest = float(crop_table[crop_columns].min().min())
    highest = float(crop_table[crop_columns].max().max())
    for name in matched_layers:
        visualization_parameters[name] = {"min": lowest, "max": highest}

# Fail with a readable message instead of a bare KeyError when a layer's CSV is absent.
missing_layers = [name for name in _SPAM_COLORMAPS if name not in visualization_parameters]
if missing_layers:
    raise FileNotFoundError(
        f"No '*_TA.csv' file under ../data matched layer(s): {missing_layers}"
    )

for name, colormap_name in _SPAM_COLORMAPS.items():
    visualization_parameters[name]["palette"] = _colormap_palette(colormap_name)

print(visualization_parameters)

# One record per (layer, crop) pair; all crops of a layer share that layer's stretch.
global_cropland_yield_and_area = [
    {
        "name": f"Global Cropland Yield and Area ({layer}:{crop_name})",
        "dataset": f"users/cgiardata/spam_data/2020/{layer}/{crop_id}_A",
        "resolution": 10000,
        "description": f"A global dataset providing fine spatial resolution estimates of crop area, yield, and production, facilitating detailed analysis of agricultural production patterns and informing policy decisions. Crop name: {crop_name}, Layer: {layer}",
        "year": 2020,
        "band": "b1",
        "type": "Image",
        "metadata": {"units": units, "layer": layer, "crop_name":crop_name},
        "visualization_parameters": visualization_parameters[layer]
    }
    for layer, units in _SPAM_UNITS
    for crop_id, crop_name in _SPAM_CROPS.items()
]
# Registration is left disabled, as in the original cell.
#layers.extend(global_cropland_yield_and_area)

{'physical_area': {'min': 0.0, 'max': 25652.5, 'palette': ['#f7fbff', '#c6dbef', '#6baed6', '#2171b5', '#08306b']}, 'production': {'min': 0.0, 'max': 1209908.8, 'palette': ['#fff7f3', '#fcc5c0', '#f768a1', '#ae007e', '#49006a']}, 'harvest_area': {'min': 0.0, 'max': 52845.1, 'palette': ['#ffffe5', '#d9f0a3', '#78c679', '#238443', '#004529']}, 'yield': {'min': 0.0, 'max': 6777950.5, 'palette': ['#ffffe5', '#d9f0a3', '#78c679', '#238443', '#004529']}}


In [16]:
# (pixel value, colour, driver name), declared once so the palette and the
# legend cannot drift apart.
_dist_driver_legend = [
    (1, rgb(154, 59, 1), "wildfire"),
    (2, rgb(171, 140, 0), "crop_cycle"),
    (3, rgb(0, 0, 255), "flooding"),
    (4, rgb(219, 39, 119), "conversion"),
    (5, rgb(147, 51, 234), "other_conversion"),
]

disturbance_alert_drivers = [
    {
        "name": "WRI Disturbance Alert Drivers",
        "dataset": "wri-dist-alert-drivers",
        "resolution": 30,
        # Typos fixed ("posssible", "and or", a double space); this text is part
        # of what gets embedded for search.
        "description": "This dataset provides information about possible drivers and/or causes for the vegetation disturbance alerts data. There are five possible drivers that are registered: wildfires, changes in crop cycle, flooding, conversion of natural lands, and other conversions. Alerts associated with flooding events. Alerts in natural lands associated with uncontrolled fires. Fire-related alerts may be caused by natural events or human activity and may precede conversion events. Alerts associated with changes in crop planting/harvesting cycles. Alerts in natural lands likely related to human activity. All remaining confirmed DIST alerts with >50% vegetation loss that are not classified as conversion, wildfire, crop cycles, or flooding. May include drought, pests, landslides, etc. These are factors that are identified as the most probable underlying drivers for the vegetation disturbances, but are not always the direct causes.",
        "year": 2024,
        "band": "b1",
        "type": "ImageCollection",
        "metadata":{
            "layer_type": "categorical",
            "value_mappings": [
                {"value": value, "color_hexcode": colour, "description": driver}
                for value, colour, driver in _dist_driver_legend
            ]
        },
        "visualization_parameters": {
            "min": 1,
            "max": 5,
            "palette": [colour for _value, colour, _driver in _dist_driver_legend],
        }
    }
]
layers.extend(disturbance_alert_drivers)

In [17]:
def _as_json_text(value):
    """Return `value` as JSON text, leaving an already-serialised string untouched.

    The guard makes this cell safe to re-run: without it a second run would
    JSON-encode the strings produced by the first run.
    """
    return value if isinstance(value, str) else json.dumps(value)


# Replace the nested dict fields with JSON strings; layers without "metadata"
# get an empty object, while "visualization_parameters" is required.
layers = [{
        **layer,
        "metadata": _as_json_text(layer.get("metadata", {})),
        "visualization_parameters": _as_json_text(layer["visualization_parameters"])
    } for layer in layers
]

In [18]:
# One row per layer record; the bare `df` on the last line renders the table.
df = pd.DataFrame(layers)
df

Unnamed: 0,name,dataset,resolution,description,year,band,type,visualization_parameters,metadata
0,Natural Lands Map,WRI/SBTN/naturalLands/v1/2020,30,The SBTN Natural Lands Map v1 is a 2020 baseli...,2020,natural,Image,"{""min"": 0, ""max"": 1, ""palette"": [""#969696"", ""#...","{""layer_type"": ""categorical"", ""value_mappings""..."
1,Natural Lands - Classification,WRI/SBTN/naturalLands/v1/2020,30,The 'Natural Lands - Classification' layer sho...,2020,classification,Image,"{""min"": 2, ""max"": 21, ""palette"": [""#246E24"", ""...","{""layer_type"": ""categorical"", ""value_mappings""..."
2,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2001,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""..."
3,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2002,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""..."
4,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2003,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""..."
5,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2004,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""..."
6,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2005,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""..."
7,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2006,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""..."
8,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2007,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""..."
9,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2008,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""..."


In [19]:
# Sanity check: non-null count per column.
df.count()

name                        44
dataset                     44
resolution                  44
description                 44
year                        44
band                        44
type                        44
visualization_parameters    44
metadata                    44
dtype: int64

In [20]:
from langchain_ollama.embeddings import OllamaEmbeddings

# Embedding client backed by the "nomic-embed-text" model.
embedder = OllamaEmbeddings(model="nomic-embed-text")

# One text per layer, "<name> - <dataset> - <description>", embedded in a
# single batch call and stored alongside the layer metadata.
layer_texts = (
    df["name"] + " - " + df["dataset"] + " - " + df["description"]
).tolist()
df["vector"] = embedder.embed_documents(layer_texts)
df.head()

Unnamed: 0,name,dataset,resolution,description,year,band,type,visualization_parameters,metadata,vector
0,Natural Lands Map,WRI/SBTN/naturalLands/v1/2020,30,The SBTN Natural Lands Map v1 is a 2020 baseli...,2020,natural,Image,"{""min"": 0, ""max"": 1, ""palette"": [""#969696"", ""#...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.0050564115, 0.0059453645, -0.21542586, -0.0..."
1,Natural Lands - Classification,WRI/SBTN/naturalLands/v1/2020,30,The 'Natural Lands - Classification' layer sho...,2020,classification,Image,"{""min"": 2, ""max"": 21, ""palette"": [""#246E24"", ""...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.015649244, 0.0022550165, -0.21761675, -0.01..."
2,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2001,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.055874716, 0.0032470312, -0.22464111, 0.001..."
3,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2002,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.055449624, 0.0018201179, -0.22524701, 0.000..."
4,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2003,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05611936, 0.0032223125, -0.22531708, 0.0001..."


In [21]:
# Write the layer catalogue (with vectors) to the S3-backed LanceDB store under
# both a pinned version name and the "latest" alias.
db = lancedb.connect("s3://zeno-static-data/layers-context")
for table_name in ("zeno-layers-context-v1.2.1", "zeno-layers-context-latest"):
    # After the loop `table` refers to the "latest" table, which the search
    # cells below query.
    table = db.create_table(table_name, mode="overwrite", data=df)

[2025-01-27T16:50:30Z WARN  lance_table::io::commit] Using unsafe commit handler. Concurrent writes may result in data loss. Consider providing a commit handler that prevents conflicting writes.


In [22]:
# Sample natural-language query, embedded with the same embedder used for the layers.
query_text = "Grasslands preservation in Brazil"
query_embedding = embedder.embed_query(query_text)

In [23]:
# Nearest-neighbour search over the layer vectors, capped at 30 hits.
# `results` holds the query object; `.to_pandas()` materialises it as a
# DataFrame for display.
results = (
    table.search(query_embedding)
    .limit(30)
)
results.to_pandas()

Unnamed: 0,name,dataset,resolution,description,year,band,type,visualization_parameters,metadata,vector,_distance
0,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2006,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.057723086, 0.0040142275, -0.22560917, 0.000...",0.671497
1,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2005,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05711652, 0.0043431735, -0.22533567, -0.001...",0.674311
2,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2011,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05348556, 0.0036516907, -0.22355503, 0.0015...",0.674318
3,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2004,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05662974, 0.0032411597, -0.22615424, 0.0003...",0.674625
4,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2003,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05611936, 0.0032223125, -0.22531708, 0.0001...",0.67506
5,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2001,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.055874716, 0.0032470312, -0.22464111, 0.001...",0.675357
6,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2002,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.055449624, 0.0018201179, -0.22524701, 0.000...",0.675945
7,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2013,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.051458344, 0.004883893, -0.22452874, 0.0013...",0.677118
8,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2007,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.057227712, 0.0046041245, -0.22369905, 0.000...",0.677169
9,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2008,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.055125456, 0.0020664048, -0.22447293, 0.001...",0.677831


In [24]:
# Same search, excluding layers whose band is 'b1'.
# prefilter=True makes the filter run before the nearest-neighbour ranking, so
# `limit` counts matching rows only. With post-filtering the top 30 are picked
# first and then filtered, which can return fewer rows than requested.
filtered_results = (
    table.search(query_embedding)
    .where("band != 'b1'", prefilter=True)
    .limit(30)
)
filtered_results.to_pandas()

Unnamed: 0,name,dataset,resolution,description,year,band,type,visualization_parameters,metadata,vector,_distance
0,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2006,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.057723086, 0.0040142275, -0.22560917, 0.000...",0.671497
1,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2005,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05711652, 0.0043431735, -0.22533567, -0.001...",0.674311
2,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2011,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05348556, 0.0036516907, -0.22355503, 0.0015...",0.674318
3,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2004,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05662974, 0.0032411597, -0.22615424, 0.0003...",0.674625
4,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2003,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.05611936, 0.0032223125, -0.22531708, 0.0001...",0.67506
5,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2001,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.055874716, 0.0032470312, -0.22464111, 0.001...",0.675357
6,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2002,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.055449624, 0.0018201179, -0.22524701, 0.000...",0.675945
7,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2013,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.051458344, 0.004883893, -0.22452874, 0.0013...",0.677118
8,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2007,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.057227712, 0.0046041245, -0.22369905, 0.000...",0.677169
9,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,30,This dataset provides global annual dominant c...,2008,dominant_class,Image,"{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","{""layer_type"": ""categorical"", ""value_mappings""...","[0.055125456, 0.0020664048, -0.22447293, 0.001...",0.677831


In [25]:
# Resolve the most recent dataset for the best-matching layer name.
results = table.search(query_embedding).limit(20).to_pandas()
if results.empty:
    # Fail with a clear message instead of an opaque IndexError from .iloc[0].
    raise ValueError("Vector search returned no layers for this query")

# Layers sharing a name appear to be per-year variants with near-identical
# distances (see the search output above), so a top-20 cut-off can leave the
# newest year out of `results`. Look the name up in the full table instead of
# in the truncated hit list.
best_name = results.iloc[0]["name"]
all_layers = table.to_pandas()
same_layer = all_layers[all_layers["name"] == best_name]
same_layer.sort_values(by="year", ascending=False).iloc[0]["dataset"]


'projects/global-pasture-watch/assets/ggc-30m/v1/grassland_c/2022'