In [1]:
import pandas as pd
import lancedb
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
layers = [
    {
        "name": "ESA WorldCover",
        "dataset": "ESA/WorldCover/v200",
        "description": "The European Space Agency (ESA) WorldCover 10 m 2021 product provides a global land cover map for 2021 at 10 m resolution based on Sentinel-1 and Sentinel-2 data. The WorldCover product comes with 11 land cover classes and has been generated in the framework of the ESA WorldCover project, part of the 5th Earth Observation Envelope Programme (EOEP-5) of the European Space Agency.",
        "resolution": 10,
        "year": 2021,
        "band": "Map",
        "type": "ImageCollection",
    },
    {
        "name": "Dynamic World V1",
        "dataset": "GOOGLE/DYNAMICWORLD/V1",
        "description": "  Dynamic World is a 10m near-real-time (NRT) Land Use/Land Cover (LULC) dataset that includes class probabilities and label information for nine classes.  Dynamic World predictions are available for the Sentinel-2 L1C collection from 2015-06-27 to present. The revisit frequency of Sentinel-2 is between 2-5 days depending on latitude. Dynamic World predictions are generated for Sentinel-2 L1C images with CLOUDY_PIXEL_PERCENTAGE <= 35%. Predictions are masked to remove clouds and cloud shadows using a combination of S2 Cloud Probability, Cloud Displacement Index, and Directional Distance Transform. Given Dynamic World class estimations are derived from single images using a spatial context from a small moving window, top-1 'probabilities' for predicted land covers that are in-part defined by cover over time, like crops, can be comparatively low in the absence of obvious distinguishing features. High-return surfaces in arid climates, sand, sunglint, etc may also exhibit this phenomenon.  To select only pixels that confidently belong to a Dynamic World class, it is recommended to mask Dynamic World outputs by thresholding the estimated 'probability' of the top-1 prediction. ",
        "resolution": 10,
        "year": 2024,
        "band": "label",
        "type": "ImageCollection",
    },
    {
        "name": "Global 4-class PALSAR-2/PALSAR Forest/Non-Forest Map",
        "dataset": "JAXA/ALOS/PALSAR/YEARLY/FNF4",
        "description": "The global forest/non-forest map (FNF) is generated by classifying the SAR image (backscattering coefficient) in the global 25m resolution PALSAR-2/PALSAR SAR mosaic so that strong and low backscatter pixels are assigned as 'forest' and 'non-forest', respectively. Here, 'forest' is defined as the natural forest with the area larger than 0.5 ha and forest cover over 10%. This definition is the same as the Food and Agriculture Organization (FAO) definition. Since the radar backscatter from the forest depends on the region (climate zone), the classification of Forest/Non-Forest is conducted by using a region-dependent threshold of backscatter. The classification accuracy is checked by using in-situ photos and high-resolution optical satellite images.",
        "resolution": 25,
        "year": 2018,
        "band": "fnf",
        "type": "ImageCollection",
    },
    {
        "name": "MCD12Q1.061 MODIS Land Cover Type Yearly Global 500m",
        "dataset": "MODIS/061/MCD12Q1",
        "description": "The Terra and Aqua combined Moderate Resolution Imaging Spectroradiometer (MODIS) Land Cover Type (MCD12Q1) Version 6.1 data product provides global land cover types at yearly intervals. The MCD12Q1 Version 6.1 data product is derived using supervised classifications of MODIS Terra and Aqua reflectance data. Land cover types are derived from the International Geosphere-Biosphere Programme (IGBP), University of Maryland (UMD), Leaf Area Index (LAI), BIOME-Biogeochemical Cycles (BGC), and Plant Functional Types (PFT) classification schemes. The supervised classifications then underwent additional post-processing that incorporate prior knowledge and ancillary information to further refine specific classes. Additional land cover property assessment layers are provided by the Food and Agriculture Organization (FAO) Land Cover Classification System (LCCS) for land cover, land use, and surface hydrology.",
        "resolution": 500,
        "year": 2023,
        "band": "LC_Type1",
        "type": "ImageCollection",
    },
    {
        "name": "ESA WorldCereal 10 m v100",
        "dataset": "ESA/WorldCereal/2021/MODELS/v100",
        "description": "The European Space Agency (ESA) WorldCereal 10 m 2021 product suite consists of global-scale annual and seasonal crop maps and their related confidence. They were generated as part of the ESA-WorldCereal project. More information on the content of these products and the methodology used to generate them is described in [1].  This collection contains up to 106 agro-ecological zone (AEZ) images for each product which were all processed with respect to their own regional seasonality and should be considered as independent products. These seasons are described in the list below and were developed in [2] as part of the project. Note that cereals as described by WorldCereal include wheat, barley, and rye, which belong to the Triticeae tribe.  WorldCereal seasons description:      tc-annual: a one-year cycle being defined in an AEZ by the end of the last considered growing season     tc-wintercereals: the main cereals season defined in an AEZ     tc-springcereals: optional springcereals season, only defined in certain AEZ     tc-maize-main: the main maize season defined in an AEZ     tc-maize-second: optional second maize season, only defined in certain AEZ ",
        "resolution": 10,
        "year": 2021,
        "band": "classification",
        "type": "ImageCollection",
    },
]


In [3]:
# Done
glad_global_land_cover = [
    {
        "name": f"Annual global land cover and land use {year}",
        "dataset": f"projects/glad/GLCLU2020/v2/LCLUC_{year}",
        "resolution": 30,
        "description": "Global map with continuous measures of bare ground and tree height inside and outside of wetlands, seasonal water percent, and binary labels of built-up, permanent snow/ice, and cropland.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "visualization_parameters": {"min":0,"max":255,"palette":["FEFECC","FAFAC3","F7F7BB","F4F4B3","F1F1AB","EDEDA2","EAEA9A","E7E792","E4E48A",
"E0E081","DDDD79","DADA71","D7D769","D3D360","D0D058","CDCD50","CACA48","C6C63F","C3C337","C0C02F","BDBD27","B9B91E","B6B616",
"B3B30E","B0B006","609C60","5C985C","589558","549254","508E50","4C8B4C","488848","448544","408140","3C7E3C","387B38","347834",
"317431","2D712D","296E29","256B25","216721","1D641D","196119","155E15","115A11","0D570D","095409","065106","643700","643a00",
"643d00","644000","644300","644600","644900","654c00","654f00","655200","655500","655800","655a00","655d00","656000","656300",
"666600","666900","666c00","666f00","667200","667500","667800","667b00","ff99ff","FC92FC","F98BF9","F685F6","F37EF3","F077F0",
"ED71ED","EA6AEA","E763E7","E45DE4","E156E1","DE4FDE","DB49DB","D842D8","D53BD5","D235D2","CF2ECF","CC27CC","C921C9","C61AC6",
"C313C3","C00DC0","BD06BD","bb00bb","000003","000004","000005","BFC0C0","B7BDC2","AFBBC4","A8B8C6","A0B6C9","99B3CB","91B1CD",
"89AFD0","82ACD2","7AAAD4","73A7D6","6BA5D9","64A3DB","5CA0DD","549EE0","4D9BE2","4599E4","3E96E6","3694E9","2E92EB","278FED",
"1F8DF0","188AF2","1088F4","0986F7","55A5A5","53A1A2","519E9F","4F9B9C","4D989A","4B9597","499294","478F91","458B8F","43888C",
"418589","3F8286","3D7F84","3B7C81","39797E","37767B","357279","336F76","316C73","2F6970","2D666E","2B636B","296068","285D66",
"bb93b0","B78FAC","B48CA9","B189A6","AE85A2","AA829F","A77F9C","A47B99","A17895","9E7592","9A718F","976E8C","946B88","916885",
"8D6482","8A617F","875E7B","845A78","815775","7D5472","7A506E","774D6B","744A68","714765","de7cbb","DA77B7","D772B3","D46EAF",
"D169AB","CE64A8","CB60A4","C85BA0","C4579C","C15298","BE4D95","BB4991","B8448D","B54089","B23B86","AF3682","AB327E","A82D7A",
"A52976","A22473","9F1F6F","9C1B6B","991667","961264","000000","000000","000000",
"1964EB","1555E4","1147DD","0E39D6","0A2ACF","071CC8","030EC1","0000BA",
"0000BA","040464","0000FF","3051cf","000000","000000","000000","000000",
"000000","000000","000000","000000","000000","000000","000000","000000",
"000000","000000","000000","000000","000000","000000","000000","000000",
"547FC4","4D77BA","466FB1","4067A7","395F9E","335895","335896","335897","ff2828","ffffff","d0ffff","ffe0d0","ff7d00","fac800","c86400",
"fff000","afcd96","afcd96","64dcdc","00ffff","00ffff","00ffff","111133","000000"]},
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": ""} # TODO: fetch pixel value mappings once access to document is granted
            ]
        }
    } for year in [2000,2005,2010,2015,2020]
]
layers.extend(glad_global_land_cover)

In [4]:
glad_global_land_cover_change = [
        {
        "name": "Global land cover and land use change (2000-2020)",
        "dataset": "projects/glad/GLCLU2020/v2/LCLUC",
        "resolution": 30,
        "description": "The GLAD Global Land Cover and Land Use Change dataset quantifies changes in forest extent and height, cropland, built-up lands, surface water, and perennial snow and ice extent from the year 2000 to 2020 at 30-m spatial resolution. Land cover and land use states of 2020 with tranistions relative to 2000 labeled.",
        "year": 2020,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": ""} # TODO: fetch pixel value mappings once access to document is granted
            ]
        }, 
        "visualization_parameters": {"min":0,"max":255,"palette":["FEFECC","FAFAC3","F7F7BB","F4F4B3","F1F1AB","EDEDA2","EAEA9A","E7E792","E4E48A",
"E0E081","DDDD79","DADA71","D7D769","D3D360","D0D058","CDCD50","CACA48","C6C63F","C3C337","C0C02F","BDBD27","B9B91E","B6B616",
"B3B30E","B0B006","609C60","5C985C","589558","549254","508E50","4C8B4C","488848","448544","408140","3C7E3C","387B38","347834",
"317431","2D712D","296E29","256B25","216721","1D641D","196119","155E15","115A11","0D570D","095409","065106","643700","643a00",
"643d00","644000","644300","644600","644900","654c00","654f00","655200","655500","655800","655a00","655d00","656000","656300",
"666600","666900","666c00","666f00","667200","667500","667800","667b00","ff99ff","FC92FC","F98BF9","F685F6","F37EF3","F077F0",
"ED71ED","EA6AEA","E763E7","E45DE4","E156E1","DE4FDE","DB49DB","D842D8","D53BD5","D235D2","CF2ECF","CC27CC","C921C9","C61AC6",
"C313C3","C00DC0","BD06BD","bb00bb","000003","000004","000005","BFC0C0","B7BDC2","AFBBC4","A8B8C6","A0B6C9","99B3CB","91B1CD",
"89AFD0","82ACD2","7AAAD4","73A7D6","6BA5D9","64A3DB","5CA0DD","549EE0","4D9BE2","4599E4","3E96E6","3694E9","2E92EB","278FED",
"1F8DF0","188AF2","1088F4","0986F7","55A5A5","53A1A2","519E9F","4F9B9C","4D989A","4B9597","499294","478F91","458B8F","43888C",
"418589","3F8286","3D7F84","3B7C81","39797E","37767B","357279","336F76","316C73","2F6970","2D666E","2B636B","296068","285D66",
"bb93b0","B78FAC","B48CA9","B189A6","AE85A2","AA829F","A77F9C","A47B99","A17895","9E7592","9A718F","976E8C","946B88","916885",
"8D6482","8A617F","875E7B","845A78","815775","7D5472","7A506E","774D6B","744A68","714765","de7cbb","DA77B7","D772B3","D46EAF",
"D169AB","CE64A8","CB60A4","C85BA0","C4579C","C15298","BE4D95","BB4991","B8448D","B54089","B23B86","AF3682","AB327E","A82D7A",
"A52976","A22473","9F1F6F","9C1B6B","991667","961264","000000","000000","000000",
"1964EB","1555E4","1147DD","0E39D6","0A2ACF","071CC8","030EC1","0000BA",
"0000BA","040464","0000FF","3051cf","000000","000000","000000","000000",
"000000","000000","000000","000000","000000","000000","000000","000000",
"000000","000000","000000","000000","000000","000000","000000","000000",
"547FC4","4D77BA","466FB1","4067A7","395F9E","335895","335896","335897","ff2828","ffffff","d0ffff","ffe0d0","ff7d00","fac800","c86400",
"fff000","afcd96","afcd96","64dcdc","00ffff","00ffff","00ffff","111133","000000"]}
    }
]
layers.extend(glad_global_land_cover_change)

In [5]:
glad_cropland_extent = [
    {
        "name": "Global cropland extent(2003-2019)",
        "dataset": f"projects/glad/GLCLU2020/Cropland_{year}",
        "resolution": 30,
        "description": "The 2000-2019 globally consistent cropland extent time-series at 30-m spatial resolution was derived from the Landsat satellite data archive. Cropland is defined as land used for annual and perennial herbaceous crops for human consumption, forage (including hay), and biofuel. The crop mapping was performed in four-year intervals.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": "no croplands or no data"},
                {"value": 1, "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_extent)

In [6]:
glad_cropland_gain = [
    {
        "name": "Global cropland gain (2003-2019)",
        "dataset": f"projects/glad/GLCLU2020/Cropland_gain",
        "resolution": 30,
        "description": "Global cropland gain between 2000 and 2019.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": "no croplands or no data"},
                {"value": 1, "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_gain)

In [7]:
glad_cropland_loss = [
    {
        "name": "Global cropland loss (2003-2019)",
        "dataset": "projects/glad/GLCLU2020/Cropland_loss",
        "resolution": 30,
        "description": "Global cropland loss between 2000 and 2019.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": "no croplands or no data"},
                {"value": 1, "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_loss)

In [8]:
# Done
natural_lands_map = [
    {
        "name": "Natural Lands Map",
        "dataset": "WRI/SBTN/naturalLands/v1/2020",
        "resolution": 30,
        "description": "The SBTN Natural Lands Map v1 is a 2020 baseline map of natural and non-natural land covers intended for use by companies setting science-based targets for nature, specifically the SBTN Land target #1: no conversion of natural ecosystems. 'Natural' and 'non-natural' definitions were adapted from the Accountability Framework initiative's definition of a natural ecosystem as \"one that substantially resembles - in terms of species composition, structure, and ecological function - what would be found in a given area in the absence of major human impacts\" and can include managed ecosystems as well as degraded ecosystems that are expected to regenerate either naturally or through management (AFi 2024). The SBTN Natural Lands Map operationalizes this definition by using proxies based on available data that align with AFi guidance to the extent possible.",
        "year": 2020,
        "band": "natural",
        "type": "Image",
        "metadata": {
            "layer_type": "categorical", 
            "value_mappings": [
                {
                    "value": 0, "color_hexcode": "#969696", "description": "Non-natural land"
                }, 
                {
                    "value": 1, "color_hexcode": "#a8ddb5", "description": "Natural land"
                }
            ]
        },
    }, 
    {
        "name": "Natural Lands - Classification",
        "dataset": "WRI/SBTN/naturalLands/v1/2020",
        "resolution": 30,
        "description": "The 'Natural Lands - Classification' layer shows the natural areas labeled by land cover. The SBTN Natural Lands Map v1 is a 2020 baseline map of natural and non-natural land covers intended for use by companies setting science-based targets for nature, specifically the SBTN Land target #1: no conversion of natural ecosystems.",
        "year": 2020,
        "band": "classification",
        "type": "Image",
        "metadata": {
            "layer_type": "categorical", 
            "value_mappings": [
                {
                    "value": 2,
                    "color_hexcode": "#246E24",
                    "description": "natural forests"
                },
                {
                    "value": 3,
                    "color_hexcode": "#B9B91E",
                    "description": "natural short vegetation"
                },
                {
                    "value": 4,
                    "color_hexcode": "#6BAED6",
                    "description": "natural water"
                },
                {
                    "value": 5,
                    "color_hexcode": "#06A285",
                    "description": "mangroves"
                },
                {
                    "value": 6,
                    "color_hexcode": "#FEFECC",
                    "description": "bare"
                },
                {
                    "value": 7,
                    "color_hexcode": "#ACD1E8",
                    "description": "snow"
                },
                {
                    "value": 8,
                    "color_hexcode": "#589558",
                    "description": "wet natural forests"
                },
                {
                    "value": 9,
                    "color_hexcode": "#093D09",
                    "description": "natural peat forests"
                },
                {
                    "value": 10,
                    "color_hexcode": "#DBDB7B",
                    "description": "wet natural short vegetation"
                },
                {
                    "value": 11,
                    "color_hexcode": "#99991A",
                    "description": "natural peat short vegetation"
                },
                {
                    "value": 12,
                    "color_hexcode": "#D3D3D3",
                    "description": "crop"
                },
                {
                    "value": 13,
                    "color_hexcode": "#D3D3D3",
                    "description": "built"
                },
                {
                    "value": 14,
                    "color_hexcode": "#D3D3D3",
                    "description": "non-natural tree cover"
                },
                {
                    "value": 15,
                    "color_hexcode": "#D3D3D3",
                    "description": "non-natural short vegetation"
                },
                {
                    "value": 16,
                    "color_hexcode": "#D3D3D3",
                    "description": "non-natural water"
                },
                {
                    "value": 17,
                    "color_hexcode": "#D3D3D3",
                    "description": "wet non-natural tree cover"
                },
                {
                    "value": 18,
                    "color_hexcode": "#D3D3D3",
                    "description": "non-natural peat tree cover"
                },
                {
                    "value": 19,
                    "color_hexcode": "#D3D3D3",
                    "description": "wet non-natural short vegetation"
                },
                {
                    "value": 20,
                    "color_hexcode": "#D3D3D3",
                    "description": "non-natural peat short vegetation"
                },
                {
                    "value": 21,
                    "color_hexcode": "#D3D3D3",
                    "description": "non-natural bare"
                }
            ]
        }
    },
]
layers.extend(natural_lands_map)

In [9]:
tropical_tree_cover = [
    {
    
        "name": "Tropical tree cover",
        "dataset": "projects/wri-datalab/TropicalTreeCover",
        "resolution": 10,
        "description": "Land & Carbon Lab’s Tropical Tree Cover data set, developed in collaboration with World Resource Institutes Global Restoration Initiative, uses globally-consistent satellite data at a 10-meter resolution to map tree cover across the tropics with greater granularity, improving our ability to quantify tree cover on non-forest lands like urban areas and cropland, and monitor trees at small spatial scales. This allows decision makers to better understand trees outside of dense forests, supports local communities who protect and restore these ecosystems to monitor their work and encourages greater investment in their projects.",
        "year": 2020,
        "band": "b1",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "continuous", 
            "value_mappings": {"min": 0, "max": 100} # TODO: should we include an additional text field to further describe what the min/max values represent? 
        }
    }
]
layers.extend(tropical_tree_cover)

In [10]:
# TODO: review other datasets in Confluence: 
# - probability cultivated grassland
# - dominant grass class
# - probability natural/semi-natural grassland
dominant_grasslands = [
    {
        "name": "Dominant Grasslands",
        "dataset": f"projects/global-pasture-watch/assets/ggc-30m/v1/grassland_c/{year}",
        "resolution": 30,
        "description": "This dataset provides global annual dominant class maps of grasslands (cultivated and natural/semi-natural) from 2000 to 2022 at 30-m spatial resolution. Produced by Land & Carbon Lab Global Pasture Watch initiative, the mapped grassland extent includes any land cover type, which contains at least 30% of dry or wet low vegetation, dominated by grasses and forbs (less than 3 meters) and a: maximum of 50% tree canopy cover (greater than 5 meters), maximum of 70% of other woody vegetation (scrubs and open shrubland), and a maximum of 50% active cropland cover in mosaic landscapes of cropland & other vegetation.",
        "year": year,
        "band": "dominant_class",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "categorical",
            "value_mappings": [
                {"value": 0, "color_hexcode": "#ffffff", "description": "Other"},
                {"value": 1, "color_hexcode": "#ff9916", "description": "Cultivated grassland"},
                {"value": 2, "color_hexcode": "#ffcd73", "description": "Natural/Semi-natural grassland"}, 
            ]
        }, 
        "visualization_parameters":{
            "opacity":1, 
            "min":1,
            "max":2,
            "palette":["ff9916","ffcd73"]
        },
    } for year in range(2001, 2023)
]
layers.extend(dominant_grasslands)

In [11]:
global_tree_canopy_height = [
    {
        "name": "Global Canopy Height Maps",
        "dataset": "projects/meta-forest-monitoring-okw37/assets/CanopyHeight",
        "resolution": 1,
        "description": "The Global Canopy Height Maps dataset offers comprehensive insights into tree canopy heights worldwide, providing an overview of tree canopy presence and height for the analysed period (2009-2020), with eighty per cent of the data obtained from imagery acquired between 2018 and 2020. The sub-meter resolution canopy height maps using self-supervised learning and a vision transformer trained on Aerial and GEDI Lidar.",
        "year": 2020,
        "band": "cover_code",
        "type": "ImageCollection",
        "metadata": {}
    }
]
layers.extend(global_tree_canopy_height)

In [12]:
# Done
rgb = lambda r,g,b: '#%02x%02x%02x' % (r,g,b)

global_map_of_forest_types = [
    {
        "name": "Global map of forest types 2020",
        "dataset": "JRC/GFC2020_subtypes/V0",
        "resolution": 10,
        "description": "This dataset provides estimates of forest above-ground biomass for the years 2010 and 2020 in tonnes per hectare (Mg/ha). These estimates are derived from a combination of Earth observation data, depending on the year, obtained from the Copernicus Sentinel-1 mission, Envisat's ASAR instrument, and JAXA's Advanced Land Observing Satellite (ALOS-1 and ALOS-2), along with additional information from other Earth observation sources.",
        "year": 2020,
        "band": "GFT",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 1, "color_hexcode": "#78c679", "description": "Naturally regenerating forest"},
                {"value": 10, "color_hexcode": "#006837", "description": "Primary forest"},
                {"value": 20, "color_hexcode": "#cc6600", "description": "Planted/Plantation forest"}, 
            ]
        }, 
        "visualization_parameters": {"min": 0, "max": 20, "palette":  [
      rgb(255, 255, 255), rgb(120, 198, 121), rgb(0, 0, 0),    rgb(0, 0, 0),
      rgb(0, 0, 0),       rgb(0, 0, 0),       rgb(0, 0, 0),    rgb(0, 0, 0),
      rgb(0, 0, 0),       rgb(0, 0, 0),       rgb(0, 104, 55), rgb(0, 0, 0),
      rgb(0, 0, 0),       rgb(0, 0, 0),       rgb(0, 0, 0),    rgb(0, 0, 0),
      rgb(0, 0, 0),       rgb(0, 0, 0),       rgb(0, 0, 0),    rgb(0, 0, 0),
      rgb(204, 102, 0)
    ]}
    }
]
layers.extend(global_map_of_forest_types)


In [13]:
# Done
dynamic_world_landcover = [
    {
        "name": "Dynamic World",
        "dataset": "GOOGLE/DYNAMICWORLD/V1",
        "resolution": 10,
        "description": "Dynamic World is a 10m near-real-time (NRT) Land Use/Land Cover (LULC) dataset that includes class probabilities and label information for nine classes. Dynamic World predictions are available for the Sentinel-2 L1C collection from 2015-06-27 to present. The revisit frequency of Sentinel-2 is between 2-5 days depending on latitude. Dynamic World predictions are generated for Sentinel-2 L1C images with CLOUDY_PIXEL_PERCENTAGE <= 35%. Predictions are masked to remove clouds and cloud shadows using a combination of S2 Cloud Probability, Cloud Displacement Index, and Directional Distance Transform.",
        "year": 2024,
        "band": "label",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "categorical", 
            "value_mappings": [
                {"value": 0, "color_hexcode": "#419bdf", "description": "water"},
                {"value": 1, "color_hexcode": "#397d49", "description": "trees"},
                {"value": 2, "color_hexcode": "#88b053", "description": "grass"}, 
                {"value": 3, "color_hexcode": "#7a87c6", "description": "flooded_vegetation"},
                {"value": 4, "color_hexcode": "#e49635", "description": "crops"},
                {"value": 5, "color_hexcode": "#dfc35a", "description": "shrub_and_scrub"}, 
                {"value": 6, "color_hexcode": "#c4281b", "description": "built"}, 
                {"value": 7, "color_hexcode": "#a59b8f", "description": "bare"}, 
                {"value": 8, "color_hexcode": "#b39fe1", "description": "snow_and_ice"}, 
            ]   
        }, 
        "visualization_parameters": {"min":1, "max":8, "palette":[
    '419bdf', '397d49', '88b053', '7a87c6', 'e49635', 'dfc35a', 'c4281b',
    'a59b8f', 'b39fe1']}
    }
]
layers.extend(dynamic_world_landcover)

In [14]:
global_forest_above_ground_biomass = [
    {
        "name": f"Global Forest Above Ground Biomass{' Uncertainty' if band == 'SD' else ''}",
        "dataset": f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_{band}_{year}_v4",
        "resolution": 100,
        "description": "This dataset provides estimates of forest above-ground biomass for the years 2010 and 2020 in tonnes per hectare (Mg/ha). These estimates are derived from a combination of Earth observation data, depending on the year, obtained from the Copernicus Sentinel-1 mission, Envisat's ASAR instrument, and JAXA's Advanced Land Observing Satellite (ALOS-1 and ALOS-2), along with additional information from other Earth observation sources.",
        "year": year,
        "band": band,
        "type": "ImageCollection",
        "metadata":{} , 
        "visualization_parameters": {"min":1,  "max":450, "palette": ["#C6ECAE","#A1D490","#7CB970","#57A751","#348E32", "#267A29","#176520","#0C4E15","#07320D","#031807"]}
    } for year in [2010, 2020] for band in ["AGB", "SD"]
]
layers.extend(global_forest_above_ground_biomass)

In [15]:
global_cropland_yield_and_area = [
    {
        "name": f"Global Cropland Yield and Area ({layer}:{crop_name})",
        "dataset": f"users/cgiardata/spam_data/2020/{layer}/{crop_id}_A",
        "resolution": 10000,
        "description": f"A global dataset providing fine spatial resolution estimates of crop area, yield, and production, facilitating detailed analysis of agricultural production patterns and informing policy decisions. Crop name: {crop_name}, Layer: {layer}",
        "year": 2020,
        "band": "b1",
        "type": "Image",
        "metadata": {"units": units, "layer": layer, "crop_name":crop_name}
    } 
    for layer, units in {"yield": "kg/ha", "physical_area":"ha", "harvest_area":"ha", "production":"Mt"}.items() 
    for crop_id, crop_name in {
        "whea":"Wheat", "rice":"Rice","maiz":"Maize",
        "barl":"Barley","mill":"Small Millet","pmil":"Pearl Millet",
        "sorg":"Sorghum","ocer":"Other Cereals","pota":"Potato",
        "swpo":"Sweet Potato", "yams":"Yams", "cass":"Cassava", 
        "orts":"Other Roots","bean":"Bean","chic":"Chickpea",
        "cowp":"Cowpea","pige":"Pigeon Pea","lent":"Lentil",
        "opul":"Other Pulses","soyb":"Soybean","grou":"Groundnut",
        "cnut":"Coconut","oilp":"Oilpalm","sunf":"Sunflower",
        "rape":"Rapeseed","sesa":"Sesame Seed","ooil":"Other Oil Crops",
        "sugc":"Sugarcane","sugb":"Sugarbeet","cott":"Cotton",
        "ofib":"Other Fibre Crops","coff":"Arabic Coffee","rcof":"Robust Coffee",
        "coco":"Cocoa", "teas":"Tea", "toba":"Tobacco",
        "bana":"Banana","plnt":"Plantain","citr":"Citrus",
        "trof":"Other Tropical Fruit", "temf":"Temperate Fruit", "toma":"Tomato",
        "onio":"Onion", "vege":"Other Vegetables", "rubb":"Rubber",
        "rest":"Rest Of Crops",
    }.items()
]
layers.extend(global_cropland_yield_and_area)

In [16]:
layers = [{
        **layer, 
        "metadata": json.dumps(layer.get("metadata", {})), 
        "visualization_parameters": json.dumps(layer.get("visualization_parameters", {}))
    } for layer in layers
]

In [17]:
# TODO: contextual layers, DIST Alerts

In [18]:
df = pd.DataFrame(layers)
df

Unnamed: 0,name,dataset,description,resolution,year,band,type,metadata,visualization_parameters
0,ESA WorldCover,ESA/WorldCover/v200,The European Space Agency (ESA) WorldCover 10 ...,10,2021,Map,ImageCollection,{},{}
1,Dynamic World V1,GOOGLE/DYNAMICWORLD/V1,Dynamic World is a 10m near-real-time (NRT) ...,10,2024,label,ImageCollection,{},{}
2,Global 4-class PALSAR-2/PALSAR Forest/Non-Fore...,JAXA/ALOS/PALSAR/YEARLY/FNF4,The global forest/non-forest map (FNF) is gene...,25,2018,fnf,ImageCollection,{},{}
3,MCD12Q1.061 MODIS Land Cover Type Yearly Globa...,MODIS/061/MCD12Q1,The Terra and Aqua combined Moderate Resolutio...,500,2023,LC_Type1,ImageCollection,{},{}
4,ESA WorldCereal 10 m v100,ESA/WorldCereal/2021/MODELS/v100,The European Space Agency (ESA) WorldCereal 10...,10,2021,classification,ImageCollection,{},{}
...,...,...,...,...,...,...,...,...,...
237,Global Cropland Yield and Area (production:Tom...,users/cgiardata/spam_data/2020/production/toma_A,A global dataset providing fine spatial resolu...,10000,2020,b1,Image,"{""units"": ""Mt"", ""layer"": ""production"", ""crop_n...",{}
238,Global Cropland Yield and Area (production:Onion),users/cgiardata/spam_data/2020/production/onio_A,A global dataset providing fine spatial resolu...,10000,2020,b1,Image,"{""units"": ""Mt"", ""layer"": ""production"", ""crop_n...",{}
239,Global Cropland Yield and Area (production:Oth...,users/cgiardata/spam_data/2020/production/vege_A,A global dataset providing fine spatial resolu...,10000,2020,b1,Image,"{""units"": ""Mt"", ""layer"": ""production"", ""crop_n...",{}
240,Global Cropland Yield and Area (production:Rub...,users/cgiardata/spam_data/2020/production/rubb_A,A global dataset providing fine spatial resolu...,10000,2020,b1,Image,"{""units"": ""Mt"", ""layer"": ""production"", ""crop_n...",{}


In [19]:
from langchain_ollama.embeddings import OllamaEmbeddings

embedder = OllamaEmbeddings(model="nomic-embed-text")

df["vector"] = embedder.embed_documents(list((df["name"] + " - " +df["dataset"] + " - " + df["description"] ).values))
df.head()

Unnamed: 0,name,dataset,description,resolution,year,band,type,metadata,visualization_parameters,vector
0,ESA WorldCover,ESA/WorldCover/v200,The European Space Agency (ESA) WorldCover 10 ...,10,2021,Map,ImageCollection,{},{},"[0.011387724, -0.001863103, -0.2042005, -0.050..."
1,Dynamic World V1,GOOGLE/DYNAMICWORLD/V1,Dynamic World is a 10m near-real-time (NRT) ...,10,2024,label,ImageCollection,{},{},"[0.03720288, -0.009078087, -0.23145294, 0.0087..."
2,Global 4-class PALSAR-2/PALSAR Forest/Non-Fore...,JAXA/ALOS/PALSAR/YEARLY/FNF4,The global forest/non-forest map (FNF) is gene...,25,2018,fnf,ImageCollection,{},{},"[0.045273844, 0.0012465789, -0.17780587, -0.01..."
3,MCD12Q1.061 MODIS Land Cover Type Yearly Globa...,MODIS/061/MCD12Q1,The Terra and Aqua combined Moderate Resolutio...,500,2023,LC_Type1,ImageCollection,{},{},"[0.03600016, 0.013895035, -0.19835362, -0.0338..."
4,ESA WorldCereal 10 m v100,ESA/WorldCereal/2021/MODELS/v100,The European Space Agency (ESA) WorldCereal 10...,10,2021,classification,ImageCollection,{},{},"[-0.0036287361, 0.026090976, -0.2038154, 0.007..."


In [20]:
db = lancedb.connect("s3://zeno-static-data/layers-context")
table = db.create_table("zeno-layers-context-v1.1", mode="overwrite", data=df)

[2025-01-14T15:03:21Z WARN  lance_table::io::commit] Using unsafe commit handler. Concurrent writes may result in data loss. Consider providing a commit handler that prevents conflicting writes.


In [21]:
query_embedding = embedder.embed_query("Grasslands preservation in Brazil")

In [22]:
results = table.search(query_embedding).limit(30)
results.to_pandas()

Unnamed: 0,name,dataset,description,resolution,year,band,type,metadata,visualization_parameters,vector,_distance
0,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2006,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.057723086, 0.0040142275, -0.22560917, 0.000...",0.671497
1,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2005,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.05711652, 0.0043431735, -0.22533567, -0.001...",0.674311
2,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2011,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.05348556, 0.0036516907, -0.22355503, 0.0015...",0.674318
3,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2004,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.05662974, 0.0032411597, -0.22615424, 0.0003...",0.674625
4,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2003,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.05611936, 0.0032223125, -0.22531708, 0.0001...",0.67506
5,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2001,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.055874716, 0.0032470312, -0.22464111, 0.001...",0.675357
6,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2002,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.055449624, 0.0018201179, -0.22524701, 0.000...",0.675945
7,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2013,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.051458344, 0.004883893, -0.22452874, 0.0013...",0.677118
8,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2007,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.057227712, 0.0046041245, -0.22369905, 0.000...",0.677169
9,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2008,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.055125456, 0.0020664048, -0.22447293, 0.001...",0.677831


In [23]:
filtered_results = table.search(query_embedding).limit(30).where("band != 'b1'")
filtered_results.to_pandas()

Unnamed: 0,name,dataset,description,resolution,year,band,type,metadata,visualization_parameters,vector,_distance
0,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2006,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.057723086, 0.0040142275, -0.22560917, 0.000...",0.671497
1,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2005,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.05711652, 0.0043431735, -0.22533567, -0.001...",0.674311
2,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2011,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.05348556, 0.0036516907, -0.22355503, 0.0015...",0.674318
3,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2004,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.05662974, 0.0032411597, -0.22615424, 0.0003...",0.674625
4,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2003,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.05611936, 0.0032223125, -0.22531708, 0.0001...",0.67506
5,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2001,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.055874716, 0.0032470312, -0.22464111, 0.001...",0.675357
6,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2002,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.055449624, 0.0018201179, -0.22524701, 0.000...",0.675945
7,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2013,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.051458344, 0.004883893, -0.22452874, 0.0013...",0.677118
8,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2007,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.057227712, 0.0046041245, -0.22369905, 0.000...",0.677169
9,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2008,dominant_class,ImageCollection,"{""layer_type"": ""categorical"", ""value_mappings""...","{""opacity"": 1, ""min"": 1, ""max"": 2, ""palette"": ...","[0.055125456, 0.0020664048, -0.22447293, 0.001...",0.677831


In [24]:
results = table.search(query_embedding).limit(20).to_pandas()
results[results['name'] == results.iloc[0]['name']].sort_values(by="year", ascending=False).iloc[0].dataset


'projects/global-pasture-watch/assets/ggc-30m/v1/grassland_c/2022'