In [28]:
import pandas as pd
import lancedb
import json

In [2]:
layers = [
    {
        "name": "SBTN Natural Lands Map v1",
        "dataset": "WRI/SBTN/naturalLands/v1",
        "description": "The SBTN Natural Lands Map v1 is a 2020 baseline map of natural and non-natural land covers intended for use by companies setting science-based targets for nature, specifically the SBTN Land target #1: no conversion of natural ecosystems.  'Natural' and 'non-natural' definitions were adapted from the Accountability Framework initiative's definition of a natural ecosystem as 'one that substantially resembles - in terms of species composition, structure, and ecological function - what would be found in a given area in the absence of major human impacts' and can include managed ecosystems as well as degraded ecosystems that are expected to regenerate either naturally or through management (AFi 2024). The SBTN Natural Lands Map operationalizes this definition by using proxies based on available data that align with AFi guidance to the extent possible.  This map was made by compiling existing global and regional data.You can find the full technical note explaining the methodology linked on the Natural Lands GitHub. This work was a collaboration between Land & Carbon Lab at the World Resources Institute, World Wildlife Fund US, Systemiq, and SBTN.",
        "resolution": 30,
        "year": 2020,
        "band": "classification",
        "type": "Image",
    },
    {
        "name": "ESA WorldCover",
        "dataset": "ESA/WorldCover/v200",
        "description": "The European Space Agency (ESA) WorldCover 10 m 2021 product provides a global land cover map for 2021 at 10 m resolution based on Sentinel-1 and Sentinel-2 data. The WorldCover product comes with 11 land cover classes and has been generated in the framework of the ESA WorldCover project, part of the 5th Earth Observation Envelope Programme (EOEP-5) of the European Space Agency.",
        "resolution": 10,
        "year": 2021,
        "band": "Map",
        "type": "ImageCollection",
    },
    {
        "name": "Dynamic World V1",
        "dataset": "GOOGLE/DYNAMICWORLD/V1",
        "description": "  Dynamic World is a 10m near-real-time (NRT) Land Use/Land Cover (LULC) dataset that includes class probabilities and label information for nine classes.  Dynamic World predictions are available for the Sentinel-2 L1C collection from 2015-06-27 to present. The revisit frequency of Sentinel-2 is between 2-5 days depending on latitude. Dynamic World predictions are generated for Sentinel-2 L1C images with CLOUDY_PIXEL_PERCENTAGE <= 35%. Predictions are masked to remove clouds and cloud shadows using a combination of S2 Cloud Probability, Cloud Displacement Index, and Directional Distance Transform. Given Dynamic World class estimations are derived from single images using a spatial context from a small moving window, top-1 'probabilities' for predicted land covers that are in-part defined by cover over time, like crops, can be comparatively low in the absence of obvious distinguishing features. High-return surfaces in arid climates, sand, sunglint, etc may also exhibit this phenomenon.  To select only pixels that confidently belong to a Dynamic World class, it is recommended to mask Dynamic World outputs by thresholding the estimated 'probability' of the top-1 prediction. ",
        "resolution": 10,
        "year": 2024,
        "band": "label",
        "type": "ImageCollection",
    },
    {
        "name": "Global 4-class PALSAR-2/PALSAR Forest/Non-Forest Map",
        "dataset": "JAXA/ALOS/PALSAR/YEARLY/FNF4",
        "description": "The global forest/non-forest map (FNF) is generated by classifying the SAR image (backscattering coefficient) in the global 25m resolution PALSAR-2/PALSAR SAR mosaic so that strong and low backscatter pixels are assigned as 'forest' and 'non-forest', respectively. Here, 'forest' is defined as the natural forest with the area larger than 0.5 ha and forest cover over 10%. This definition is the same as the Food and Agriculture Organization (FAO) definition. Since the radar backscatter from the forest depends on the region (climate zone), the classification of Forest/Non-Forest is conducted by using a region-dependent threshold of backscatter. The classification accuracy is checked by using in-situ photos and high-resolution optical satellite images.",
        "resolution": 25,
        "year": 2018,
        "band": "fnf",
        "type": "ImageCollection",
    },
    {
        "name": "Global map of forest types 2020",
        "dataset": "JRC/GFC2020_subtypes/V0",
        "description": "The global map of forest types provides a spatially explicit representation of primary forest, naturally regenerating forest and planted forest (including plantation forest) for the year 2020 at 10m spatial resolution. The base layer for mapping these forest types is the extent of forest cover of version 1 of the Global Forest Cover map for year 2020 (JRC GFC 2020). The definitions of the forest types follow the definitions of the Regulation from the European Union 'on the making available on the Union market and the export from the Union of certain commodities and products associated with deforestation and forest degradation' (EUDR, Regulation (EU) 2023/1115), which are similar to characteristics and specific forest categories from the FAO Global Forest Resources Assessment. The year 2020 corresponds to the cut-off date of the EUDR.",
        "resolution": 10,
        "year": 2020,
        "band": "GFT",
        "type": "ImageCollection",
    },
    {
        "name": "MCD12Q1.061 MODIS Land Cover Type Yearly Global 500m",
        "dataset": "MODIS/061/MCD12Q1",
        "description": "The Terra and Aqua combined Moderate Resolution Imaging Spectroradiometer (MODIS) Land Cover Type (MCD12Q1) Version 6.1 data product provides global land cover types at yearly intervals. The MCD12Q1 Version 6.1 data product is derived using supervised classifications of MODIS Terra and Aqua reflectance data. Land cover types are derived from the International Geosphere-Biosphere Programme (IGBP), University of Maryland (UMD), Leaf Area Index (LAI), BIOME-Biogeochemical Cycles (BGC), and Plant Functional Types (PFT) classification schemes. The supervised classifications then underwent additional post-processing that incorporate prior knowledge and ancillary information to further refine specific classes. Additional land cover property assessment layers are provided by the Food and Agriculture Organization (FAO) Land Cover Classification System (LCCS) for land cover, land use, and surface hydrology.",
        "resolution": 500,
        "year": 2023,
        "band": "LC_Type1",
        "type": "ImageCollection",
    },
    {
        "name": "ESA WorldCereal 10 m v100",
        "dataset": "ESA/WorldCereal/2021/MODELS/v100",
        "description": "The European Space Agency (ESA) WorldCereal 10 m 2021 product suite consists of global-scale annual and seasonal crop maps and their related confidence. They were generated as part of the ESA-WorldCereal project. More information on the content of these products and the methodology used to generate them is described in [1].  This collection contains up to 106 agro-ecological zone (AEZ) images for each product which were all processed with respect to their own regional seasonality and should be considered as independent products. These seasons are described in the list below and were developed in [2] as part of the project. Note that cereals as described by WorldCereal include wheat, barley, and rye, which belong to the Triticeae tribe.  WorldCereal seasons description:      tc-annual: a one-year cycle being defined in an AEZ by the end of the last considered growing season     tc-wintercereals: the main cereals season defined in an AEZ     tc-springcereals: optional springcereals season, only defined in certain AEZ     tc-maize-main: the main maize season defined in an AEZ     tc-maize-second: optional second maize season, only defined in certain AEZ ",
        "resolution": 10,
        "year": 2021,
        "band": "classification",
        "type": "ImageCollection",
    },
]


In [3]:
glad_global_land_cover = [
    {
        "name": f"Annual global land cover and land use {year}",
        "dataset": f"projects/glad/GLCLU2020/v2/LCLUC_{year}",
        "resolution": 30,
        "description": "Global map with continuous measures of bare ground and tree height inside and outside of wetlands, seasonal water percent, and binary labels of built-up, permanent snow/ice, and cropland.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": ""} # TODO: fetch pixel value mappings once access to document is granted
            ]
        }
    } for year in [2000,2005,2010,2015,2020]
]
layers.extend(glad_global_land_cover)

In [4]:
glad_global_land_cover_change = [
        {
        "name": "Global land cover and land use change (2000-2020)",
        "dataset": "projects/glad/GLCLU2020/v2/LCLUC",
        "resolution": 30,
        "description": "The GLAD Global Land Cover and Land Use Change dataset quantifies changes in forest extent and height, cropland, built-up lands, surface water, and perennial snow and ice extent from the year 2000 to 2020 at 30-m spatial resolution. Land cover and land use states of 2020 with tranistions relative to 2000 labeled.",
        "year": 2020,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": ""} # TODO: fetch pixel value mappings once access to document is granted
            ]
        }
    }
]
layers.extend(glad_global_land_cover_change)

In [5]:
glad_cropland_extent = [
    {
        "name": "Global cropland extent(2003-2019)",
        "dataset": f"projects/glad/GLCLU2020/Cropland_{year}",
        "resolution": 30,
        "description": "The 2000-2019 globally consistent cropland extent time-series at 30-m spatial resolution was derived from the Landsat satellite data archive. Cropland is defined as land used for annual and perennial herbaceous crops for human consumption, forage (including hay), and biofuel. The crop mapping was performed in four-year intervals.",
        "year": year,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": "no croplands or no data"},
                {"value": 1, "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_extent)

In [6]:
glad_cropland_gain = [
    {
        "name": "Global cropland gain (2003-2019)",
        "dataset": "projects/glad/GLCLU2020/Cropland_{year}",
        "resolution": 30,
        "description": "Global cropland gain between 2000 and 2019.",
        "year": 2019,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": "no croplands or no data"},
                {"value": 1, "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_gain)

In [7]:
glad_cropland_loss = [
    {
        "name": "Global cropland loss (2003-2019)",
        "dataset": "projects/glad/GLCLU2020/Cropland_loss",
        "resolution": 30,
        "description": "Global cropland loss between 2000 and 2019.",
        "year": 2019,
        "band": "b1",
        "type": "Image",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 0, "description": "no croplands or no data"},
                {"value": 1, "description": "croplands"}
            ]
        }
    } for year in [2003,2007,2011,2015,2019]
]
layers.extend(glad_cropland_loss)

In [8]:
natural_lands_map = [
    {
        "name": "Natural Lands Map",
        "dataset": "WRI/SBTN/naturalLands/v1/2020",
        "resolution": 30,
        "description": "The SBTN Natural Lands Map v1 is a 2020 baseline map of natural and non-natural land covers intended for use by companies setting science-based targets for nature, specifically the SBTN Land target #1: no conversion of natural ecosystems. 'Natural' and 'non-natural' definitions were adapted from the Accountability Framework initiative's definition of a natural ecosystem as \"one that substantially resembles - in terms of species composition, structure, and ecological function - what would be found in a given area in the absence of major human impacts\" and can include managed ecosystems as well as degraded ecosystems that are expected to regenerate either naturally or through management (AFi 2024). The SBTN Natural Lands Map operationalizes this definition by using proxies based on available data that align with AFi guidance to the extent possible.",
        "year": 2020,
        "band": band,
        "type": "Image",
        "metadata": {},
    } for band in ["classification", "natural"]
]
layers.extend(natural_lands_map)

In [9]:
tropical_tree_cover = [
    {
    
        "name": "Tropical tree cover",
        "dataset": "projects/wri-datalab/TropicalTreeCover",
        "resolution": 10,
        "description": "Land & Carbon Lab’s Tropical Tree Cover data set, developed in collaboration with World Resource Institutes Global Restoration Initiative, uses globally-consistent satellite data at a 10-meter resolution to map tree cover across the tropics with greater granularity, improving our ability to quantify tree cover on non-forest lands like urban areas and cropland, and monitor trees at small spatial scales. This allows decision makers to better understand trees outside of dense forests, supports local communities who protect and restore these ecosystems to monitor their work and encourages greater investment in their projects.",
        "year": 2020,
        "band": "b1",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "continuous", 
            "value_mappings": {"min": 0, "max": 100} # TODO: should we include an additional text field to further describe what the min/max values represent? 
        }
    }
]
layers.extend(tropical_tree_cover)

In [10]:
# TODO: review other datasets in Confluence: 
# - probability cultivated grassland
# - dominant grass class
# - probability natural/semi-natural grassland
dominant_grasslands = [
    {
        "name": "Dominant Grasslands",
        "dataset": f"projects/global-pasture-watch/assets/ggc-30m/v1/grassland_c/{year}",
        "resolution": 30,
        "description": "This dataset provides global annual dominant class maps of grasslands (cultivated and natural/semi-natural) from 2000 to 2022 at 30-m spatial resolution. Produced by Land & Carbon Lab Global Pasture Watch initiative, the mapped grassland extent includes any land cover type, which contains at least 30% of dry or wet low vegetation, dominated by grasses and forbs (less than 3 meters) and a: maximum of 50% tree canopy cover (greater than 5 meters), maximum of 70% of other woody vegetation (scrubs and open shrubland), and a maximum of 50% active cropland cover in mosaic landscapes of cropland & other vegetation.",
        "year": year,
        "band": "dominant_class",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "categorical",
            "value_mappings": [
                {"value": 0, "description": "#ffffff Other"},
                {"value": 1, "description": "#ff9916 Cultivated grassland"},
                {"value": 2, "description": "#ffcd73 Natural/Semi-natural grassland"}, 
            ]
        }
    } for year in range(2001, 2023)
]
layers.extend(dominant_grasslands)

In [11]:
global_tree_canopy_height = [
    {
        "name": "Dominant Grasslands",
        "dataset": "projects/meta-forest-monitoring-okw37/assets/CanopyHeight",
        "resolution": 1,
        "description": "The Global Canopy Height Maps dataset offers comprehensive insights into tree canopy heights worldwide, providing an overview of tree canopy presence and height for the analysed period (2009-2020), with eighty per cent of the data obtained from imagery acquired between 2018 and 2020. The sub-meter resolution canopy height maps using self-supervised learning and a vision transformer trained on Aerial and GEDI Lidar.",
        "year": 2020,
        "band": "cover_code",
        "type": "ImageCollection",
        "metadata": {}
    }
]
layers.extend(global_tree_canopy_height)

In [12]:
global_map_of_forest_types = [
    {
        "name": "Global map of forest types 2020",
        "dataset": "JRC/GFC2020_subtypes/V0",
        "resolution": 10,
        "description": "This dataset provides estimates of forest above-ground biomass for the years 2010 and 2020 in tonnes per hectare (Mg/ha). These estimates are derived from a combination of Earth observation data, depending on the year, obtained from the Copernicus Sentinel-1 mission, Envisat's ASAR instrument, and JAXA's Advanced Land Observing Satellite (ALOS-1 and ALOS-2), along with additional information from other Earth observation sources.",
        "year": 2020,
        "band": "GFT",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "categorial", 
            "value_mappings": [
                {"value": 1, "description": "#78c679 Naturally regenerating forest"},
                {"value": 10, "description": "#006837 Primary forest"},
                {"value": 20, "description": "#cc6600 Planted/Plantation forest"}, 
            ]
        }
    }
]
layers.extend(global_map_of_forest_types)


In [13]:
dynamic_world_landcover = [
    {
        "name": "Dynamic World",
        "dataset": "GOOGLE/DYNAMICWORLD/V1",
        "resolution": 10,
        "description": "Dynamic World is a 10m near-real-time (NRT) Land Use/Land Cover (LULC) dataset that includes class probabilities and label information for nine classes. Dynamic World predictions are available for the Sentinel-2 L1C collection from 2015-06-27 to present. The revisit frequency of Sentinel-2 is between 2-5 days depending on latitude. Dynamic World predictions are generated for Sentinel-2 L1C images with CLOUDY_PIXEL_PERCENTAGE <= 35%. Predictions are masked to remove clouds and cloud shadows using a combination of S2 Cloud Probability, Cloud Displacement Index, and Directional Distance Transform.",
        "year": 2024,
        "band": "label",
        "type": "ImageCollection",
        "metadata": {
            "layer_type": "categorical", 
            "value_mappings": [
                {"value": 0, "description": "#419bdf water"},
                {"value": 1, "description": "#397d49 trees"},
                {"value": 2, "description": "#88b053 grass"}, 
                {"value": 3, "description": "#7a87c6 flooded_vegetation"},
                {"value": 4, "description": "#e49635 crops"},
                {"value": 5, "description": "#dfc35a shrub_and_scrub"}, 
                {"value": 6, "description": "#c4281b built"}, 
                {"value": 7, "description": "#a59b8f bare"}, 
                {"value": 8, "description": "#b39fe1 snow_and_ice"}, 
            ]
            
        }
    }
]
layers.extend(dynamic_world_landcover)

In [14]:
global_forest_above_ground_biomass = [
    {
        "name": f"Global Forest Above Ground Biomass{' Uncertainty' if band == 'SD' else ''}",
        "dataset": f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_{band}_{year}_v4",
        "resolution": 100,
        "description": "This dataset provides estimates of forest above-ground biomass for the years 2010 and 2020 in tonnes per hectare (Mg/ha). These estimates are derived from a combination of Earth observation data, depending on the year, obtained from the Copernicus Sentinel-1 mission, Envisat's ASAR instrument, and JAXA's Advanced Land Observing Satellite (ALOS-1 and ALOS-2), along with additional information from other Earth observation sources.",
        "year": year,
        "band": "AGB",
        "type": "ImageCollection",
        "metadata": {}
    } for year in [2010, 2020] for band in ["AGB", "SD"]
]
layers.extend(global_forest_above_ground_biomass)

In [37]:
global_cropland_yield_and_area = [
    {
        "name": "Global Cropland Yield and Area",
        "dataset": f"users/cgiardata/spam_data/2020/{layer}/{crop}_A",
        "resolution": 10000,
        "description": "A global dataset providing fine spatial resolution estimates of crop area, yield, and production, facilitating detailed analysis of agricultural production patterns and informing policy decisions.",
        "year": 2020,
        "band": "b1",
        "type": "Image",
        "metadata": {"units": units}
    } for layer, units in {"yield": "kg/ha", "physical_area":"ha", "harvest_area":"ha", "production":"Mt"}.items() for crop in ["RUBB","TEMF","COTT","LENT","MAIZ","OFIB","SORG","TEAS","REST","ONIO","PLNT","GROU","SUNF","COCO","MILL","CITR","CNUT","OOIL","TROF","OCER","PMIL","RICE","TOBA","SUGC","ORTS","WHEA","OPUL","SOYB","TOMA","BARL","YAMS","COWP","BANA","BEAN","OILP","SESA","PIGE","POTA","VEGE","CASS","SUGB","CHIC","RAPE","SWPO","RCOF","COFF"]
]
layers.extend(global_cropland_yield_and_area)

for layer in layers: 
    if not layer.get("metadata"): 
        layer["metadata"] = {}
    layer["metadata"] = json.dumps(layer["metadata"])

layers = [{**layer, "metadata": json.dumps(layer["metadata"] if layer.get("metadata") else json.dumps({}))} for layer in layers]

In [16]:
# TODO: contextual layers, DIST Alerts

In [38]:
df = pd.DataFrame(layers)
df

Unnamed: 0,name,dataset,description,resolution,year,band,type,metadata
0,SBTN Natural Lands Map v1,WRI/SBTN/naturalLands/v1,The SBTN Natural Lands Map v1 is a 2020 baseli...,30,2020,classification,Image,"""{}"""
1,ESA WorldCover,ESA/WorldCover/v200,The European Space Agency (ESA) WorldCover 10 ...,10,2021,Map,ImageCollection,"""{}"""
2,Dynamic World V1,GOOGLE/DYNAMICWORLD/V1,Dynamic World is a 10m near-real-time (NRT) ...,10,2024,label,ImageCollection,"""{}"""
3,Global 4-class PALSAR-2/PALSAR Forest/Non-Fore...,JAXA/ALOS/PALSAR/YEARLY/FNF4,The global forest/non-forest map (FNF) is gene...,25,2018,fnf,ImageCollection,"""{}"""
4,Global map of forest types 2020,JRC/GFC2020_subtypes/V0,The global map of forest types provides a spat...,10,2020,GFT,ImageCollection,"""{}"""
...,...,...,...,...,...,...,...,...
423,Global Cropland Yield and Area,users/cgiardata/spam_data/2020/production/CHIC_A,A global dataset providing fine spatial resolu...,10000,2020,b1,Image,"""{\""units\"": \""Mt\""}"""
424,Global Cropland Yield and Area,users/cgiardata/spam_data/2020/production/RAPE_A,A global dataset providing fine spatial resolu...,10000,2020,b1,Image,"""{\""units\"": \""Mt\""}"""
425,Global Cropland Yield and Area,users/cgiardata/spam_data/2020/production/SWPO_A,A global dataset providing fine spatial resolu...,10000,2020,b1,Image,"""{\""units\"": \""Mt\""}"""
426,Global Cropland Yield and Area,users/cgiardata/spam_data/2020/production/RCOF_A,A global dataset providing fine spatial resolu...,10000,2020,b1,Image,"""{\""units\"": \""Mt\""}"""


In [39]:
from langchain_ollama.embeddings import OllamaEmbeddings

embedder = OllamaEmbeddings(model="nomic-embed-text")
df["vector"] = embedder.embed_documents(list(df["description"].values))
df.head()

Unnamed: 0,name,dataset,description,resolution,year,band,type,metadata,vector
0,SBTN Natural Lands Map v1,WRI/SBTN/naturalLands/v1,The SBTN Natural Lands Map v1 is a 2020 baseli...,30,2020,classification,Image,"""{}""","[0.018692888, 0.011777715, -0.20214601, -0.013..."
1,ESA WorldCover,ESA/WorldCover/v200,The European Space Agency (ESA) WorldCover 10 ...,10,2021,Map,ImageCollection,"""{}""","[0.018436758, 0.005646095, -0.21238795, -0.044..."
2,Dynamic World V1,GOOGLE/DYNAMICWORLD/V1,Dynamic World is a 10m near-real-time (NRT) ...,10,2024,label,ImageCollection,"""{}""","[0.044064987, -0.020232122, -0.23173736, 0.010..."
3,Global 4-class PALSAR-2/PALSAR Forest/Non-Fore...,JAXA/ALOS/PALSAR/YEARLY/FNF4,The global forest/non-forest map (FNF) is gene...,25,2018,fnf,ImageCollection,"""{}""","[0.057427455, 0.013046537, -0.18393743, -0.006..."
4,Global map of forest types 2020,JRC/GFC2020_subtypes/V0,The global map of forest types provides a spat...,10,2020,GFT,ImageCollection,"""{}""","[0.0198034, 0.009971958, -0.1932867, -0.040694..."


In [40]:
db = lancedb.connect("s3://zeno-static-data/layers-context")
table = db.create_table("zeno-layers-context", data = df)

[2024-12-11T19:32:44Z WARN  lance_table::io::commit] Using unsafe commit handler. Concurrent writes may result in data loss. Consider providing a commit handler that prevents conflicting writes.


In [43]:
query_embedding = embedder.embed_query("I'd like to learn about magrove preservation in Argentina")

In [53]:
results = table.search(query_embedding).limit(5)
results.to_pandas()

Unnamed: 0,name,dataset,description,resolution,year,band,type,metadata,vector,_distance
0,Tropical tree cover,projects/wri-datalab/TropicalTreeCover,Land & Carbon Lab’s Tropical Tree Cover data s...,10,2020,b1,ImageCollection,"""{\""layer_type\"": \""continuous\"", \""value_mapp...","[0.05059206, 0.0018485745, -0.21603557, -0.032...",1.020332
1,ESA WorldCereal 10 m v100,ESA/WorldCereal/2021/MODELS/v100,The European Space Agency (ESA) WorldCereal 10...,10,2021,classification,ImageCollection,"""{}""","[-0.0023301935, 0.027219636, -0.20040773, 0.00...",1.029401
2,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2002,dominant_class,ImageCollection,"""{\""layer_type\"": \""categorical\"", \""value_map...","[0.041484818, 0.010936345, -0.2224899, 0.00229...",1.030116
3,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2003,dominant_class,ImageCollection,"""{\""layer_type\"": \""categorical\"", \""value_map...","[0.041484818, 0.010936345, -0.2224899, 0.00229...",1.030116
4,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2001,dominant_class,ImageCollection,"""{\""layer_type\"": \""categorical\"", \""value_map...","[0.041484818, 0.010936345, -0.2224899, 0.00229...",1.030116


In [54]:
filtered_results = table.search(query_embedding).where("year < 2020").limit(5)
filtered_results.to_pandas()

Unnamed: 0,name,dataset,description,resolution,year,band,type,metadata,vector,_distance
0,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2002,dominant_class,ImageCollection,"""{\""layer_type\"": \""categorical\"", \""value_map...","[0.041484818, 0.010936345, -0.2224899, 0.00229...",1.030116
1,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2003,dominant_class,ImageCollection,"""{\""layer_type\"": \""categorical\"", \""value_map...","[0.041484818, 0.010936345, -0.2224899, 0.00229...",1.030116
2,Dominant Grasslands,projects/global-pasture-watch/assets/ggc-30m/v...,This dataset provides global annual dominant c...,30,2001,dominant_class,ImageCollection,"""{\""layer_type\"": \""categorical\"", \""value_map...","[0.041484818, 0.010936345, -0.2224899, 0.00229...",1.030116
