<a href="https://colab.research.google.com/github/ua-datalab/Geospatial_Workshops/blob/main/notebooks/STAC_crawl2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install pystac_client. This library is used to crawl SpatioTemporal Asset Catalogs (STAC)
!pip install pystac_client --quiet
!pip install planetary_computer --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/181.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/181.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.6/181.6 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
##Install library to work with raster images

!pip install rasterio --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.6/20.6 MB[0m [31m58.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
#Import the libraries into the current session

import pystac_client
import planetary_computer



In [4]:
#Use the pystac_client library to open the Planetary Computer STAC API

catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)

In [5]:
#Let's look at the collections within the root catalog
collections = list(catalog.get_collections())

# Print the number of collections
print(f"Number of collections in the base catalog: {len(collections)}")

Number of collections in the base catalog: 123


In [10]:
# Print the names (or IDs) and descriptions of each collection
for collection in collections:
    print(f"ID: {collection.id}")
    print(f"Title: {collection.title}")

ID: daymet-annual-pr
Title: Daymet Annual Puerto Rico
ID: daymet-daily-hi
Title: Daymet Daily Hawaii
ID: 3dep-seamless
Title: USGS 3DEP Seamless DEMs
ID: 3dep-lidar-dsm
Title: USGS 3DEP Lidar Digital Surface Model
ID: fia
Title: Forest Inventory and Analysis
ID: sentinel-1-rtc
Title: Sentinel 1 Radiometrically Terrain Corrected (RTC)
ID: gridmet
Title: gridMET
ID: daymet-annual-na
Title: Daymet Annual North America
ID: daymet-monthly-na
Title: Daymet Monthly North America
ID: daymet-annual-hi
Title: Daymet Annual Hawaii
ID: daymet-monthly-hi
Title: Daymet Monthly Hawaii
ID: daymet-monthly-pr
Title: Daymet Monthly Puerto Rico
ID: gnatsgo-tables
Title: gNATSGO Soil Database - Tables
ID: hgb
Title: HGB: Harmonized Global Biomass for 2010
ID: cop-dem-glo-30
Title: Copernicus DEM GLO-30
ID: cop-dem-glo-90
Title: Copernicus DEM GLO-90
ID: goes-cmi
Title: GOES-R Cloud & Moisture Imagery
ID: terraclimate
Title: TerraClimate
ID: nasa-nex-gddp-cmip6
Title: Earth Exchange Global Daily Downscaled 

In [11]:
#Create a custom spatial and temporal filter to find assets of interest

time_range = "2020-12-01/2020-12-31"
bbox = [-122.2751, 47.5469, -121.9613, 47.7458] #SW corner longitude/latitude ; NE corner longitude/latitude

In [12]:
#Search the 'Landsat Collection' to find imagery assets within my time-range and bounding box.

search = catalog.search(collections=["landsat-c2-l2"], bbox=bbox, datetime=time_range)
items = search.get_all_items()
len(items)



8

In [None]:
##Display the items in a table and show their characteristics

#Use geopandas to make the table. It is already installed in Colab notebook

import geopandas

df = geopandas.GeoDataFrame.from_features(items.to_dict(), crs="epsg:4326")
df

In [17]:
##Filter to find the item with the lowest cloud cover

selected_item = min(items, key=lambda item: item.properties["eo:cloud_cover"])
print(selected_item)

<Item id=LC08_L2SP_047027_20201204_02_T1>


In [None]:
# List all the assets and properties for the selected item

assets_info = [
    {
        "Asset Key": asset_key,
        "Asset Title": asset.title if hasattr(asset, "title") else "N/A",
        "Asset Type": asset.media_type
    }
    for asset_key, asset in selected_item.assets.items()
]

# Create a GeoDataFrame from the assets information
assets_gdf = geopandas.GeoDataFrame(assets_info)

# Print the GeoDataFrame
print(assets_gdf)

In [20]:
# List all the assets for the selected item

import rich.table

table = rich.table.Table("Asset Key", "Description")
for asset_key, asset in selected_item.assets.items():
    table.add_row(asset_key, asset.title)

table

In [26]:
#Convert the 'rendered preview' asset into a dictionary

selected_item.assets["rendered_preview"].to_dict()

{'href': 'https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=landsat-c2-l2&item=LC08_L2SP_047027_20201204_02_T1&assets=red&assets=green&assets=blue&color_formula=gamma+RGB+2.7%2C+saturation+1.5%2C+sigmoidal+RGB+15+0.55&format=png',
 'type': 'image/png',
 'title': 'Rendered preview',
 'rel': 'preview',
 'roles': ['overview']}

In [23]:
#Display the 'rendered preview' asset of the item

from IPython.display import Image

Image(url=selected_item.assets["rendered_preview"].href, width=500)

In [27]:
#Get the API endpoint (url) of the 'blue' band asset.

selected_item.assets["blue"].href

'https://landsateuwest.blob.core.windows.net/landsat-c2/level-2/standard/oli-tirs/2020/047/027/LC08_L2SP_047027_20201204_20210313_02_T1/LC08_L2SP_047027_20201204_20210313_02_T1_SR_B2.TIF?st=2024-03-11T17%3A12%3A45Z&se=2024-03-12T17%3A57%3A45Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-03-12T08%3A40%3A24Z&ske=2024-03-19T08%3A40%3A24Z&sks=b&skv=2021-06-08&sig=NzhZGSIfsFLAu/GhBoP3%2BN%2B%2ByqXpOk935fX7WvrGTes%3D'

In [31]:
##Get some info from the asset without downloading it
## Get response code, file type, file size
## We are looking for HTTP status code of 200

import requests

# Send a HEAD request to get the headers of the file
response = requests.head(selected_item.assets["blue"].href)

# Retrieve the status code
status_code = response.status_code

# Initialize variables for file type and size
file_type = None
file_size_mb = None

# Check if the Content-Type header exists
if 'Content-Type' in response.headers:
    file_type = response.headers['Content-Type']

# Check if the Content-Length header exists and convert it to megabytes
if 'Content-Length' in response.headers:
    file_size_bytes = int(response.headers['Content-Length'])
    file_size_mb = file_size_bytes / (1024 * 1024)  # Convert bytes to megabytes

print(f"Status Code: {status_code}")
print(f"File Type: {file_type}")
print(f"File Size: {file_size_mb:.2f} MB")


Status Code: 200
File Type: image/tiff
File Size: 75.27 MB


In [32]:
!pip install rioxarray --quiet

Collecting rioxarray
  Downloading rioxarray-0.15.1-py3-none-any.whl (53 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m51.2/53.7 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.7/53.7 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rioxarray
Successfully installed rioxarray-0.15.1


In [None]:
import rasterio
from rasterio.plot import show
import matplotlib.pyplot as plt
import requests
import numpy as np

In [33]:
import rioxarray

ds = rioxarray.open_rasterio(
    selected_item.assets["blue"].href, overview_level=4
).squeeze()
img = ds.plot(cmap="Blues", add_colorbar=False)
img.axes.set_axis_off();



RasterioIOError: HTTP response code: 403

In [None]:
ds

In [None]:
img