<a href="https://colab.research.google.com/github/ua-datalab/Geospatial_Workshops/blob/main/notebooks/STAC_crawl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install pystac_client. This library is used to crawl SpatioTemporal Asset Catalogs (STAC)
!pip install pystac_client --quiet
!pip install planetary_computer --quiet

In [3]:
#Import the libraries into the current session

import pystac_client
import planetary_computer



In [4]:
# Open the Planetary Computer STAC API with pystac_client.
# planetary_computer.sign_inplace is supplied as the client's `modifier`.

STAC_API_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"

catalog = pystac_client.Client.open(STAC_API_URL, modifier=planetary_computer.sign_inplace)

In [None]:
# Gather every collection exposed by the root catalog into a list
collections = [*catalog.get_collections()]

# Report how many collections were found
print(f"Number of collections in the base catalog: {len(collections)}")

In [None]:
# For each collection, print its ID on one line and its title on the next
for collection in collections:
    print(f"ID: {collection.id}", f"Title: {collection.title}", sep="\n")

In [7]:
# Spatial and temporal filters used to narrow the search to assets of interest

# Bounding box: south-west corner (longitude, latitude) followed by
# north-east corner (longitude, latitude)
bbox = [
    -122.2751,  # SW corner longitude
    47.5469,    # SW corner latitude
    -121.9613,  # NE corner longitude
    47.7458,    # NE corner latitude
]

# Time window written as "start/end"
time_range = "2020-12-01/2020-12-31"

In [None]:
# Search the 'landsat-c2-l2' Landsat collection for imagery items that fall
# within the time range and bounding box defined earlier in the notebook.

search = catalog.search(collections=["landsat-c2-l2"], bbox=bbox, datetime=time_range)

# item_collection() is the replacement for get_all_items(), which pystac_client
# has deprecated; it gathers every matching item into a single ItemCollection.
items = search.item_collection()

# Number of items that matched the search
len(items)



In [None]:
## Show the found items as a table of their characteristics

# geopandas builds the table; it already comes installed in a Colab notebook

import geopandas

# Export the items to a dictionary first, then load its features into a GeoDataFrame
items_geojson = items.to_dict()
df = geopandas.GeoDataFrame.from_features(items_geojson, crs="epsg:4326")
df

In [None]:
## Pick the item with the smallest "eo:cloud_cover" property

def _cloud_cover(item):
    """Return the value stored under the item's "eo:cloud_cover" property."""
    return item.properties["eo:cloud_cover"]


selected_item = min(items, key=_cloud_cover)
print(selected_item)

In [None]:
# Build a table with one row per asset of the selected item:
# the asset's key, its title and its media type

import rich.table

table = rich.table.Table("Asset Key", "Description", "Asset Type")
for key, item_asset in selected_item.assets.items():
    table.add_row(key, item_asset.title, item_asset.media_type)

# Leave the table as the last expression so the notebook renders it
table

In [None]:
# Look up the 'rendered_preview' asset and show it as a dictionary

preview_asset = selected_item.assets["rendered_preview"]
preview_asset.to_dict()

In [None]:
# Show the item's 'rendered_preview' asset as an inline image (width=500)

from IPython.display import Image

preview_url = selected_item.assets["rendered_preview"].href
Image(url=preview_url, width=500)

In [None]:
# Look up the 'blue' band asset and show the url (href) it points to

blue_asset = selected_item.assets["blue"]
blue_asset.href

In [None]:
## Get some info from the 'blue' asset without downloading it:
## the response code, the file type and the file size.
## A HEAD request returns only the response headers, not the file itself.
## We are looking for an HTTP status code of 200.

import requests

# Send a HEAD request to get the headers of the file.
# The timeout stops the cell from hanging forever if the server never answers.
response = requests.head(selected_item.assets["blue"].href, timeout=30)

# Retrieve the status code
status_code = response.status_code

# Content-Type is optional: .get() leaves file_type as None when the header is absent
file_type = response.headers.get("Content-Type")

# Content-Length is optional too: only convert to megabytes when it is present
file_size_mb = None
content_length = response.headers.get("Content-Length")
if content_length is not None:
    file_size_bytes = int(content_length)
    file_size_mb = file_size_bytes / (1024 * 1024)  # Convert bytes to megabytes

print(f"Status Code: {status_code}")
print(f"File Type: {file_type}")

# Formatting None with ':.2f' raises TypeError, so a missing size is reported explicitly
if file_size_mb is None:
    print("File Size: unknown")
else:
    print(f"File Size: {file_size_mb:.2f} MB")


In [None]:
##Pull the selected asset (cloud optimized geotiff) into my notebook

#install and import library for display
!pip install rioxarray --quiet

import rioxarray


#Display the selected asset with coarser resolution.
#The asset is a COG so it has overviews embedded

ds = rioxarray.open_rasterio(
    selected_item.assets["blue"].href, overview_level=2
).squeeze()
img = ds.plot(cmap="viridis", add_colorbar=False)
img.axes.set_axis_off();