In [1]:
import os
import json
import xml.etree.ElementTree as ET
from datetime import datetime

import rasterio
import geopandas as gpd
import matplotlib.pyplot as plt
import pystac
import sys
import constants
from shapely.geometry import mapping, box







In [2]:

base_dir="data/"
qml_path="data/style_file.qml"
vector_qml_file ="data/swb_style.qml"

raster_filename="saraikela-kharsawan_gobindpur_2023-07-01_2024-06-30_LULCmap_10m.tif"
vector_filename="swb2_saraikela-kharsawan_gobindpur.geojson"

corestack_dir = os.path.join(base_dir, "CorestackCatalogs")
gobindpur_dir = os.path.join(corestack_dir, "gobindpur")
raster_dir = os.path.join(gobindpur_dir, "raster")
vector_dir = os.path.join(gobindpur_dir, "vector")

os.makedirs(raster_dir, exist_ok=True)
os.makedirs(vector_dir, exist_ok=True)


raster_filename = raster_filename
vector_filename = vector_filename
raster_path = os.path.join(base_dir, raster_filename)
vector_path = os.path.join(base_dir, vector_filename)

raster_thumb = os.path.join(raster_dir, "thumbnail.png")
vector_thumb = os.path.join(vector_dir, "thumbnail.png")

raster_style_file = os.path.join(base_dir, "style_file.qml")
vector_style_file = os.path.join(base_dir, "swb_style.qml")


In [3]:
def extract_raster_dates_from_filename(filename):
    try:
        print(filename)
        parts = filename.split('_')
        start_date = datetime.strptime(parts[2], "%Y-%m-%d")
        end_date = datetime.strptime(parts[3], "%Y-%m-%d")
        print(start_date)
        print(end_date)
    except Exception as e:
        raise ValueError(f"Failed to extract raster dates from filename '{filename}': {e}")
        
    return start_date, end_date    

In [4]:
extract_raster_dates_from_filename(raster_filename)

saraikela-kharsawan_gobindpur_2023-07-01_2024-06-30_LULCmap_10m.tif
2023-07-01 00:00:00
2024-06-30 00:00:00


(datetime.datetime(2023, 7, 1, 0, 0), datetime.datetime(2024, 6, 30, 0, 0))

In [5]:


def parse_qml_classes(qml_path):
    tree = ET.parse(qml_path)
    root = tree.getroot()
    classes = []

    for entry in root.findall(".//paletteEntry"):
        class_info = {}
        for attr_key, attr_value in entry.attrib.items():
            if attr_key == "value":
                try:
                    class_info[attr_key] = int(attr_value)
                except ValueError:
                    class_info[attr_key] = attr_value
            else:
                class_info[attr_key] = attr_value
        classes.append(class_info)
    return classes

In [6]:
def generate_raster_thumbnail(tif_path, out_path):
    with rasterio.open(tif_path) as src:
        arr = src.read(1)
    plt.figure(figsize=(3, 3))
    plt.imshow(arr, cmap="tab20")
    plt.axis('off')
    plt.savefig(out_path, bbox_inches='tight', pad_inches=0)
    plt.close()

def generate_vector_thumbnail(vector_path, out_path):
    gdf = gpd.read_file(vector_path)
    if gdf.crs is None or gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)
    fig, ax = plt.subplots(figsize=(3, 3))
    fig.patch.set_facecolor("white")
    ax.set_facecolor("white")
    gdf.plot(ax=ax, color="lightblue", edgecolor="blue", linewidth=0.5)
    ax.axis('off')
    plt.savefig(out_path, dpi=150, bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor())
    plt.close()


In [7]:
def create_raster_item():
    try:
        start_date, end_date = extract_raster_dates_from_filename(raster_filename)
    except ValueError as e:
        raise RuntimeError(f"Raster item creation failed")
    
    with rasterio.open(raster_path) as src:
        bounds = src.bounds
        geom = mapping(box(*bounds))
        bbox = [bounds.left, bounds.bottom, bounds.right, bounds.top]

    generate_raster_thumbnail(raster_path, raster_thumb)
    style_info = parse_qml_classes(raster_style_file)

    print(style_info)
   
    style_json_path = os.path.join(raster_dir, "legend.json")
    with open(style_json_path, "w") as f:
        json.dump(style_info, f, indent=2)

    

    

    item = pystac.Item(
        id=constants.raster_lulc_id,
        geometry=geom,
        bbox=bbox,
        datetime=start_date,
        start_datetime= start_date,
        end_datetime= end_date,
        properties={
            "title" :constants.raster_lulc_title,
            "description":constants.raster_lulc_description,
            "lulc:classes": style_info,
            
        }
    )
    print(item)
    

    item.add_asset("data", pystac.Asset(
        href=f"{constants.data_url}/{raster_filename}",
        media_type=pystac.MediaType.GEOTIFF,
        roles=["data"],
        title="Raster Layer"
    ))
    item.add_asset("thumbnail", pystac.Asset(
        href=f"{constants.base_url}/raster/thumbnail.png",
        media_type=pystac.MediaType.PNG,
        roles=["thumbnail"],
        title="Raster Thumbnail"
    ))
    item.add_asset("legend", pystac.Asset(
        href=f"{constants.base_url}/raster/legend.json",
        media_type=pystac.MediaType.JSON,
        roles=["metadata"],
        title="Legend JSON"
    ))
    item.add_asset("style", pystac.Asset(
        href=f"{constants.data_url}/{qml_path}",
        media_type=pystac.MediaType.TEXT,
        roles=["metadata"],
        title="Raster style"
    ))

    item.set_self_href(os.path.join(raster_dir, "item.json"))
    item.save_object()
    return item


In [8]:
raster_item=create_raster_item()

saraikela-kharsawan_gobindpur_2023-07-01_2024-06-30_LULCmap_10m.tif
2023-07-01 00:00:00
2024-06-30 00:00:00
[{'value': 0, 'label': 'clear', 'alpha': '0', 'color': '#000000'}, {'value': 1, 'label': 'built up', 'alpha': '255', 'color': '#ff0000'}, {'value': 2, 'label': 'kharif water', 'alpha': '255', 'color': '#74ccf4'}, {'value': 3, 'label': 'kharif and rabi water', 'alpha': '255', 'color': '#1ca3ec'}, {'value': 4, 'label': 'kharif and rabi and zaid water', 'alpha': '255', 'color': '#0f5e9c'}, {'value': 5, 'label': 'croplands', 'alpha': '255', 'color': '#f1c232'}, {'value': 6, 'label': 'Tree/Forests', 'alpha': '255', 'color': '#38761d'}, {'value': 7, 'label': 'barren lands', 'alpha': '255', 'color': '#a9a9a9'}, {'value': 8, 'label': 'Single Kharif Cropping', 'alpha': '255', 'color': '#bad93e'}, {'value': 9, 'label': 'Single Non-Kharif Cropping', 'alpha': '255', 'color': '#f59d22'}, {'value': 10, 'label': 'Double Cropping', 'alpha': '255', 'color': '#ff9371'}, {'value': 11, 'label': 'Tri

In [9]:
def create_vector_item():
    start_date = constants.DEFAULT_START_DATE
    end_date = constants.DEFAULT_END_DATE

    gdf = gpd.read_file(vector_path)
    geom = mapping(gdf.union_all())
    bounds = gdf.total_bounds
    bbox = [float(b) for b in bounds]

    generate_vector_thumbnail(vector_path, vector_thumb)
    style_info = parse_qml_classes(vector_style_file)
    style_json_path = os.path.join(vector_dir, "style.json")
    with open(style_json_path, "w") as f:
        json.dump(style_info, f, indent=2)
    
    
    


    item = pystac.Item(
        id=constants.swb_vector_id,
        geometry=geom,
        bbox=bbox,
        datetime=start_date,
        start_datetime= start_date,
        end_datetime= end_date,
        properties={
            "title": "constants.swb_vector_title",
            "description": "constants.swb_vector_description",
            "style": "style_data",
            
        }
    )
    print(item)


    item.add_asset("data", pystac.Asset(
        href=f"{constants.data_url}/{vector_filename}",
        media_type=pystac.MediaType.GEOJSON,
        roles=["data"],
        title="Vector Layer"
    ))
    item.add_asset("thumbnail", pystac.Asset(
        href=f"{constants.base_url}/vector/thumbnail.png",
        media_type=pystac.MediaType.PNG,
        roles=["thumbnail"],
        title="Vector Thumbnail"
    ))
    item.add_asset("style", pystac.Asset(
        href=f"{constants.data_url}/{vector_style_file}",
        media_type=pystac.MediaType.TEXT,
        roles=["style"],
        title="Vector style"
    ))
    item.set_self_href(os.path.join(vector_dir, "item.json"))
    item.save_object()
    return item


In [10]:
gdf = gpd.read_file(vector_path)

In [11]:
gdf.shape

(2306, 36)

df.shape

In [12]:

gdf.columns


Index(['id', 'MWS_UID', 'UID', 'any', 'area_17-18', 'area_18-19', 'area_19-20',
       'area_20-21', 'area_21-22', 'area_22-23', 'area_23-24', 'area_ored',
       'category_sq_m', 'k_17-18', 'k_18-19', 'k_19-20', 'k_20-21', 'k_21-22',
       'k_22-23', 'k_23-24', 'kr_17-18', 'kr_18-19', 'kr_19-20', 'kr_20-21',
       'kr_21-22', 'kr_22-23', 'kr_23-24', 'krz_17-18', 'krz_18-19',
       'krz_19-20', 'krz_20-21', 'krz_21-22', 'krz_22-23', 'krz_23-24',
       'water', 'geometry'],
      dtype='object')

In [13]:
gdf.describe()


Unnamed: 0,any,area_17-18,area_18-19,area_19-20,area_20-21,area_21-22,area_22-23,area_23-24,area_ored,k_17-18,...,kr_22-23,kr_23-24,krz_17-18,krz_18-19,krz_19-20,krz_20-21,krz_21-22,krz_22-23,krz_23-24,water
count,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,...,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0,2306.0
mean,1.0,0.443362,0.508069,0.454478,0.483303,0.440705,0.427061,0.34649,0.659403,33.811387,...,38.455482,25.248525,22.49737,32.520915,32.617556,24.167454,25.214399,22.492727,16.908545,1.0
std,0.0,5.493991,6.038191,5.165093,6.041677,5.359312,5.213407,4.449514,6.821575,37.391508,...,37.770053,31.147126,32.42235,39.559814,36.673447,34.33859,32.381335,32.149746,27.189145,0.0
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00302,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
50%,1.0,0.01,0.03,0.03,0.01,0.02,0.02,0.0,0.11,14.285714,...,33.333333,0.0,0.0,0.0,11.687563,0.0,0.0,0.0,0.0,1.0
75%,1.0,0.159108,0.22,0.2,0.18,0.18,0.18,0.12,0.35,71.428571,...,73.333333,51.268138,47.979167,73.365801,68.826844,51.914006,53.79925,50.0,33.333333,1.0
max,1.0,155.05251,181.391647,147.923569,158.621882,160.846392,168.442706,128.074706,197.097843,100.0,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,1.0


In [14]:
gdf.dtypes

id                 object
MWS_UID            object
UID                object
any                 int32
area_17-18        float64
area_18-19        float64
area_19-20        float64
area_20-21        float64
area_21-22        float64
area_22-23        float64
area_23-24        float64
area_ored         float64
category_sq_m      object
k_17-18           float64
k_18-19           float64
k_19-20           float64
k_20-21           float64
k_21-22           float64
k_22-23           float64
k_23-24           float64
kr_17-18          float64
kr_18-19          float64
kr_19-20          float64
kr_20-21          float64
kr_21-22          float64
kr_22-23          float64
kr_23-24          float64
krz_17-18         float64
krz_18-19         float64
krz_19-20         float64
krz_20-21         float64
krz_21-22         float64
krz_22-23         float64
krz_23-24         float64
water               int32
geometry         geometry
dtype: object

In [15]:

vector_item = create_vector_item()


<Item id=SWB vector>


In [16]:

catalog = pystac.Catalog(
    id=constants.id_main,
    title=constants.title_main,
    description=constants.description_main
)
catalog.add_item(create_raster_item())
catalog.add_item(create_vector_item())
catalog.set_self_href(os.path.join(gobindpur_dir, "catalog.json"))

corestack_catalog = pystac.Catalog(
    id="corestack",
    title="CorestackCatalogs",
    description="Root catalog containing all subcatalogs"
)
corestack_catalog.add_child(catalog)
corestack_catalog.set_self_href(os.path.join(corestack_dir, "catalog.json"))
corestack_catalog.normalize_and_save(corestack_dir, catalog_type=pystac.CatalogType.SELF_CONTAINED)

print(f" Root STAC Catalog created at: {os.path.join(corestack_dir, 'catalog.json')}")


saraikela-kharsawan_gobindpur_2023-07-01_2024-06-30_LULCmap_10m.tif
2023-07-01 00:00:00
2024-06-30 00:00:00
[{'value': 0, 'label': 'clear', 'alpha': '0', 'color': '#000000'}, {'value': 1, 'label': 'built up', 'alpha': '255', 'color': '#ff0000'}, {'value': 2, 'label': 'kharif water', 'alpha': '255', 'color': '#74ccf4'}, {'value': 3, 'label': 'kharif and rabi water', 'alpha': '255', 'color': '#1ca3ec'}, {'value': 4, 'label': 'kharif and rabi and zaid water', 'alpha': '255', 'color': '#0f5e9c'}, {'value': 5, 'label': 'croplands', 'alpha': '255', 'color': '#f1c232'}, {'value': 6, 'label': 'Tree/Forests', 'alpha': '255', 'color': '#38761d'}, {'value': 7, 'label': 'barren lands', 'alpha': '255', 'color': '#a9a9a9'}, {'value': 8, 'label': 'Single Kharif Cropping', 'alpha': '255', 'color': '#bad93e'}, {'value': 9, 'label': 'Single Non-Kharif Cropping', 'alpha': '255', 'color': '#f59d22'}, {'value': 10, 'label': 'Double Cropping', 'alpha': '255', 'color': '#ff9371'}, {'value': 11, 'label': 'Tri