In [2]:
import os
import ogr
import gdal
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Create the path to the data directory (a "Data" folder inside the parent of the
# current working directory) and make sure it exists.
data_dir = os.path.join(os.path.normpath(os.getcwd() + os.sep + os.pardir),"Data")
# NOTE: os.path.isdir must be *called*; asserting the bare function object is always true.
assert os.path.isdir(data_dir), "Data directory not found: {}".format(data_dir)

In [10]:
# Build the paths to the training shapefile, the validation shapefile and the image
# inside the data directory, then make sure every one of them exists.
trdf, valf, imgf = (
    os.path.join(data_dir, file_name)
    for file_name in ("Training_Data.shp", "Validation_Data.shp", "CASI_Subset.tif")
)
assert all(os.path.isfile(required_path) for required_path in (trdf, valf, imgf))

In [21]:
# Let's write a function that gives us the row/column offset and the number of rows and columns to extract.
# This function will help us extract the box of pixels around the feature in question. The geotransform
# comes from the raster; the bounding box comes from a specific feature in a shapefile.

def bbox_to_pixel_offsets(geo_transform, bounding_box):
    """Convert a map-coordinate bounding box into a raster pixel window.

    Parameters
    ----------
    geo_transform : sequence
        Raster geotransform; index 0 / 3 are used as the x / y origin and
        index 1 / 5 as the pixel width / pixel height.
    bounding_box : sequence
        Indexed as (x_min, x_max, y_min, y_max) -- the order the caller in
        this notebook obtains from ``GetEnvelope()``.

    Returns
    -------
    list
        ``[column_offset, row_offset, n_columns, n_rows]``, each an int
        obtained by rounding to the nearest pixel index.
    """
    left_edge, top_edge = geo_transform[0], geo_transform[3]
    x_res, y_res = geo_transform[1], geo_transform[5]

    def to_column(x_coord):
        # Map an x coordinate to the nearest column index.
        return int(round((x_coord - left_edge) / x_res))

    def to_row(y_coord):
        # Map a y coordinate to the nearest row index.
        return int(round((y_coord - top_edge) / y_res))

    first_col = to_column(bounding_box[0])
    last_col = to_column(bounding_box[1])
    # The row window starts from bounding_box[3] and ends at bounding_box[2].
    first_row = to_row(bounding_box[3])
    last_row = to_row(bounding_box[2])

    return [first_col, first_row, last_col - first_col, last_row - first_row]
    

In [26]:
def extract_data(vector_path, raster_path, fid, class_field_name=None):
    """Extract the raster pixels under one feature's bounding box, two ways.

    The feature's envelope is converted to a pixel window with
    ``bbox_to_pixel_offsets``; that window is then pulled from the raster by
    (1) a windowed ``ReadAsArray`` call and (2) reading the whole raster and
    slicing the resulting array.

    Parameters
    ----------
    vector_path : str
        Path to the vector dataset; only its first layer (index 0) is used.
    raster_path : str
        Path to the raster dataset.
    fid : int
        Feature ID of the feature whose bounding box is extracted.
    class_field_name : str, optional
        Currently unused by this function; kept so existing callers still work.

    Returns
    -------
    tuple
        ``(option1, option2)``: the windowed read and the slice of the full read.

    Raises
    ------
    RuntimeError
        If the raster or vector dataset cannot be opened, the layer or feature
        cannot be found, or the feature has no geometry.
    """
    # gdal.Open / ogr.Open return None on failure (unless exceptions are enabled),
    # so check explicitly instead of failing later with an opaque AttributeError.
    rds = gdal.Open(raster_path)
    if rds is None:
        raise RuntimeError("Could not open raster dataset: {}".format(raster_path))
    gt = rds.GetGeoTransform()

    # Open the vector dataset and get the data layer
    vds = ogr.Open(vector_path)
    if vds is None:
        raise RuntimeError("Could not open vector dataset: {}".format(vector_path))
    lyr = vds.GetLayer(0)
    if lyr is None:
        raise RuntimeError("Vector dataset has no layer 0: {}".format(vector_path))

    # Get the feature requested through the function arguments
    feature = lyr.GetFeature(fid)
    if feature is None:
        raise RuntimeError("No feature with FID {} in {}".format(fid, vector_path))
    geometry = feature.geometry()
    if geometry is None:
        raise RuntimeError("Feature {} in {} has no geometry".format(fid, vector_path))
    bbox = geometry.GetEnvelope()

    xoff, yoff, xsize, ysize = bbox_to_pixel_offsets(gt, bbox)
    # NOTE(review): the window is not clipped to the raster extent; a feature that
    # falls partly outside the image would yield offsets the reads below cannot use.

    # Two ways of doing this ...
    # 1. Use the arguments of ReadAsArray to read only the part of the raster we want.
    #    This avoids loading the whole image, so it is the cheaper choice when only
    #    one (or a few) windows are needed.
    option1 = rds.ReadAsArray(xoff=xoff, yoff=yoff, xsize=xsize, ysize=ysize)

    # 2. Read the whole array into memory and then index it. This costs more memory
    #    up front, but can pay off when many windows are cut from the same array.
    #    "ReadAsArray" on the dataset returns all bands as a
    #    (bands, rows, columns) shaped array; the Ellipsis index keeps every leading
    #    (band) axis, so a single-band (rows, columns) array is sliced correctly too.
    data = rds.ReadAsArray()
    option2 = data[..., yoff:yoff + ysize, xoff:xoff + xsize]

    return option1, option2
    
    
    
    

In [27]:
# Extract the pixel window for the first training feature (FID 0) with both methods.
a, b = extract_data(vector_path=trdf, raster_path=imgf, fid=0)

In [33]:
# Check that both extraction methods returned arrays of identical shape and values.
# (`a.all() == b.all()` only compared two scalar "are all elements truthy" flags,
# which can be equal even when the arrays differ.)
np.array_equal(a, b)

True

(5, 13, 12)