<a href="https://colab.research.google.com/github/yogiwinardhana/GIS_programming/blob/main/1_read_geotiff_to_array.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load GeoTIFF as NumPy Array

In [None]:
!pip install datasets rasterio pyproj



In [None]:
# Mount Google Drive at /content/drive; PATH_FILE (defined in the config cell)
# points into this mount, so this cell must run before any raster is read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import sys
import csv
import time
import glob
import joblib
import datasets
import datetime
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint

from huggingface_hub import snapshot_download

from sklearn.ensemble import RandomForestClassifier as skRF
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
from sklearn.metrics import classification_report, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.inspection import permutation_importance

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

# Geospatial related imports
from osgeo import gdalconst
from osgeo import gdal
import folium
from folium import plugins
import folium_helper

# Apply the 'fivethirtyeight' matplotlib style to every figure drawn after this point.
plt.style.use('fivethirtyeight')
# NOTE(review): this suppresses *all* warnings for the rest of the session,
# including deprecation notices — consider narrowing the filter.
warnings.filterwarnings('ignore')
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# Output directory, relative to the current working directory
# (created by the os.makedirs cell that follows).
OUTPUT_DIR = 'output'

# Folder on the mounted Google Drive that contains the individual band
# GeoTIFFs; every '*.tif' file directly inside it is picked up.
PATH_FILE = '/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands'

In [None]:
# Create the output directory. exist_ok=True keeps this cell re-runnable:
# without it a second run raises FileExistsError.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Collect the per-band GeoTIFF paths in numeric band order.
# A plain lexicographic sort puts '..._10.tif' ahead of '..._b1.tif', so the
# tenth band would end up in the first array column and be mislabelled.
def _band_number_key(path):
  """Sort key: trailing integer of the file stem, then the path as tie-breaker."""
  stem = Path(path).stem
  digits = stem[len(stem.rstrip('0123456789')):]
  # Files whose stem has no trailing number sort last, alphabetically.
  return (int(digits) if digits else float('inf'), path)

file_list = sorted(glob.glob(os.path.join(PATH_FILE, '*.tif')), key=_band_number_key)
file_list

['/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands/modis_10bands_1300_1300_10.tif',
 '/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands/modis_10bands_1300_1300_b1.tif',
 '/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands/modis_10bands_1300_1300_b2.tif',
 '/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands/modis_10bands_1300_1300_b3.tif',
 '/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands/modis_10bands_1300_1300_b4.tif',
 '/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands/modis_10bands_1300_1300_b5.tif',
 '/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands/modis_10bands_1300_1300_b6.tif',
 '/content/drive/MyDrive/2_geospatial_project/binary_classification_black_sea/individual_bands/modis_10b

In [None]:
# read raster to array
def read_raster_to_array(file_list):
  """Stack band 1 of every raster in `file_list` into one 2-D int16 array.

  Each raster is flattened and stored as one column, so the result has one row
  per pixel and one column per file, in the order given by `file_list`.

  Args:
    file_list: Iterable of raster file paths. Every raster must contain the
      same number of pixels as the first one.

  Returns:
    Tuple `(img, raster_projection)` where `img` is an `np.int16` array of
    shape `(n_pixels, n_files)` and `raster_projection` is the value returned
    by `GetProjection()` for the first file.

  Raises:
    ValueError: If `file_list` is empty, or a raster's pixel count differs
      from the first raster's.
    IOError: If `gdal.Open` returns None for a file.
  """
  file_list = list(file_list)
  if not file_list:
    raise ValueError('file_list is empty: no rasters to read')

  img = None
  raster_projection = None

  for i, file_name in enumerate(file_list):
    ds = gdal.Open(file_name)
    if ds is None:
      # gdal.Open may return None rather than raise when exceptions are not enabled.
      raise IOError('GDAL could not open raster: {}'.format(file_name))

    band = ds.GetRasterBand(1).ReadAsArray().astype(np.int16).ravel()

    if i == 0:
      # Size the output from the data itself instead of a hard-coded 1300*1300 x 10.
      img = np.empty((band.size, len(file_list)), dtype=np.int16)
      raster_projection = ds.GetProjection()
    elif band.size != img.shape[0]:
      raise ValueError(
          'Raster {} has {} pixels, expected {}'.format(file_name, band.size, img.shape[0]))

    img[:, i] = band

    # Drop the reference so the dataset handle is released before the next file.
    ds = None

  # Return only after the loop: returning inside it left every column but the
  # first as uninitialised np.empty memory.
  return img, raster_projection


In [None]:
%%time
im, raster_projection = read_raster_to_array(file_list)
print('Raster as ndarray')
print(im)

print('{} MB size'.format((im.size * im.itemsize) / 1000000))

Raster as ndarray
[[-7512     0     0 ...     0     0     0]
 [-8331     0     0 ...     0     0     0]
 [-7783     0     0 ...     0     0     0]
 ...
 [-5587     0     0 ...     0     0     0]
 [-5268     0     0 ...     0     0     0]
 [-5016     0     0 ...     0     0     0]]
33.8 MB size
CPU times: user 8.86 ms, sys: 15.3 ms, total: 24.2 ms
Wall time: 34.6 ms


In [None]:
# Column labels applied, in order, to the columns of `im`.
list_columns = [
  'SR_1', 'SR_2', 'SR_3', 'SR_4', 'SR_5', 'SR_6', 'SR_7',
  'ndvi', 'swir1', 'swir2',
]

# Wrap the stacked array in a float32 DataFrame with one named column per band.
raster_dataframe = pd.DataFrame(
  data=im,
  columns=list_columns,
  dtype=np.float32,
)
raster_dataframe

Unnamed: 0,SR_1,SR_2,SR_3,SR_4,SR_5,SR_6,SR_7,ndvi,swir1,swir2
0,-7512.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-8331.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-7783.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-7307.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-7372.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
1689995,-5412.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1689996,-5296.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1689997,-5587.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1689998,-5268.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
