In [1]:
from osgeo import gdal

In [2]:
# Display the version string of the GDAL Python bindings loaded by this kernel.
gdal.__version__

'3.4.1'

In [None]:
!pip install pydantic
!pip install pandas
!pip install matplotlib seaborn

In [3]:
import time
import os
import subprocess
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from typing import Union, Optional
from pydantic import BaseModel, validator

In [None]:
# Apply seaborn's 'darkgrid' style to plots drawn after this cell.
sns.set_style('darkgrid')
# Opens a new, empty matplotlib figure.
# NOTE(review): no plotting call is visible in this notebook — confirm the figure is still needed.
plt.figure()

### Create Model structure

In [5]:
# Directory under which derived output files are placed.
ROOT = 'assets'


def _derive_output_path(value, values, extension):
    """Build ``ROOT/<input stem>.<extension>`` from the already-validated input file.

    Args:
        value: The raw value supplied for the output field.
        values: Mapping of fields validated so far (pydantic passes this to
            validators that declare a ``values`` parameter).
        extension: File extension, without the dot, for the derived path.

    Returns:
        The derived output path, or ``value`` unchanged when no input file is
        available to derive it from.
    """
    input_file = values.get('input_file')
    if not input_file:
        return value
    # Drop the source extension so the result is `x.mbtiles`, not `x.tif.mbtiles`.
    stem, _ = os.path.splitext(os.path.basename(input_file))
    return os.path.join(ROOT, f'{stem}.{extension}')


class Config(BaseModel):
    """Paths used by the GeoTIFF conversion steps.

    When an output path is passed at construction time it is replaced by a path
    derived from ``input_file`` (``ROOT/<input stem>.<ext>``). The validators do
    not run for omitted fields or for plain attribute assignment, so defaults
    stay ``None`` and later assignments are stored as given.
    """
    # Source raster to convert.
    input_file: Union[str, None] = 'Khon Kaen_F1_transparent_mosaic_group1.tif'
    # Destination MBTiles file.
    output_mbtiles: Union[str, None] = None
    # Destination GeoJSON file (optional).
    output_geojson: Union[str, None] = None

    @validator('output_mbtiles', pre=True)
    def output_file_join_path_mbtiles(cls, value, values):
        # `input_file` is declared first, so it is already present in `values`;
        # it cannot be read from `cls` because model fields are not class attributes.
        return _derive_output_path(value, values, 'mbtiles')

    @validator('output_geojson', pre=True)
    def output_file_join_path_geojson(cls, value, values):
        return _derive_output_path(value, values, 'geojson')
    

### Convert bytes

- Formats a byte count (e.g. the value returned by `os.path.getsize`) as a human-readable size string.

In [6]:
def convert_bytes(size):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size: Number of bytes (int or float), e.g. from ``os.path.getsize``.

    Returns:
        A string such as ``'135.6 MB'``. Values of 1024 TB or more are reported
        in PB instead of falling off the end of the unit list and returning None.
    """
    for unit in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if size < 1024.0:
            return "%3.1f %s" % (size, unit)
        size /= 1024.0
    # After five divisions by 1024 the remaining value is expressed in petabytes.
    return "%3.1f %s" % (size, 'PB')

In [7]:
def result_measure_area(tif: str) -> dict:
    """Measure the area covered by non-zero pixels in band 1 of a raster.

    Args:
        tif: Path to a raster file readable by GDAL.

    Returns:
        A dict with the band's min/max statistics, the count of non-zero
        pixels, the pixel width, and the covered area in square metres and rai.

    Raises:
        RuntimeError: If GDAL cannot open ``tif``.

    Note:
        Areas are only in square metres if the raster's georeferencing units
        are metres — this is assumed, not checked.
    """
    sq_m_per_rai = 1600  # 1 rai = 1,600 square metres

    ds = gdal.Open(tif)
    if ds is None:
        # Without exceptions enabled, gdal.Open signals failure by returning None.
        raise RuntimeError(f'GDAL could not open raster: {tif}')

    band = None
    try:
        # GeoTransform: (x_origin, pixel_width, row_rotation,
        #                y_origin, column_rotation, pixel_height)
        gt = ds.GetGeoTransform()
        pixel_size = gt[1]

        # Area of one pixel is the absolute determinant of the affine matrix.
        # For north-up square pixels this equals pixel_size ** 2, and it stays
        # correct for non-square or rotated pixels.
        pixel_area = abs(gt[1] * gt[5] - gt[2] * gt[4])

        band = ds.GetRasterBand(1)

        # (approx_ok=True, force=True): statistics may be approximate.
        stats = band.GetStatistics(True, True)
        min_val, max_val = stats[0], stats[1]

        # Count pixels whose value is not zero; cast to a plain Python int.
        nz_pixels = int((band.ReadAsArray() != 0).sum())
    finally:
        # Drop the GDAL references so the dataset is closed promptly.
        band = None
        ds = None

    area_m2 = nz_pixels * pixel_area
    rai = area_m2 / sq_m_per_rai

    return {
        'min_pixel': min_val,
        'max_pixel': max_val,
        'number_non_zero_pixel': nz_pixels,
        'pixel_size': f'{pixel_size} m',
        'area (sq.m)': area_m2,
        'area (rai)': rai
    }


## Config Schema
- `input_file`: path of the input GeoTIFF
- `output_mbtiles`: path of the output .mbtiles file
- `output_geojson`: path of the output GeoJSON file (optional)

In [8]:
config = Config()
root_path = 'assets/khonkaen_f5_f6_f7'
folder_tif = os.listdir(root_path)

# Keep every non-hidden entry ending in .tif / .TIF, as a sorted list of
# paths joined onto root_path.
tifs = sorted(
    os.path.join(root_path, entry)
    for entry in folder_tif
    if not entry.startswith('.') and entry.endswith(('.tif', '.TIF'))
)
tifs

['assets/khonkaen_f5_f6_f7/khonkaen_f5_subset1.TIF',
 'assets/khonkaen_f5_f6_f7/khonkaen_f5_subset3.TIF',
 'assets/khonkaen_f5_f6_f7/khonkaen_f5_subset4.TIF',
 'assets/khonkaen_f5_f6_f7/khonkaen_f6_subset1.TIF',
 'assets/khonkaen_f5_f6_f7/khonkaen_f6_subset3.TIF',
 'assets/khonkaen_f5_f6_f7/khonkaen_f6_subset4.TIF',
 'assets/khonkaen_f5_f6_f7/khonkaen_f8_subset0.TIF',
 'assets/khonkaen_f5_f6_f7/khonkaen_f8_subset3.TIF',
 'assets/khonkaen_f5_f6_f7/khonkaen_f8_subset4.TIF']

## Convert .tif files to .mbtiles

In [9]:
# Convert every GeoTIFF in `tifs` to MBTiles and collect one record per file
# (conversion time in minutes, paths, input size, area measurements).
results = []

# Creation options passed to the MBTiles driver. Each option must be its own
# list element: a missing comma makes Python concatenate adjacent string
# literals into one invalid option ("RESAMPLING=averageMAXZOOM=22").
# NOTE(review): MAXZOOM appears to be a vector-tile option of the MBTiles
# driver — confirm it has any effect for raster output.
output_options = [
    "TILE_FORMAT=JPEG",
    "QUALITY=75",
    "RESAMPLING=average",
    "MAXZOOM=22",
]

# The output directory must exist before GDAL can create files in it.
os.makedirs('output_mbtiles', exist_ok=True)

for tif in tifs:
    start_time = time.time()
    config.input_file = tif
    basename = os.path.basename(config.input_file).split('.')[0]
    config.output_mbtiles = f'output_mbtiles/{basename}.mbtiles'

    translate_options = gdal.TranslateOptions(
        format="MBTiles",
        creationOptions=output_options,
        bandList=[1, 2, 3]
    )
    mbtiles_ds = gdal.Translate(config.output_mbtiles, config.input_file, options=translate_options)
    if mbtiles_ds is None:
        # Without exceptions enabled, gdal.Translate signals failure by returning None;
        # stop instead of recording a timing for a conversion that did not happen.
        raise RuntimeError(f'gdal.Translate failed for {config.input_file}')
    # Releasing the dataset flushes and closes the output file; keep it inside the timing.
    mbtiles_ds = None

    elapsed_minutes = (time.time() - start_time) / 60
    sz = os.path.getsize(tif)
    obj = {
        'time_taken': round(elapsed_minutes, 4),
        'file_tif': tif,
        'output_mbtiles': config.output_mbtiles,
        'file_size': convert_bytes(sz),
    }
    obj.update(result_measure_area(config.input_file))
    results.append(obj)


## Another way: convert a .tif file to vector (GeoJSON, Shapefile)

**VectorTranslate, Translate**

- gdal.Translate(format="GeoJSON")
- gdal.VectorTranslate(format="ESRI Shapefile")

In [None]:
# Convert GeoTIFF file to vector shapefile
input_file = "Khonkaen_F4_transparent_mosaic_group1.tif"
vector_file = "output.shp"
gdal.VectorTranslate(vector_file, input_file, format="ESRI Shapefile")

# Convert vector shapefile to MBTiles file
output_file = "output.mbtiles"
gdal.Translate(output_file, vector_file, format="MBTiles")

## F1 & F2

In [45]:
# Tabulate the per-file records collected in `results` (one row per GeoTIFF).
# NOTE(review): the saved output below lists khonkaen_f1_f2 files, while the
# visible root_path is 'assets/khonkaen_f5_f6_f7' — this output appears to come
# from an earlier run with a different folder; confirm before relying on it.
df = pd.DataFrame(results)
df

Unnamed: 0,time_taken,file_tif,output_mbtiles,file_size,min_pixel,max_pixel,number_non_zero_pixel,pixel_size,area (sq.m),area (rai)
0,0.1912,assets/khonkaen_f1_f2/khonkaen_f1_subset0.TIF,output_mbtiles/khonkaen_f1_subset0.mbtiles,135.6 MB,0.0,255.0,48836770,0.05063787766023837 m,125226.984549,78.266865
1,0.1817,assets/khonkaen_f1_f2/khonkaen_f1_subset3.TIF,output_mbtiles/khonkaen_f1_subset3.mbtiles,114.0 MB,0.0,255.0,48875340,0.05063787766023837 m,125325.885537,78.328678
2,0.1893,assets/khonkaen_f1_f2/khonkaen_f1_subset4.TIF,output_mbtiles/khonkaen_f1_subset4.mbtiles,168.4 MB,0.0,255.0,62409984,0.05063787766023837 m,160031.347325,100.019592
3,0.1818,assets/khonkaen_f1_f2/khonkaen_f1_subset7.TIF,output_mbtiles/khonkaen_f1_subset7.mbtiles,100.7 MB,0.0,255.0,59928965,0.05063787766023837 m,153669.531669,96.043457
4,0.1941,assets/khonkaen_f1_f2/khonkaen_f2_subset0.TIF,output_mbtiles/khonkaen_f2_subset0.mbtiles,139.9 MB,0.0,255.0,44426166,0.051930000000000386 m,119805.148064,74.878218
5,0.1893,assets/khonkaen_f1_f2/khonkaen_f2_subset1.TIF,output_mbtiles/khonkaen_f2_subset1.mbtiles,134.1 MB,0.0,255.0,43783928,0.051930000000000386 m,118073.208857,73.795756
6,0.196,assets/khonkaen_f1_f2/khonkaen_f2_subset3.TIF,output_mbtiles/khonkaen_f2_subset3.mbtiles,175.8 MB,0.0,255.0,57004516,0.051930000000000386 m,153725.49771,96.078436
7,0.1974,assets/khonkaen_f1_f2/khonkaen_f2_subset4.TIF,output_mbtiles/khonkaen_f2_subset4.mbtiles,171.5 MB,0.0,255.0,56225057,0.051930000000000386 m,151623.511216,94.764695
8,0.1968,assets/khonkaen_f1_f2/khonkaen_f2_subset6.TIF,output_mbtiles/khonkaen_f2_subset6.mbtiles,181.2 MB,0.0,255.0,59835077,0.051930000000000386 m,161358.742039,100.849214
9,0.1987,assets/khonkaen_f1_f2/khonkaen_f2_subset7.TIF,output_mbtiles/khonkaen_f2_subset7.mbtiles,187.6 MB,0.0,255.0,62401955,0.051930000000000386 m,168280.905857,105.175566


## F5 & F6 & F8

In [10]:
# Tabulate the per-file records collected in `results` (one row per GeoTIFF).
# This rebinds the global `df` that summary_report reads.
df = pd.DataFrame(results)
df

Unnamed: 0,time_taken,file_tif,output_mbtiles,file_size,min_pixel,max_pixel,number_non_zero_pixel,pixel_size,area (sq.m),area (rai)
0,0.2026,assets/khonkaen_f5_f6_f7/khonkaen_f5_subset1.TIF,output_mbtiles/khonkaen_f5_subset1.mbtiles,162.5 MB,0.0,255.0,56162227,0.050800000000000886 m,144934.489485,90.584056
1,0.187,assets/khonkaen_f5_f6_f7/khonkaen_f5_subset3.TIF,output_mbtiles/khonkaen_f5_subset3.mbtiles,147.7 MB,0.0,255.0,57256969,0.050800000000000886 m,147759.62448,92.349765
2,0.1905,assets/khonkaen_f5_f6_f7/khonkaen_f5_subset4.TIF,output_mbtiles/khonkaen_f5_subset4.mbtiles,171.8 MB,0.0,255.0,62409886,0.050800000000000886 m,161057.448207,100.660905
3,0.1901,assets/khonkaen_f5_f6_f7/khonkaen_f6_subset1.TIF,output_mbtiles/khonkaen_f6_subset1.mbtiles,172.8 MB,0.0,255.0,59504734,0.05082000000000035 m,153681.234171,96.050771
4,0.19,assets/khonkaen_f5_f6_f7/khonkaen_f6_subset3.TIF,output_mbtiles/khonkaen_f6_subset3.mbtiles,171.7 MB,0.0,255.0,58576718,0.05082000000000035 m,151284.472861,94.552796
5,0.192,assets/khonkaen_f5_f6_f7/khonkaen_f6_subset4.TIF,output_mbtiles/khonkaen_f6_subset4.mbtiles,176.1 MB,0.0,255.0,62409986,0.05082000000000035 m,161184.548327,100.740343
6,0.4928,assets/khonkaen_f5_f6_f7/khonkaen_f8_subset0.TIF,output_mbtiles/khonkaen_f8_subset0.mbtiles,144.8 MB,0.0,255.0,51794212,0.05037999999999729 m,131461.18914,82.163243
7,0.4946,assets/khonkaen_f5_f6_f7/khonkaen_f8_subset3.TIF,output_mbtiles/khonkaen_f8_subset3.mbtiles,165.8 MB,0.0,255.0,60126495,0.05037999999999729 m,152609.726576,95.381079
8,0.5004,assets/khonkaen_f5_f6_f7/khonkaen_f8_subset4.TIF,output_mbtiles/khonkaen_f8_subset4.mbtiles,158.3 MB,0.0,255.0,62251940,0.05037999999999729 m,158004.4129,98.752758


In [12]:
def summary_report(col: str, data: Optional[pd.DataFrame] = None) -> dict:
    """Print and return the mean, sum and standard deviation of one column.

    Args:
        col: Name of the numeric column to summarise.
        data: DataFrame to read from. When omitted, the notebook-level ``df``
            is used, as before; passing it explicitly avoids depending on
            whichever frame was assigned to ``df`` last.

    Returns:
        ``{'mean': ..., 'sum': ..., 'sd': ...}`` with mean and sum rounded to
        four decimals and the standard deviation left unrounded.
    """
    frame = df if data is None else data
    series = frame[col]

    mean = round(series.mean(), 4)
    total = round(series.sum(), 4)
    std = series.std()

    print('mean score: ', mean)
    print('sum score: ', total)
    print('standard deviation score: ', std)
    return {'mean': mean, 'sum': total, 'sd': std}
    

## F1 & F2
### Time taken

In [40]:
# Label the output, then summarise the per-file conversion time (minutes)
# held in the global `df`.
display('Time taken')
summary_report('time_taken')

'Time taked'

mean score:  0.1916
sum score:  1.9163
standard deviation score:  0.006159734842778002


{'mean': 0.1916, 'sum': 1.9163, 'sd': 0.006159734842778002}

## F1 & F2
### Area (rai)

In [41]:
# Label the output, then summarise the 'area (rai)' column of the global `df`.
display('Area')
summary_report('area (rai)')

'Area'

mean score:  89.82
sum score:  898.2005
standard deviation score:  12.061770454536566


{'mean': 89.82, 'sum': 898.2005, 'sd': 12.061770454536566}

## F5 & F6 & F8
### Time taken

In [15]:
# Mean, sum and standard deviation of the per-file conversion time (minutes) in the global `df`.
summary_report('time_taken')

mean score:  0.2933
sum score:  2.64
standard deviation score:  0.15202350969504683


{'mean': 0.2933, 'sum': 2.64, 'sd': 0.15202350969504683}

## F5 & F6 & F8
### Area (rai)

In [16]:
# Mean, sum and standard deviation of the 'area (rai)' column in the global `df`.
summary_report('area (rai)')

mean score:  94.5817
sum score:  851.2357
standard deviation score:  5.812294222100304


{'mean': 94.5817, 'sum': 851.2357, 'sd': 5.812294222100304}

## End of processing