In [9]:
import os
import numpy as np
import tarfile
import geopandas as gpd
import rasterio
import shutil
import random
from PIL import Image
from rasterio.windows import Window
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask

In [None]:
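# OLD: earlier version of the extract/reproject/clip pipeline. The next cell redefines
# clip_raster and process_files (adding Band 8 resampling); kept here for reference only.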
def extract_files(source_directory, target_directory, clipped_directory, shapefile_path):
    """Unpack every ``.tar`` archive under ``source_directory`` and process its bands.

    Each archive is extracted into ``target_directory/<scene>``, where ``<scene>``
    is the archive file name up to its first dot, and the extracted folder is then
    handed to ``process_files``.

    Args:
        source_directory: Root folder searched recursively for ``.tar`` files.
        target_directory: Folder under which one sub-folder per archive is created.
        clipped_directory: Passed through to ``process_files``.
        shapefile_path: Passed through to ``process_files``.
    """
    for current_dir, _subdirs, filenames in os.walk(source_directory):
        for archive_name in filenames:
            if not archive_name.endswith('.tar'):
                continue

            # Scene identifier = everything before the first dot of the file name.
            rasterfileName = archive_name.split('.')[0]
            scene_dir = os.path.join(target_directory, rasterfileName)
            os.makedirs(scene_dir, exist_ok=True)

            archive_path = os.path.join(current_dir, archive_name)
            with tarfile.open(archive_path, 'r') as archive:
                archive.extractall(path=scene_dir)

            process_files(scene_dir, clipped_directory, rasterfileName, shapefile_path)

def clip_raster(raster_path, shapes):
    """Mask a raster with ``shapes`` (cropping enabled) and return pixels plus metadata.

    Args:
        raster_path: Path of the raster to open with rasterio.
        shapes: Geometries forwarded to ``rasterio.mask.mask``.

    Returns:
        ``(out_image, out_meta)``: the masked array and a copy of the source
        metadata whose driver, height, width and transform describe that array.
    """
    with rasterio.open(raster_path) as dataset:
        clipped_pixels, clipped_transform = mask(dataset, shapes, crop=True)
        clipped_meta = dataset.meta.copy()

    # Axis 1 / axis 2 of the masked array are used as the new height / width.
    clipped_meta["driver"] = "GTiff"
    clipped_meta["height"] = clipped_pixels.shape[1]
    clipped_meta["width"] = clipped_pixels.shape[2]
    clipped_meta["transform"] = clipped_transform
    return clipped_pixels, clipped_meta

def process_files(file_target_directory, clipped_directory, rasterfileName, shapefile_path):
    """Reproject bands B1..B11 of one scene to EPSG:3826 and clip them to the shapefile.

    For each band, ``<scene>_<band>.TIF`` is reprojected into a temporary
    ``temp_<scene>_<band>.TIF`` beside it, clipped with the shapefile geometries,
    written to ``clipped_directory/<scene>/<scene>_<band>.tif``, and the temporary
    file is removed.

    Args:
        file_target_directory: Folder holding the scene's extracted band files.
        clipped_directory: Root folder for the clipped outputs.
        rasterfileName: Scene identifier used as file-name prefix and output folder name.
        shapefile_path: Vector file whose geometries define the clip area.
    """
    target_crs = 'EPSG:3826'
    clip_shapes = gpd.read_file(shapefile_path).geometry
    scene_output_dir = os.path.join(clipped_directory, rasterfileName)

    for band in [f"B{number}" for number in range(1, 12)]:
        band_file = f"{rasterfileName}_{band}.TIF"
        input_path = os.path.join(file_target_directory, band_file)
        temp_output_path = os.path.join(file_target_directory, f"temp_{band_file}")

        with rasterio.open(input_path) as src:
            dst_transform, dst_width, dst_height = calculate_default_transform(
                src.crs, target_crs, src.width, src.height, *src.bounds)

            # Same metadata as the source, re-gridded for the target CRS.
            dst_profile = {
                **src.meta,
                'crs': target_crs,
                'transform': dst_transform,
                'width': dst_width,
                'height': dst_height,
            }

            with rasterio.open(temp_output_path, 'w', **dst_profile) as dst:
                for band_index in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, band_index),
                        destination=rasterio.band(dst, band_index),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=dst_transform,
                        dst_crs=target_crs,
                        resampling=Resampling.nearest)

        clipped_pixels, clipped_meta = clip_raster(temp_output_path, clip_shapes)
        os.makedirs(scene_output_dir, exist_ok=True)
        output_path = os.path.join(scene_output_dir, f"{rasterfileName}_{band}.tif")
        with rasterio.open(output_path, "w", **clipped_meta) as dest:
            dest.write(clipped_pixels)

        # The reprojected intermediate is only needed for clipping.
        os.remove(temp_output_path)

# Paths used by the old pipeline: where the .tar archives live, where they are
# unpacked, where clipped bands are written, and the shapefile used for clipping.
source_directory = 'data/zip'
target_directory = 'data/project/extracted_data'
clipped_directory = 'data/project/clipped'
shapefile_path = 'data/study_area/study_area_v3.shp'

# Executes immediately when the cell runs: extract every archive and process it.
extract_files(source_directory, target_directory, clipped_directory, shapefile_path)


In [None]:
def resample_band8(input_path, output_path):
    """Write a copy of a raster at half its pixel width and height.

    The raster is read into an array of ``(count, height // 2, width // 2)`` using
    bilinear resampling, and the affine transform is rescaled by the ratio of the
    original size to the array size so the output covers the same extent.

    Args:
        input_path: Raster to downsample.
        output_path: Destination GeoTIFF path.
    """
    with rasterio.open(input_path) as src:
        half_height = src.height // 2
        half_width = src.width // 2

        data = src.read(
            out_shape=(src.count, half_height, half_width),
            resampling=Resampling.bilinear,
        )

        # Pixel-size ratios derived from the array actually returned by read().
        x_ratio = src.width / data.shape[-1]
        y_ratio = src.height / data.shape[-2]
        scaled_transform = src.transform * src.transform.scale(x_ratio, y_ratio)

        new_meta = {
            **src.meta,
            "driver": "GTiff",
            "height": half_height,
            "width": half_width,
            "transform": scaled_transform,
        }

        with rasterio.open(output_path, "w", **new_meta) as dst:
            dst.write(data)

def clip_raster(raster_path, shapes):
    """Mask a raster with ``shapes`` (cropping enabled) and return pixels plus metadata.

    Args:
        raster_path: Path of the raster to open with rasterio.
        shapes: Geometries forwarded to ``rasterio.mask.mask``.

    Returns:
        ``(out_image, out_meta)``: the masked array and a copy of the source
        metadata whose driver, height, width and transform describe that array.
    """
    with rasterio.open(raster_path) as dataset:
        clipped_pixels, clipped_transform = mask(dataset, shapes, crop=True)
        clipped_meta = dataset.meta.copy()

    # Axis 1 / axis 2 of the masked array are used as the new height / width.
    clipped_meta["driver"] = "GTiff"
    clipped_meta["height"] = clipped_pixels.shape[1]
    clipped_meta["width"] = clipped_pixels.shape[2]
    clipped_meta["transform"] = clipped_transform
    return clipped_pixels, clipped_meta

def process_files(file_target_directory, clipped_directory, rasterfileName, shapefile_path):
    """Reproject bands B1..B11 of one scene to EPSG:3826 and clip them to the shapefile.

    Band ``B8`` is first downsampled to half size with ``resample_band8``; every
    band is then reprojected into a temporary ``temp_<scene>_<band>.TIF``, clipped
    with the shapefile geometries and written to
    ``clipped_directory/<scene>/<scene>_<band>.tif``. Temporary files (including
    the resampled B8 copy) are removed after each band.

    Args:
        file_target_directory: Folder holding the scene's extracted band files.
        clipped_directory: Root folder for the clipped outputs.
        rasterfileName: Scene identifier used as file-name prefix and output folder name.
        shapefile_path: Vector file whose geometries define the clip area.
    """
    target_crs = 'EPSG:3826'
    clip_shapes = gpd.read_file(shapefile_path).geometry
    scene_output_dir = os.path.join(clipped_directory, rasterfileName)

    for band in [f"B{number}" for number in range(1, 12)]:
        band_file = f"{rasterfileName}_{band}.TIF"
        input_path = os.path.join(file_target_directory, band_file)
        temp_output_path = os.path.join(file_target_directory, f"temp_{band_file}")
        needs_resampling = band == "B8"

        if needs_resampling:
            # B8 is halved in size before reprojection; the copy replaces the input.
            resampled_path = os.path.join(file_target_directory, f"resampled_{band_file}")
            resample_band8(input_path, resampled_path)
            input_path = resampled_path

        with rasterio.open(input_path) as src:
            dst_transform, dst_width, dst_height = calculate_default_transform(
                src.crs, target_crs, src.width, src.height, *src.bounds)

            dst_profile = {
                **src.meta,
                'crs': target_crs,
                'transform': dst_transform,
                'width': dst_width,
                'height': dst_height,
            }

            with rasterio.open(temp_output_path, 'w', **dst_profile) as dst:
                for band_index in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, band_index),
                        destination=rasterio.band(dst, band_index),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=dst_transform,
                        dst_crs=target_crs,
                        resampling=Resampling.nearest)

        clipped_pixels, clipped_meta = clip_raster(temp_output_path, clip_shapes)
        os.makedirs(scene_output_dir, exist_ok=True)
        output_path = os.path.join(scene_output_dir, f"{rasterfileName}_{band}.tif")
        with rasterio.open(output_path, "w", **clipped_meta) as dest:
            dest.write(clipped_pixels)

        # Intermediates are only needed until the clipped band is on disk.
        os.remove(temp_output_path)
        if needs_resampling:
            os.remove(resampled_path)

# Inputs for the reproject/clip pass: already-extracted scenes, the output root,
# and the shapefile used for clipping.
source_directory = 'data/project/extracted_data'
clipped_directory = 'data/project/clipped'
shapefile_path = 'data/study_area/square_v2.shp'

# Every directory reported by os.walk is treated as one scene, with the directory
# name used as the scene/file prefix.
# NOTE(review): os.walk recurses, so nested sub-folders (if any exist) would also be
# passed to process_files — confirm the extracted scenes are flat.
for root, dirs, files in os.walk(source_directory):
    for dir_name in dirs:
        file_target_directory = os.path.join(root, dir_name)
        print('-----')  # visual separator between scenes in the cell output
        process_files(file_target_directory, clipped_directory, dir_name, shapefile_path)


In [None]:
def calculate_ndvi_ndbi(r, nir, swir, th):
    """Compute NDVI, NDBI and a temperature raster from four single-band files.

    Band 1 of each file is read as float32. The indices are the usual normalised
    differences; pixels where the denominator is zero are not special-cased.

    Args:
        r: Path to the red band raster.
        nir: Path to the near-infrared band raster.
        swir: Path to the short-wave-infrared band raster.
        th: Path to the thermal band raster.

    Returns:
        ``(ndvi, ndbi, lst)`` arrays.
    """
    def _first_band(path):
        # Read band 1 and promote to float32 so the ratios below are floating point.
        with rasterio.open(path) as dataset:
            return dataset.read(1).astype('float32')

    red_px, nir_px, swir_px, thermal_px = (_first_band(p) for p in (r, nir, swir, th))

    ndvi = (nir_px - red_px) / (nir_px + red_px)
    ndbi = (swir_px - nir_px) / (swir_px + nir_px)

    # NOTE(review): the constants look like Landsat 8 Band 10 radiance rescaling
    # (0.0003342, 0.1) and thermal constants K1/K2 (774.8853, 1321.0789), with
    # -273.15 converting Kelvin to Celsius — confirm against the scene metadata.
    radiance = 0.0003342 * thermal_px + 0.1
    lst = (1321.0789 / np.log((774.8853 / radiance) + 1)) - 273.15

    return ndvi, ndbi, lst

def save_tiff(data, file_path, meta):
    """Write a 2-D array as a single-band float32 GeoTIFF.

    The caller's ``meta`` is not modified: the driver/dtype/count overrides are
    applied to a copy, so the same dict can safely be reused for other writes.

    Args:
        data: Array written to band 1.
        file_path: Destination path.
        meta: Rasterio metadata used as the base profile (e.g. crs, transform, size).
    """
    out_meta = dict(meta)
    out_meta.update({"driver": "GTiff", "dtype": 'float32', "count": 1})
    with rasterio.open(file_path, "w", **out_meta) as dest:
        dest.write(data, 1)

def process_raster_data(clipped_directory):
    """Derive NDVI, NDBI and LST rasters for every scene folder in ``clipped_directory``.

    For each sub-folder ``<scene>``, bands B4, B5, B6 and B10 are passed to
    ``calculate_ndvi_ndbi`` and the three results are saved next to them as
    ``<scene>_NDVI.tif``, ``<scene>_NDBI.tif`` and ``<scene>_lst.tif`` using the
    B4 raster's metadata. Non-directory entries are ignored.

    Args:
        clipped_directory: Folder containing one sub-folder per scene.
    """
    for folder_name in os.listdir(clipped_directory):
        folder_path = os.path.join(clipped_directory, folder_name)
        if not os.path.isdir(folder_path):
            continue

        def scene_file(suffix):
            # Files follow the "<scene>_<suffix>.tif" naming used by the clip step.
            return os.path.join(folder_path, f"{folder_name}_{suffix}.tif")

        red_path = scene_file("B4")
        ndvi, ndbi, lst = calculate_ndvi_ndbi(
            red_path, scene_file("B5"), scene_file("B6"), scene_file("B10"))

        with rasterio.open(red_path) as src:
            meta = src.meta.copy()

        # NOTE(review): "lst" is lower-case while the other two suffixes are
        # upper-case; any consumer matching on the suffix must use the same case.
        for suffix, values in (("NDVI", ndvi), ("NDBI", ndbi), ("lst", lst)):
            save_tiff(values, scene_file(suffix), meta)

# Compute the NDVI/NDBI/LST rasters for every clipped scene folder; runs on cell execution.
clipped_directory = 'data/project/clipped'
process_raster_data(clipped_directory)


In [10]:
def standardize_band(band):
    """Z-score a band: subtract its mean and divide by its standard deviation.

    A constant band has zero standard deviation; instead of dividing 0 by 0
    (which yields NaN and a runtime warning) an all-zero array is returned.

    Args:
        band: Array-like of pixel values.

    Returns:
        Array of the same shape with zero mean and, unless the band is constant,
        unit (population) standard deviation.
    """
    mean = np.mean(band)
    std = np.std(band)
    centered = band - mean
    if std == 0:
        # Every pixel equals the mean; the standardised value is defined as 0.
        return np.zeros_like(centered)
    return centered / std

def standardize_image(image_path, output_path):
    """Standardise every band of a raster independently and save it as float32 GeoTIFF.

    Each band is passed through ``standardize_band``; the output keeps the
    source CRS and transform.

    Args:
        image_path: Raster to read.
        output_path: Destination GeoTIFF path.
    """
    with rasterio.open(image_path) as src:
        raw = src.read()
        standardized_image = np.zeros(raw.shape, dtype=np.float32)

        # Iterating the array walks its first axis, i.e. one band at a time.
        for band_index, band_pixels in enumerate(raw):
            standardized_image[band_index] = standardize_band(band_pixels)

        profile = dict(
            driver='GTiff',
            height=standardized_image.shape[1],
            width=standardized_image.shape[2],
            count=standardized_image.shape[0],
            dtype=standardized_image.dtype,
            crs=src.crs,
            transform=src.transform,
        )
        with rasterio.open(output_path, 'w', **profile) as dst:
            dst.write(standardized_image)

def create_output_dir(base_output_dir, subdir):
    """Create ``base_output_dir/subdir`` (with parents) if needed and return its path.

    Uses ``exist_ok=True`` rather than a separate existence check, so the call is
    safe to repeat and has no check-then-create race.

    Args:
        base_output_dir: Root output folder.
        subdir: Path relative to ``base_output_dir``.

    Returns:
        The joined directory path.
    """
    output_dir = os.path.join(base_output_dir, subdir)
    os.makedirs(output_dir, exist_ok=True)
    return output_dir

def copy_special_files(src_folder, dst_folder, file_suffixes):
    """Copy ``<folder>_<suffix>.tif`` files from ``src_folder`` into ``dst_folder``.

    File names are matched case-insensitively against the folder's base name and
    each suffix, so a file written as ``<folder>_lst.tif`` is found when the
    suffix is given as ``"LST"`` even on case-sensitive filesystems. Files are
    copied under their actual on-disk names. A missing ``src_folder`` is ignored.

    Args:
        src_folder: Folder to look in; its base name is the expected file prefix.
        dst_folder: Existing destination folder.
        file_suffixes: Iterable of suffix strings such as ``"NDVI"``.
    """
    if not os.path.isdir(src_folder):
        return

    prefix = os.path.basename(src_folder)
    wanted = {f"{prefix}_{suffix}.tif".lower() for suffix in file_suffixes}

    for entry in sorted(os.listdir(src_folder)):
        entry_path = os.path.join(src_folder, entry)
        if entry.lower() in wanted and os.path.isfile(entry_path):
            shutil.copy2(entry_path, dst_folder)

def process_folder(input_folder, base_output_folder):
    """Mirror ``input_folder`` into ``base_output_folder``, standardising band rasters.

    Every ``.tif`` file is either copied unchanged (derived products whose name
    ends in ``_NDVI.tif``, ``_NDBI.tif`` or ``_LST.tif``) or written through
    ``standardize_image`` (everything else). The derived-product check ignores
    case, because the LST raster is saved by this notebook as ``_lst.tif``; a
    case-sensitive comparison would let it fall through and be standardised.

    Args:
        input_folder: Root of the tree to read.
        base_output_folder: Root of the tree to write; sub-folders are created
            to match ``input_folder``.
    """
    special_files_suffixes = ["NDVI", "NDBI", "LST"]
    special_endings = tuple(f"_{suffix}.tif".lower() for suffix in special_files_suffixes)

    for subdir, dirs, files in os.walk(input_folder):
        output_folder = create_output_dir(base_output_folder, os.path.relpath(subdir, input_folder))
        for file in files:
            if not file.endswith('.tif'):
                continue
            file_path = os.path.join(subdir, file)
            if file.lower().endswith(special_endings):
                # Derived indices / temperature are passed through untouched.
                shutil.copy2(file_path, output_folder)
            else:
                standardize_image(file_path, os.path.join(output_folder, file))


# Standardise the rasters under input_folder into a parallel tree at output_folder;
# runs on cell execution.
input_folder = 'data/project/test'
output_folder = 'data/project/test_norm'
process_folder(input_folder, output_folder)


In [19]:
import os
import random
import rasterio
from rasterio.windows import Window

def combine_bands(band_paths, output_path):
    """Stack band 1 of several single-band rasters into one multi-band raster.

    The output uses the first input's metadata with ``count`` set to the number
    of inputs; input *i* (1-based) becomes output band *i*. All input datasets
    are closed when the function returns, including on error.

    Args:
        band_paths: Ordered, non-empty list of raster paths.
        output_path: Destination raster path.
    """
    from contextlib import ExitStack

    with ExitStack() as stack:
        # ExitStack closes every successfully opened input, even if a later open fails.
        sources = [stack.enter_context(rasterio.open(path)) for path in band_paths]
        meta = sources[0].meta.copy()
        meta.update(count=len(sources))

        with rasterio.open(output_path, 'w', **meta) as dst:
            for band_index, source in enumerate(sources, start=1):
                dst.write(source.read(1), band_index)

def generate_random_tiles(img_width, img_height, max_x, max_y, tile_size, num_tiles):
    """Pick random tile windows inside a centred sub-area of an image.

    The sampling area is ``max_x`` x ``max_y`` pixels, centred in the image. If
    the image is smaller than that area along an axis, the area is clamped to
    the image so that no tile starts at a negative offset or extends past the
    image edge.

    Args:
        img_width: Image width in pixels.
        img_height: Image height in pixels.
        max_x: Width of the centred sampling area.
        max_y: Height of the centred sampling area.
        tile_size: ``(width, height)`` of each tile.
        num_tiles: Number of tiles to draw (tiles may overlap or repeat).

    Returns:
        List of ``(x, y, width, height)`` tuples; ``x``/``y`` are the tile's
        top-left pixel offsets.

    Raises:
        ValueError: If tiles are requested but a tile does not fit in the
            (clamped) sampling area.
    """
    tile_w, tile_h = tile_size[0], tile_size[1]

    # Never sample from an area larger than the image itself.
    area_w = min(max_x, img_width)
    area_h = min(max_y, img_height)

    if num_tiles > 0 and (tile_w > area_w or tile_h > area_h):
        raise ValueError(
            f"tile size {tile_w}x{tile_h} does not fit in sampling area {area_w}x{area_h}")

    start_x = (img_width - area_w) // 2
    start_y = (img_height - area_h) // 2
    last_x = start_x + area_w - tile_w
    last_y = start_y + area_h - tile_h

    tiles = []
    for _ in range(num_tiles):
        # randint is inclusive on both ends; x is drawn before y for each tile.
        x = random.randint(start_x, last_x)
        y = random.randint(start_y, last_y)
        tiles.append((x, y, tile_w, tile_h))
    return tiles

def split_image(img_path, tiles, output_dir, date):
    """Cut the given windows out of a raster and save each one as its own file.

    Tiles are written to ``output_dir/<date>/<date>_<index>.tif`` where the index
    is the zero-padded (5 digits) position of the tile in ``tiles``.

    Args:
        img_path: Raster to read from.
        tiles: Iterable of ``(x, y, width, height)`` pixel windows.
        output_dir: Root folder for tile output.
        date: Label used for both the sub-folder and the file-name prefix.
    """
    tile_folder = os.path.join(output_dir, date)
    os.makedirs(tile_folder, exist_ok=True)

    with rasterio.open(img_path) as src:
        for index, (col_off, row_off, width, height) in enumerate(tiles):
            window = Window(col_off, row_off, width, height)
            tile_pixels = src.read(window=window)

            # Source metadata, resized and re-anchored to this window.
            tile_meta = {
                **src.meta,
                'height': window.height,
                'width': window.width,
                'transform': src.window_transform(window),
            }

            tile_path = os.path.join(tile_folder, f"{date}_{index:05d}.tif")
            with rasterio.open(tile_path, 'w', **tile_meta) as dst:
                dst.write(tile_pixels)

def process_directory(base_dir, output_base_dir, num_tiles=100, tile_size=(64, 64)):
    """Combine the selected bands of every scene folder and cut the same tiles from each.

    For each sub-folder of ``base_dir`` the bands in ``band_order`` are stacked
    into ``output_base_dir/satellite_images/<folder>.tif``. The random tile
    windows are generated once, from the first combined image, and reused for
    every later scene. Tiles are saved under a folder named after the fourth
    ``_``-separated field of the scene folder name.

    Args:
        base_dir: Folder containing one sub-folder per scene.
        output_base_dir: Root folder for combined images and tiles.
        num_tiles: Number of random tiles per scene.
        tile_size: ``(width, height)`` of each tile in pixels.
    """
    center_area_size = (669, 613)
    # Only the LST layer is tiled at the moment; the full stack would be
    # B1..B11 followed by NDVI, NDBI and LST.
    band_order = ["lst"]
    tile_ranges = None

    for folder in os.listdir(base_dir):
        current_dir = os.path.join(base_dir, folder)
        if not os.path.isdir(current_dir):
            continue

        date = folder.split('_')[3]

        band_paths = [os.path.join(current_dir, f"{folder}_{band}.tif") for band in band_order]
        combined_path = os.path.join(output_base_dir, 'satellite_images', f'{folder}.tif')
        os.makedirs(os.path.dirname(combined_path), exist_ok=True)
        combine_bands(band_paths, combined_path)

        with rasterio.open(combined_path) as img:
            img_width, img_height = img.width, img.height
            if tile_ranges is None:
                # First scene decides the windows used for all scenes.
                tile_ranges = generate_random_tiles(
                    img_width, img_height, *center_area_size, tile_size, num_tiles)

            split_image(combined_path, tile_ranges, output_base_dir, date)


# Tile the scenes found under base_dir with the default tile count and size;
# runs on cell execution.
base_dir = 'data/project/LST/' 
output_base_dir = 'data/project/test_split/'
process_directory(base_dir, output_base_dir)

In [23]:
import os
import random
import rasterio
from rasterio.windows import Window

def combine_bands(band_paths, output_path):
    """Stack band 1 of several single-band rasters into one multi-band raster.

    The output uses the first input's metadata with ``count`` set to the number
    of inputs; input *i* (1-based) becomes output band *i*. All input datasets
    are closed when the function returns, including on error.

    Args:
        band_paths: Ordered, non-empty list of raster paths.
        output_path: Destination raster path.
    """
    from contextlib import ExitStack

    with ExitStack() as stack:
        # ExitStack closes every successfully opened input, even if a later open fails.
        sources = [stack.enter_context(rasterio.open(path)) for path in band_paths]
        meta = sources[0].meta.copy()
        meta.update(count=len(sources))

        with rasterio.open(output_path, 'w', **meta) as dst:
            for band_index, source in enumerate(sources, start=1):
                dst.write(source.read(1), band_index)

def generate_random_tiles(img_width, img_height, max_x, max_y, tile_size, num_tiles):
    """Pick random tile windows inside a centred sub-area of an image.

    The sampling area is ``max_x`` x ``max_y`` pixels, centred in the image. If
    the image is smaller than that area along an axis, the area is clamped to
    the image so that no tile starts at a negative offset or extends past the
    image edge.

    Args:
        img_width: Image width in pixels.
        img_height: Image height in pixels.
        max_x: Width of the centred sampling area.
        max_y: Height of the centred sampling area.
        tile_size: ``(width, height)`` of each tile.
        num_tiles: Number of tiles to draw (tiles may overlap or repeat).

    Returns:
        List of ``(x, y, width, height)`` tuples; ``x``/``y`` are the tile's
        top-left pixel offsets.

    Raises:
        ValueError: If tiles are requested but a tile does not fit in the
            (clamped) sampling area.
    """
    tile_w, tile_h = tile_size[0], tile_size[1]

    # Never sample from an area larger than the image itself.
    area_w = min(max_x, img_width)
    area_h = min(max_y, img_height)

    if num_tiles > 0 and (tile_w > area_w or tile_h > area_h):
        raise ValueError(
            f"tile size {tile_w}x{tile_h} does not fit in sampling area {area_w}x{area_h}")

    start_x = (img_width - area_w) // 2
    start_y = (img_height - area_h) // 2
    last_x = start_x + area_w - tile_w
    last_y = start_y + area_h - tile_h

    tiles = []
    for _ in range(num_tiles):
        # randint is inclusive on both ends; x is drawn before y for each tile.
        x = random.randint(start_x, last_x)
        y = random.randint(start_y, last_y)
        tiles.append((x, y, tile_w, tile_h))
    return tiles

def split_image(img_path, tiles, output_dir, date):
    """Cut the given windows out of a raster and save each one as its own file.

    Tiles are written to ``output_dir/<date>/<date>_<index>.tif`` where the index
    is the zero-padded (5 digits) position of the tile in ``tiles``.

    Args:
        img_path: Raster to read from.
        tiles: Iterable of ``(x, y, width, height)`` pixel windows.
        output_dir: Root folder for tile output.
        date: Label used for both the sub-folder and the file-name prefix.
    """
    tile_folder = os.path.join(output_dir, date)
    os.makedirs(tile_folder, exist_ok=True)

    with rasterio.open(img_path) as src:
        for index, (col_off, row_off, width, height) in enumerate(tiles):
            window = Window(col_off, row_off, width, height)
            tile_pixels = src.read(window=window)

            # Source metadata, resized and re-anchored to this window.
            tile_meta = {
                **src.meta,
                'height': window.height,
                'width': window.width,
                'transform': src.window_transform(window),
            }

            tile_path = os.path.join(tile_folder, f"{date}_{index:05d}.tif")
            with rasterio.open(tile_path, 'w', **tile_meta) as dst:
                dst.write(tile_pixels)

def process_directory(base_dir, output_base_dir, num_tiles=10000, tile_size=(64, 64)):
    """Cut random tiles from every ``*_lst.tif`` file directly inside ``base_dir``.

    A fresh set of random windows is generated for each file. Tiles are saved
    under a folder named after the fourth ``_``-separated field of the file name.

    Args:
        base_dir: Folder containing the ``*_lst.tif`` rasters.
        output_base_dir: Root folder for tile output.
        num_tiles: Number of random tiles per raster.
        tile_size: ``(width, height)`` of each tile in pixels.
    """
    center_area_size = (669, 613)

    lst_files = (name for name in os.listdir(base_dir) if name.endswith("_lst.tif"))
    for filename in lst_files:
        img_path = os.path.join(base_dir, filename)
        date = filename.split('_')[3]

        with rasterio.open(img_path) as img:
            # Windows are drawn per file, sized from that file's dimensions.
            tile_ranges = generate_random_tiles(
                img.width, img.height, *center_area_size, tile_size, num_tiles)
            split_image(img_path, tile_ranges, output_base_dir, date)

# Tile every *_lst.tif in base_dir using the function defaults (10000 tiles of 64x64
# per file); runs on cell execution.
base_dir = 'data/project/LST/' 
output_base_dir = 'data/project/test_split/'
process_directory(base_dir, output_base_dir)

KeyboardInterrupt: 

In [11]:
import os

def write_filenames_to_txt(folder_path, train_txt, valid_txt, test_txt):
    """Append a 60/20/20 train/valid/test file listing for each tile folder.

    Every directory under ``folder_path`` that holds at least 100 files
    contributes its first 100 files in sorted order: the first 60 go to
    ``train_txt``, the next 20 to ``valid_txt`` and the following 20 to
    ``test_txt``. Paths are written relative to the current working directory,
    with forward slashes and a leading ``./``.

    The list files are opened in append mode, so calling this twice with the
    same targets duplicates the entries. Missing parent folders of the list
    files are created, and sub-directories are visited in sorted order so the
    output is reproducible.

    Args:
        folder_path: Root folder to scan recursively.
        train_txt: Path of the training list file.
        valid_txt: Path of the validation list file.
        test_txt: Path of the test list file.
    """
    def _append_paths(root, file_list, file_path):
        # Create e.g. "txt/" on first use instead of failing with FileNotFoundError.
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(file_path, 'a') as handle:
            for filename in file_list:
                rel_path = os.path.relpath(os.path.join(root, filename))
                rel_path = rel_path.replace('\\', '/')
                handle.write('./' + rel_path + '\n')

    for root, dirs, files in os.walk(folder_path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()

        if len(files) < 100:
            continue

        files.sort()
        _append_paths(root, files[:60], train_txt)
        _append_paths(root, files[60:80], valid_txt)
        _append_paths(root, files[80:100], test_txt)

# Build the train/valid/test listings from the tile folders; runs on cell execution.
# The list files are appended to, so re-running this cell adds the same entries again.
folder_path = 'data/project/test_split' 
write_filenames_to_txt(folder_path, 'txt/train.txt', 'txt/valid.txt', 'txt/test.txt')


In [11]:
import os
import shutil

def find_lst_files(root_folder):
    """Return the paths of all ``.tif`` files under ``root_folder`` whose name contains ``lst``.

    The match is a case-sensitive substring test on the file name, so ``lst``
    may appear anywhere in it.

    Args:
        root_folder: Folder searched recursively.

    Returns:
        List of matching file paths, in ``os.walk`` order.
    """
    return [
        os.path.join(current_dir, filename)
        for current_dir, _subdirs, filenames in os.walk(root_folder)
        for filename in filenames
        if filename.endswith('.tif') and 'lst' in filename
    ]

def copy_files_to_new_directory(files, new_directory):
    """Copy each file in ``files`` into ``new_directory``, creating it if needed.

    The directory is created with ``exist_ok=True`` (no separate existence
    check, so no check-then-create race). Files keep their base names, so two
    inputs with the same base name overwrite each other.

    Args:
        files: Iterable of source file paths.
        new_directory: Destination folder.
    """
    os.makedirs(new_directory, exist_ok=True)

    for file in files:
        shutil.copy(file, new_directory)

# Gather every LST raster found under root_folder into one flat folder;
# runs on cell execution.
root_folder = 'data/project/test_norm'
new_folder = './data/project/LST/'

lst_files = find_lst_files(root_folder)
copy_files_to_new_directory(lst_files, new_folder)
