In [1]:
from cropclassification.sampling_techniques import clustered_sampler
from cropclassification.sampling_techniques import gridded_sampler
from cropclassification.sampling_techniques import random_sampler
from cropclassification.sampling_techniques import stratified_sampler
from cropclassification.sampling_techniques import samples_size_calculator
from cropclassification.sampling_techniques import sample_extracter
from cropclassification.sampling_techniques import test_dateset_extracter
from importlib import reload

## 1. Sampling Techniques to Create Training Samples
This notebook uses four sampling techniques to build high-quality samples for training a classification model, and then extracts the raster values at each sample point:
- Clustered sampling
- Gridded sampling
- Random sampling
- Stratified sampling


### 1.1 Calculate the number of samples based on the total number of pixels in the HLS image

In [2]:
# ---- Inputs ----------------------------------------------------------------
# Reclassified CDL raster: source of the per-class pixel counts.
cld_reclassified = r'C:\crop_classification\pre_process\cropscape\cdl_reclassified.tif'
# A single HLS band raster: used only as the reference for the overall pixel count.
hls_base_pixels = r'C:\crop_classification\pre_process\hls_organized\20220301\20220301_S30_T15SYT_B02.tif'
# Share of the HLS pixels to sample. The recorded output (6698 samples out of
# 13395600 pixels) indicates the value is read as a percent, i.e. 0.05 %.
percentage_samples = 0.05
# Lower bound on the number of samples requested for any single class.
min_samples_per_class = 100

# ---- Pixel counts ----------------------------------------------------------
# count_samples_per_class yields two values; only the first (per-class counts)
# is kept for the CDL raster ...
pixels_per_cdl_class, _ = samples_size_calculator.count_samples_per_class(cld_reclassified)
# ... and only the second (overall count) is kept for the HLS raster.
# Despite its name, total_cld_pixels therefore holds the HLS pixel total.
_, total_cld_pixels = samples_size_calculator.count_samples_per_class(hls_base_pixels)

# ---- Sample sizes ----------------------------------------------------------
# Overall number of samples derived from the percentage and the HLS pixel total.
total_samples = samples_size_calculator.calculate_total_samples(
    percentage_samples,
    total_cld_pixels,
)
print(pixels_per_cdl_class, total_cld_pixels, total_samples)

# Per-class allocation of total_samples, subject to the per-class minimum.
# NOTE(review): in the recorded output the sizes grow steadily with the class
# id (100, 319, 638, ...) instead of following the class pixel counts -
# confirm that calculate_sample_sizes allocates as intended.
sample_sizes_dict = samples_size_calculator.calculate_sample_sizes(
    pixels_per_cdl_class,
    total_samples,
    min_samples_per_class,
)
print(sample_sizes_dict)


{0: 802685, 1: 781411, 2: 134046, 3: 2352, 4: 4453681, 5: 37825, 6: 7190921} 13395600 6698
{0: 100, 1: 319, 2: 638, 3: 957, 4: 1276, 5: 1595, 6: 1813}


### 1.2 Clustered Sampling Technique
This function creates a two-stage clustered sampling design, where the CDL is used to identify regions with pixels belonging to the same class and to obtain a good spatial distribution of the samples, which serves as an indicator of good representativeness for each crop type class. To create the samples, the cluster configuration used here considers a block with at least 100 pixels of the same class and a minimum of 5 clusters together, with a density of 0.5.

In [5]:
# Where the sample points are stored, and the name of the shapefile to write.
base_dir = r'C:\crop_classification'
output_name = 'clustered_sampling.shp'

# Optional manual override of the per-class sample sizes:
#sample_sizes_dict= {0: 957, 1: 957, 2: 957, 3: 957, 4: 957, 5: 957, 6: 957}

# Cluster-building parameters, forwarded unchanged to clustered_sampling.
# NOTE(review): their exact meaning is defined inside clustered_sampler and is
# not visible from this notebook - check there before tuning.
min_pixels = 100
min_cluster_size = 5
min_density = 0.5
eps = 10
min_samples = 50

# Build the clustered sample points. The call is left as the last expression
# so that the returned table is displayed below the cell.
clustered_sampler.clustered_sampling(
    cld_reclassified, base_dir, sample_sizes_dict,
    min_pixels, eps, min_samples, min_cluster_size, min_density,
    output_name,
)

Unnamed: 0,geometry,value
0,POINT (768794.617 3703018.460),0
1,POINT (749894.617 3763708.460),0
2,POINT (725594.617 3747808.460),0
3,POINT (736394.617 3792778.460),0
4,POINT (745664.617 3763948.460),0
...,...,...
6693,POINT (752564.617 3775768.460),6
6694,POINT (777224.617 3695488.460),6
6695,POINT (802304.617 3720928.460),6
6696,POINT (759314.617 3795388.460),6


### 1.3 Gridded Sampling Technique
This function creates a grid of samples. The distance between samples is defined by distributing the points evenly, considering the total number of desired samples within the HLS tile dimensions.

In [4]:
# Gridded sampling: evenly spaced points over the reclassified CDL raster.
# Output directory.
base_dir = r'C:\crop_classification'
# Buffer size used to avoid the border effect.
buffer_size = 10
# Name of the shapefile that will hold the gridded points.
output_shapefile = 'gridded_samples.shp'

# Create the gridded sampling points and export them to the shapefile.
gridded_sampler.gridded_sampling(
    base_dir,
    cld_reclassified,
    total_samples,
    output_shapefile,
    buffer_size=buffer_size,
)


### 1.4 Random Sampling Technique
This function will randomly distribute the samples across the HLS tile dimensions.

In [6]:
# Random sampling: total_samples points placed at random.
# Output directory.
base_dir = r'C:\crop_classification'
# Buffer size used to avoid the border effect.
buffer_size = 10
# Name of the shapefile that will hold the random points.
output_name = 'random_sampling.shp'

# Create the random sampling points (the sampler's function name is spelled
# "randon_sampling" in the package, so it is called as such here).
random_sampler.randon_sampling(
    total_samples,
    base_dir,
    buffer_size,
    cld_reclassified,
    output_name,
)

### 1.5 Stratified Sampling Technique
This function will create a defined number of samples for each class based on the CDL image.

In [7]:
# Stratified sampling: a fixed number of points per CDL class.
# Output directory.
base_dir = r'C:\crop_classification'
# Buffer size used to avoid the border effect.
buffer_size = 10
# Name of the shapefile that will hold the stratified points.
output_name =  'stratified_sampler.shp'

# Optional manual override of the per-class sample sizes:
#sample_sizes_dict= {0: 957, 1: 957, 2: 957, 3: 957, 4: 957, 5: 957, 6: 957}

# Create the stratified sampling points using the per-class sizes.
stratified_sampler.stratified_sampling(
    cld_reclassified,
    base_dir,
    sample_sizes_dict,
    buffer_size,
    output_name,
)


### 1.6 Extract the training samples to the sample points
This part of the code locates the spectral index and coefficient-of-variation rasters, extracts their values at the sample points to create the training samples, and exports the result as an Excel file.

In [8]:
# Folder holding the sampling-point shapefiles created in the previous steps.
samples_points_dir = r'C:\crop_classification\results\sample_points'
# Project root: used to locate the HLS features and to store the results.
base_dir = r'C:\crop_classification'

# Extract the feature values at every sample point to build the training set.
# The recorded output shows one raster path printed per processed feature.
sample_extracter.extract_training_samples(
    base_dir,
    samples_points_dir,
)



C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_evi_p10.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_evi_p25.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_evi_p50.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_evi_p75.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_evi_p90.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_nbr_p10.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_nbr_p25.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_nbr_p50.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_nbr_p75.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_nbr_p90.tif
C:\crop_classification\temporal_composites\inputdata\spectral_indexes\03\03_ndbi_p10.tif
C:\crop_classification\temporal

### 1.7 Create a test dataset to evaluate the model
This function creates an independent dataset for evaluating the model, using the stratified sampling technique and 0.05% of the total number of HLS pixels.

In [9]:
# Test dataset: points used to evaluate the model.
# Output directory.
base_dir = r'C:\crop_classification'
# Buffer size used to avoid the border effect.
buffer_size = 10
# Name of the shapefile that will hold the test points.
output_name =  'test_dataset.shp'

# Optional manual override of the per-class sample sizes:
#sample_sizes_dict= {0: 957, 1: 957, 2: 957, 3: 957, 4: 957, 5: 957, 6: 957}

# Create the test points with the same per-class sizes used for training.
# NOTE(review): nothing in this cell excludes locations already drawn for the
# training samples - confirm that test_dataset keeps the two sets independent.
test_dateset_extracter.test_dataset(
    cld_reclassified,
    base_dir,
    sample_sizes_dict,
    buffer_size,
    output_name,
)