In [1]:
import os

# Limit OpenMP to a single thread. The variable is set before the numeric
# libraries below are imported so that it is already present in the
# environment when they load.
# NOTE(review): presumably this is here to avoid scikit-learn's KMeans
# memory-leak warning with MKL on Windows — confirm.
os.environ["OMP_NUM_THREADS"] = '1'
import cv2
import geopandas as gpd
import numpy as np
from shapely.geometry import LineString, Point, Polygon
from sklearn.cluster import KMeans
import rasterio
from tqdm import tqdm

In [2]:
# 1. File paths

# Unused alternative input, kept for reference:
# input_raster_thin = r"D:\UAV_DATA_NEW\output\4_thin\061410_thin.tif"

# Input: raster that is read and split into connected components.
input_raster = r"D:\UAV_DATA_NEW\output\2_dilated\061301_dilated.tif"

# Output: raster holding the per-pixel KMeans cluster ids.
output_clustering_kmeans_result = (
    r'D:\UAV_DATA_NEW\output\5_result_pixels'
    r'\061301_kmeans_pointprocessed.tif'
)

# Output: shapefile holding the cluster-centre points.
output_centroid = (
    r'D:\UAV_DATA_NEW\output\5_result_centroids'
    r'\061301_kmeans_centroid_pointprocessed.shp'
)

In [3]:
# Open the input raster. The handle is deliberately left open here because
# `src` is a module-level name that stays available after this cell.
src = rasterio.open(input_raster)
transform = src.transform

# Area covered by one pixel: |a * e| of the affine transform
# (elements 0 and 4 are the x- and y-scale terms).
pixel_width = transform[0]
pixel_height = transform[4]
area_per_pixel = abs(pixel_width * pixel_height)

# Reference area per object, expressed in the same units as `area_per_pixel`,
# and its equivalent in pixels.
# NOTE(review): the origin of 0.313748971 is not shown in this notebook — confirm.
mean_area = 0.313748971
mean_area_pixel = mean_area / area_per_pixel
print(mean_area_pixel, area_per_pixel)

61.38802795261512 0.005110914643522676


In [4]:
# Read band 1 and label its connected foreground regions (4-connectivity).
img = src.read(1)
img_shape = img.shape
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(img, connectivity=4)
img = None  # drop the reference to the band array; only its shape is needed below

# Raster that will receive the per-pixel cluster ids (0 = nothing written).
output_img = np.zeros(img_shape, dtype=np.uint8)

# Clusters per component: component area divided by the mean object area in
# pixels, rounded to the nearest integer and never less than one.
component_areas = stats[:, cv2.CC_STAT_AREA]
n_clusters = np.maximum(np.rint(component_areas / mean_area_pixel).astype(int), 1)

# Accumulator for the cluster-centre points.
all_points = []

In [5]:
# KMeans: split every connected component into n_clusters[i] clusters, paint
# the cluster ids into `output_img`, and collect one point per cluster centre
# (centres that lie close together inside a component are merged).

# Centres whose distance (in the coordinate units returned by `src.xy`) is not
# greater than this are replaced by their midpoint.
MERGE_DISTANCE = 0.45
# Components with fewer pixels than this are skipped entirely.
MIN_COMPONENT_PIXELS = 4

# Reset the accumulators so that re-running this cell does not duplicate
# points or keep labels from a previous run.
all_points = []
output_img.fill(0)

# Let tqdm drive the loop: every iteration is counted, including skipped
# components, so the bar reaches 100%. Label 0 is the background, hence the
# range starts at 1.
for i in tqdm(range(1, num_labels), desc='Progress'):
    # OpenCV stats columns: left (column), top (row), width, height, area.
    left, top, width, height, num_pixels = stats[i]

    # Skip components that are too small.
    if num_pixels < MIN_COMPONENT_PIXELS:
        continue

    # Pixel coordinates of this component, searched only inside its bounding
    # box and then shifted back to full-image (row, col) indices.
    rows, cols = np.where(labels[top:top + height, left:left + width] == i)
    rows += top
    cols += left

    coords = np.stack((rows, cols), axis=1)
    # KMeans cannot fit more clusters than there are samples.
    k = int(min(n_clusters[i], len(coords)))
    kmeans = KMeans(n_clusters=k, n_init='auto', random_state=0)
    kmeans.fit(coords)

    # `output_img` is uint8: keep ids within 1..255 so that a label of 255 or
    # more cannot wrap around to 0 and be mistaken for background. Ids are
    # unchanged whenever k <= 255.
    output_img[rows, cols] = kmeans.labels_ % 255 + 1

    cluster_centers = kmeans.cluster_centers_

    if k <= 2:
        # One or two clusters: keep every centre as it is.
        all_points.extend(Point(src.xy(*center)) for center in cluster_centers)
        continue

    # Three or more clusters: greedily merge each new centre with its nearest
    # already-kept centre when they are within MERGE_DISTANCE of each other.
    merged_centers = []
    for center in cluster_centers:
        candidate = Point(src.xy(*center))
        distances = [candidate.distance(kept) for kept in merged_centers]
        if distances:
            min_dist = min(distances)
            if min_dist <= MERGE_DISTANCE:
                nearest = merged_centers.pop(distances.index(min_dist))
                candidate = Point((nearest.x + candidate.x) / 2,
                                  (nearest.y + candidate.y) / 2)
        merged_centers.append(candidate)

    all_points.extend(merged_centers)


Progress: 100%|█████████▉| 179546/179639 [02:05<00:00, 1433.82it/s]


In [6]:
# Save the cluster-id raster as band 1 of a new file created with the
# metadata of the source dataset.
output_profile = src.meta
with rasterio.open(output_clustering_kmeans_result, mode='w', **output_profile) as dst:
    dst.write(output_img, indexes=1)

In [7]:
# Export the collected points as a shapefile, tagged with the CRS of the
# source raster.
geometry = gpd.GeoSeries(data=all_points, crs=src.crs)
geometry.to_file(filename=output_centroid, driver='ESRI Shapefile')