In [1]:
import numpy as np
from numpy.matlib import repmat
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
import cv2

In [2]:
# Open the input video and read its frame rate and frame size.
videoCapture = cv2.VideoCapture('road_video.MOV')
# VideoCapture does not raise on a missing/unreadable file; without this check
# fps and size would silently come back as zeros.
if not videoCapture.isOpened():
    raise IOError("Cannot open input video 'road_video.MOV'")
fps = videoCapture.get(cv2.CAP_PROP_FPS)
# (width, height) in pixels.
size = (
    int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)

In [3]:
def kmeans(data, n_cl, centers=None, verbose=False):
    """Cluster the rows of ``data`` into ``n_cl`` groups with k-means.

    Parameters
    ----------
    data : ndarray of shape (n_samples, n_features)
        Samples to cluster, one per row.
    n_cl : int
        Number of clusters.
    centers : array-like of shape (n_cl, n_features), optional
        Initial cluster centers. When ``None`` they are drawn at random from
        the rows of ``data``. The array passed in is never modified.
    verbose : bool, optional
        When true, show a scatter plot of the first two features after every
        iteration and wait for a key/button press before continuing.

    Returns
    -------
    new_labels : ndarray of shape (n_samples,)
        Index of the closest center for every sample.
    centers : ndarray of shape (n_cl, n_features)
        Final cluster centers (float array).

    Raises
    ------
    ValueError
        If ``data`` has no rows or ``centers`` does not contain ``n_cl`` rows.
    """
    n_samples = data.shape[0]
    if n_samples == 0:
        raise ValueError("kmeans needs at least one sample")

    if centers is None:
        # Draw distinct seeds whenever there are enough samples; duplicated
        # seeds would start two clusters at the same position.
        seed_idx = np.random.choice(n_samples, size=n_cl, replace=n_samples < n_cl)
        centers = data[seed_idx]
    # Work on a float copy: the caller's array stays untouched and the mean
    # updates below are not truncated when the input is an integer array.
    centers = np.array(centers, dtype=float)
    if centers.shape[0] != n_cl:
        raise ValueError(
            "expected {} centers, got {}".format(n_cl, centers.shape[0])
        )

    # No previous assignment yet; comparing against an all-zero vector would
    # stop early whenever the first assignment happens to be all cluster 0.
    old_labels = None
    while True:
        # Assignment step: squared Euclidean distance of every sample to every
        # center, one column per center (broadcasting replaces repmat).
        distances = np.zeros(shape=(n_samples, n_cl))
        for c_idx, c in enumerate(centers):
            distances[:, c_idx] = np.sum(np.square(data - c), axis=1)
        new_labels = np.argmin(distances, axis=1)

        # Update step: move each center to the mean of its members. A cluster
        # that lost all members keeps its previous center instead of turning
        # into NaN (the mean of an empty slice).
        for l in range(n_cl):
            members = data[new_labels == l]
            if len(members) > 0:
                centers[l] = members.mean(axis=0)

        if verbose:
            fig, ax = plt.subplots()
            ax.scatter(data[:, 0], data[:, 1], c=new_labels, s=40)
            ax.plot(centers[:, 0], centers[:, 1], 'r*', markersize=20)
            plt.waitforbuttonpress()
            plt.close(fig)

        # Converged once the assignment no longer changes.
        if old_labels is not None and np.array_equal(new_labels, old_labels):
            break

        old_labels = new_labels
    return new_labels, centers

In [4]:
# Create the output writer: MP4 container, 'mp4v' codec, same frame size as the
# input, written at one tenth of the source frame rate.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
videoWriter = cv2.VideoWriter("output.mp4", fourcc, (fps/10), size, isColor=True)
# A writer that failed to open drops every frame silently, so fail loudly here
# instead of discarding the result of isOpened().
if not videoWriter.isOpened():
    raise IOError("Cannot open 'output.mp4' for writing")
# Read the first frame; `success` drives the processing loop.
success, frame = videoCapture.read()

In [5]:
# Segment every frame with k-means on (3 colour channels + 2 pixel-coordinate
# channels) and write the recoloured frame to the output video.
n_cl = 8
times = 1
# One random colour per cluster, used to paint the segmentation result.
colormap = [[np.random.randint(256), np.random.randint(256), np.random.randint(256)] for _ in range(n_cl)]
# Same colours as a uint8 lookup table so a whole frame can be recoloured
# with a single indexing operation instead of a per-pixel Python loop.
palette = np.asarray(colormap, dtype=np.uint8)
# Centers are carried from frame to frame; None makes kmeans pick random seeds
# for the first frame.
centers = None
# The coordinate channels depend only on the frame shape, so build them once
# and rebuild only if the shape ever changes.
coordinates = None
try:
    while success:
        img = np.float32(frame)
        h, w, c = img.shape
        if coordinates is None or coordinates.shape[:2] != (h, w):
            coordinates = np.zeros(shape=(h, w, 2))
            # NOTE(review): normalize() rescales each row to unit L2 norm by
            # default, so the row-index channel is nearly constant and both
            # channels are tiny next to 0-255 colour values - confirm this is
            # the intended spatial weighting.
            coordinates[..., 0] = normalize(repmat(np.arange(0, h), w, 1).T)
            coordinates[..., 1] = normalize(repmat(np.arange(0, w), h, 1))
        # One row per pixel: colour channels followed by the two coordinates.
        data = np.concatenate((img, coordinates), axis=-1)
        data = np.reshape(data, newshape=(w * h, c + 2))
        new_labels, centers = kmeans(data=data, n_cl=n_cl, centers=centers)
        # Paint every pixel with the colour of its cluster.
        frame = palette[new_labels].reshape(h, w, c)
        videoWriter.write(frame)
        success, frame = videoCapture.read()
        print("This is {} frame".format(times))
        times = times + 1
finally:
    # Release the video handles even if processing fails part-way through.
    videoCapture.release()
    videoWriter.release()

This is 1 frame
This is 2 frame
This is 3 frame
This is 4 frame
This is 5 frame
This is 6 frame
This is 7 frame
This is 8 frame
This is 9 frame
This is 10 frame
This is 11 frame
This is 12 frame
This is 13 frame
This is 14 frame
This is 15 frame
This is 16 frame
This is 17 frame
This is 18 frame
This is 19 frame
This is 20 frame
This is 21 frame
This is 22 frame
This is 23 frame
This is 24 frame
This is 25 frame
This is 26 frame
This is 27 frame
This is 28 frame
This is 29 frame
This is 30 frame
This is 31 frame
This is 32 frame
This is 33 frame
This is 34 frame
This is 35 frame


# Questions:

(1) 
Relatively simple to implement.

Guarantees convergence.

Easily adapts to new examples.

Can be generalized to clusters of different shapes and sizes, such as elliptical clusters.

(2)
We have to specify k (the number of clusters) in advance.

k-means can only handle numerical data.

The result depends on the initial values of the centers.

Outliers are clustered too: they can drag centers away or end up forming their own clusters.


(3) When there is a clear margin between the clusters.

# Report

I use eight clusters, and the result separates the cars, the road, and the people.
When we run k-means on a video, passing the centers from the previous frame in as the initial centers for the next frame makes the iteration converge much faster.