In [15]:
import numpy as np
import cv2
import pickle
import os
from PIL import Image
import time
import copy
class ReSample:
    @staticmethod
    def resample(X, ratio=2, mode='LANCZOS'):
        image_list = []
        for i in range(X.shape[0]):
            tmp = []
            for k in range(X.shape[-1]):
                size1 = int(X.shape[1]/ratio)
                size2 = int(X.shape[2]/ratio)
                if mode == "NEAREST" or mode == 0:
                    image_tmp = Image.fromarray(X[i,:,:,k]).resize(size=(size1, size2), resample=Image.NEAREST)
                elif mode == "BILINEAR" or mode == 1:
                    image_tmp = Image.fromarray(X[i,:,:,k]).resize(size=(size1, size2), resample=Image.BILINEAR)
                elif mode == "BICUBIC" or mode == 2:
                    image_tmp = Image.fromarray(X[i,:,:,k]).resize(size=(size1, size2), resample=Image.BICUBIC)
                else:
                    image_tmp = Image.fromarray(X[i,:,:,k]).resize(size=(size1, size2), resample=Image.LANCZOS)
                tmp.append(np.array(image_tmp).reshape(1, size1, size2, 1))
            image_list.append(np.concatenate(tmp, axis=-1))
        output = np.concatenate(image_list, axis=0)
        #output[output < 0] = 0
        #output[output > 255] = 255
        return np.round(output)

def ReSize(Y, S):
    while Y.shape[2] > S:
        Y = ReSample.resample(Y,2)
    while Y.shape[2] < S:
        Y = ReSample.resample(Y,1/2)
    return Y

class toFile:
    def __init__(self, folder, n_images_per_file, datatype="SHORT", H=256, W=256, C=3):
        self.folder = folder
        self.n_images_per_file = n_images_per_file
        self.datatype = datatype
        self.H = H
        self.W = W
        self.C = C
        
        self.ct = 0
        self.cache = []
        self.fast = True
        
    def write_UNCHAR(self, X, file, offset=128, truewrite=True):
        X += offset
        X[X<0] = 0
        X[X>255] = 255
#         print(np.mean(X), np.min(X), np.max(X))
#         print('expected size %d bytes'%(X.shape[0]*X.shape[1]*X.shape[2]*X.shape[3]))
        if truewrite == True:
            if self.fast == True:
                Xt = copy.deepcopy(X)
                Xt = Xt.astype(np.int8).transpose((0, 3, 1, 2)).reshape(-1)
                with open(file+'.data', 'wb') as f:
                    f.write(Xt)
            else:
                with open(file+'.data', 'wb') as f:
                    for k in range(X.shape[0]):
                        for c in range(X.shape[3]):
                            for i in range(X.shape[1]):
                                for j in range(X.shape[2]):
                                    a = int(X[k,i,j,c])
                                    f.write(a.to_bytes(1, 'little'))
        return X

    def write_SHORT(self, X, file, truewrite=True):
#         print(np.mean(X), np.min(X), np.max(X))
#         print('expected size %d bytes'%(X.shape[0]*X.shape[1]*X.shape[2]*X.shape[3]))
        if truewrite == True:
            if self.fast == True:
                Xt = copy.deepcopy(X)
                Xt = Xt.astype(np.int16).transpose((0, 3, 1, 2)).reshape(-1)
                with open(file+'.data', 'wb') as f:
                    f.write(Xt)
            else:
                with open(file+'.data', 'wb') as f:
                    for k in range(X.shape[0]):
                        for c in range(X.shape[3]):
                            for i in range(X.shape[1]):
                                for j in range(X.shape[2]):
                                    a = int(X[k,i,j,c])
                                    f.write(a.to_bytes(2, 'little', signed=True))
        return X

    
    def tofile(self, X):
        if X.shape[1] != self.H:
            return False
        if X.shape[2] != self.W:
            return False
        if X.shape[3] != self.C:
            return False
        a = self.ct
        if len(self.cache) > 0:
            X = np.concatenate([self.cache, X], axis=0)
            self.cache = []
        for i in range(0, X.shape[0], self.n_images_per_file):
            if len(X[i:i+self.n_images_per_file]) != self.n_images_per_file:
                self.cache = X[i:i+self.n_images_per_file]
            
                break
            if self.datatype == "SHORT":
                print('write',self.ct)
                self.write_SHORT(X[i:i+self.n_images_per_file], self.folder+'/'+str(self.ct))
            elif self.datatype == "UNCHAR":
                self.write_UNCHAR(X[i:i+self.n_images_per_file], self.folder+'/'+str(self.ct))
            else:
                assert 1==0
            self.ct += 1
#         print('<INFO> write %d files, current total files %d, %d images in buffer'%(self.ct-a, self.ct, len(self.cache)))
        return True


In [16]:
class PreProcess():
    def __init__(self, output_folder, n_images_per_file, size_list=[8,16,32,64,128,256], stride=123, downsample_level=3):
        self.g_count = 0
        self.output_folder = output_folder
        self.size_list = size_list
        self.n_images_per_file = n_images_per_file
        # output folder has a series subfolder 8/16/32/64/128/256
        os.system('mkdir '+self.output_folder)
        for i in size_list:
            os.system('mkdir '+self.output_folder+'/'+str(i))
        self.tofile = []
        for i in size_list:
            self.tofile.append(toFile(self.output_folder+'/'+str(i)+'/', n_images_per_file, H=i, W=i))
        self.stride = stride
        self.downsample_level = downsample_level
    
    def patch_to_file(self, patch):
        for i in range(len(self.size_list)):
            patch = ReSize(patch, self.size_list[i])
            flag = self.tofile[i].tofile(patch)
            if flag == False:
                return 
#             flag = cv2.imwrite(self.output_folder+'/'+str(i)+'/'+str(self.g_count)+'.png', patch[0])
        self.g_count += 1
        
    def crop_image(self, x):
        for i in range(0, x.shape[0], self.stride):
            for j in range(0, x.shape[1], self.stride):
                patch = x[i:i+256, j:j+256]
                if patch.shape[0] == 256 and patch.shape[1] == 256:
                    self.patch_to_file(patch.reshape(1, patch.shape[0], patch.shape[1], -1))

    
    def process_folder(self, folder):
        init_count, t0 = self.g_count, time.time()
        files = os.listdir(folder)
        for file in files:
            try:
                x = cv2.imread(folder+'/'+file)
                x.shape
            except:
                continue
            x = x.reshape(1, x.shape[0], x.shape[1], -1)
            for level in range(self.downsample_level+1):
                self.crop_image(x[0])
                x = ReSize(x, x.shape[1]//2)
        print('Processed folder=%s, get %d images using %f sec, total images=%d'
                  %(folder, self.g_count-init_count, time.time()-t0, self.g_count))
        

In [17]:
pp = PreProcess(output_folder='/Users/alex/Desktop/data', n_images_per_file=1000, stride=256, downsample_level=0) 

In [18]:
pp.process_folder('/Users/alex/Desktop/proj/data/test1024/')

write 0
write 0
write 0
write 0
write 0
write 0
write 1
write 1
write 1
write 1
write 1
write 1
write 2
write 2
write 2
write 2
write 2
write 2
Processed folder=/Users/alex/Desktop/proj/data/test1024/, get 3720 images using 511.639452 sec, total images=3720


In [20]:
from Distributed_KMeans import *
dkm = Distributed_KMeans(n_clusters=16, 
                         size=256, 
                         win=4, 
                         datatype='SHORT', 
                         frame_each_file=1000, 
                         n_frames=200, max_iter=10000, max_err=1e-7)

In [None]:
dkm.fit('/Users/alex/Desktop/data/256/', ['0.data', '1.data'])

b'Iter = 1 sum2 = 174928198287 RMSE=33.611707 PSNR=17.600992 H=3.342348 B=4327355.162843 (For 1080p@29.97fps)\n'
b'Iter = 2 sum2 = 49420739714 RMSE=17.865504 PSNR=23.090498 H=3.523937 B=4562459.566496 (For 1080p@29.97fps)\n'
b'Iter = 3 sum2 = 42382231277 RMSE=16.544459 PSNR=23.757752 H=3.616707 B=4682569.814371 (For 1080p@29.97fps)\n'
b'Iter = 4 sum2 = 39389660287 RMSE=15.949673 PSNR=24.075768 H=3.673368 B=4755928.660725 (For 1080p@29.97fps)\n'
b'Iter = 5 sum2 = 37660826728 RMSE=15.595726 PSNR=24.270692 H=3.706734 B=4799128.060812 (For 1080p@29.97fps)\n'
b'Iter = 6 sum2 = 36582943611 RMSE=15.370924 PSNR=24.396804 H=3.729375 B=4828442.027638 (For 1080p@29.97fps)\n'
b'Iter = 7 sum2 = 35878344496 RMSE=15.222180 PSNR=24.481266 H=3.745301 B=4849060.736932 (For 1080p@29.97fps)\n'
b'Iter = 8 sum2 = 35464594742 RMSE=15.134155 PSNR=24.531640 H=3.754109 B=4860464.368548 (For 1080p@29.97fps)\n'
b'Iter = 9 sum2 = 35197607787 RMSE=15.077080 PSNR=24.564459 H=3.758995 B=4866791.110653 (For 1080p@29.9

b'Iter = 74 sum2 = 32754789065 RMSE=14.544476 PSNR=24.876842 H=3.848584 B=4982781.691270 (For 1080p@29.97fps)\n'
b'Iter = 75 sum2 = 32754161519 RMSE=14.544336 PSNR=24.876925 H=3.848887 B=4983174.334516 (For 1080p@29.97fps)\n'
b'Iter = 76 sum2 = 32753194073 RMSE=14.544121 PSNR=24.877054 H=3.849240 B=4983631.800938 (For 1080p@29.97fps)\n'
b'Iter = 77 sum2 = 32752420611 RMSE=14.543950 PSNR=24.877156 H=3.849535 B=4984013.331639 (For 1080p@29.97fps)\n'
b'Iter = 78 sum2 = 32751950162 RMSE=14.543845 PSNR=24.877219 H=3.849682 B=4984203.788308 (For 1080p@29.97fps)\n'
b'Iter = 79 sum2 = 32751682445 RMSE=14.543786 PSNR=24.877254 H=3.849781 B=4984331.582571 (For 1080p@29.97fps)\n'
b'Iter = 80 sum2 = 32751446563 RMSE=14.543733 PSNR=24.877285 H=3.849945 B=4984544.264329 (For 1080p@29.97fps)\n'
b'Iter = 81 sum2 = 32751207747 RMSE=14.543680 PSNR=24.877317 H=3.850005 B=4984621.743460 (For 1080p@29.97fps)\n'
b'Iter = 82 sum2 = 32750486987 RMSE=14.543520 PSNR=24.877413 H=3.850116 B=4984765.897859 (For 10