In [2]:
# !pip install numpy
# !pip install matplotlib
# !pip install sklearn

In [3]:
import struct
import numpy as np
from matplotlib import pyplot as plt
import math
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import Perceptron

In [17]:
size_record = 2952
char_offset = 0

def read_record(f):
    pixels = []
    s = f.read(size_record)
    str_data = struct.unpack_from('>2736s',s,216)[0]

    offset = 0
    while offset < len(str_data):

        byte = bin(int(str_data[offset]))[2:].zfill(8)
        pixels.append(byte[0:4])
        pixels.append(byte[4:8])
        offset = offset + 1
    pixels = [float(x) for x in pixels]
    return pixels

In [33]:
# 48: number of katakana characters
# 208: number of images for each katakana
# 50*50: image size
arr = np.zeros([48,208,50,50], dtype=np.float32)
filename = 'ETL5C'
with open(filename, 'rb') as f:
    i = 0
    char_offset = 0
    while i < 10608:
    # for i in range(500):
        pixels_arr = np.array(read_record(f))
        pic = np.reshape(pixels_arr,(76,72)).astype(np.float32)
        pic = pic[10:60,10:60]
        if math.floor(i/208) in [36,38,47]:
            if i in [36*208, 38*208, 47*208]:
                char_offset = char_offset + 1
            i = i + 1
            continue
        # print("{} {}".format(math.floor(i/208), char_offset))
        arr[math.floor(i/208)-char_offset,i%208]= pic
        i = i + 1

In [35]:
print(arr.shape)
print(arr[0,0,0,:])


(48, 208, 50, 50)
[10. 10. 10. 11. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10.
 10. 10. 10. 10.  1. 10. 10.  1. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10.
 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10.]


In [14]:
def linearSVC(X,y,Xte,yte):
    print("Processing LinearSVC...")
    clf = LinearSVC(max_iter=2000)
    clf.fit(X, y)
    Xte_res = clf.predict(Xte)
    comp(Xte_res,yte)

def logisticReg(X,y,Xte,yte):
    print("Processing Logistic Regression...")
    logreg = LogisticRegression()
    logreg.fit(X, y)
    Xte_res = logreg.predict(Xte)
    comp(Xte_res,yte)

def naiveBayes(X,y,Xte,yte):
    print("Processing Gaussian Native Bayes...")
    nb = GaussianNB()
    nb.fit(X, y)
    Xte_res = nb.predict(Xte)
    comp(Xte_res,yte)

def kNeighbour(X,y,Xte,yte):
    print("Processing KNeighbour...")
    knn = KNeighborsClassifier()
    knn.fit(X, y)
    Xte_res = knn.predict(Xte)
    comp(Xte_res,yte)

def decisionTree(X,y,Xte,yte):
    print("Processing Decision Tree...")
    tree = DecisionTreeClassifier()
    tree.fit(X, y)
    Xte_res = tree.predict(Xte)
    comp(Xte_res,yte)

def perceptron(X,y,Xte,yte):
    print("Processing Perceptron...")
    percep = Perceptron()
    percep.fit(X, y)
    Xte_res = percep.predict(Xte)
    comp(Xte_res,yte)
    
def comp(Xte_res,yte):
    
    global char_offset
    
    match_num = 0
    match_list = np.zeros(48)
    
    for i in range(len(Xte_res)):
        if Xte_res[i] == yte[i]:
            match_list[int(yte[i])] = match_list[int(yte[i])] + 1
            match_num = match_num + 1
        print("{} ------ {}".format(yte[i],Xte_res[i]))
    
                          
    for i in range(len(match_list)):
        print("Character = {}, Correct = {}, % = {}".format(i, match_list[i], (match_list[i]/10)* 100))
    
    print("All {} test samples, {} matches. Predict rate is {}.".format(len(yte),match_num,match_num/len(yte)))   
    
def save_image(pic,idx):
    plt.imshow(samples[100], cmap='gray', interpolation='nearest')
    plt.savefig("pic{:04d}.png".format(idx)) #save image as file
    print("{:04d} finished.".format(idx))
    

In [12]:
def training(arr, classifier):
    # prepare X and y
    train_sz = 198
    test_sz = 10
    X = np.zeros([train_sz*48, 50*50], dtype=np.float32)
    Xte = np.zeros([test_sz*48, 50*50], dtype=np.float32)

    # get training matrix X, training label y, testing matrix Xte, testing label yte
    for i in range(48):
         train_pics = arr[i,0:train_sz,:,:]
         train_tuples = np.reshape(train_pics,(train_sz,50*50))
         X[i*train_sz:(i+1)*train_sz , :] = train_tuples

         test_pics = arr[i,train_sz:train_sz+test_sz,:,:]
         test_tuples = np.reshape(test_pics,(test_sz,50*50))
         Xte[i*test_sz:(i+1)*test_sz,:] = test_tuples

    y = np.zeros([train_sz*48])
    yte = np.zeros([test_sz*48])

    for i in range(train_sz*48):
        y[i] = math.floor(i/train_sz)

    for i in range(test_sz*48):
        yte[i] = math.floor(i/test_sz)
    if (classifier == 0):
        kNeighbour(X,y,Xte,yte)
    elif (classifier == 1):
        linearSVC(X,y,Xte,yte)
    elif (classifier == 2):
        logisticReg(X,y,Xte,yte)
    elif (classifier == 3):
        naiveBayes(X,y,Xte,yte)
    elif (classifier == 4):
        decisionTree(X,y,Xte,yte)
    elif (classifier == 5):
        perceptron(X,y,Xte,yte)

In [15]:
training(arr,0) #KNeighbour

Processing KNeighbour...
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 18.0
0.0 ------ 0.0
0.0 ------ 24.0
0.0 ------ 0.0
0.0 ------ 7.0
0.0 ------ 0.0
0.0 ------ 20.0
0.0 ------ 0.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 0.0
1.0 ------ 1.0
1.0 ------ 17.0
1.0 ------ 1.0
2.0 ------ 2.0
2.0 ------ 27.0
2.0 ------ 45.0
2.0 ------ 43.0
2.0 ------ 27.0
2.0 ------ 43.0
2.0 ------ 2.0
2.0 ------ 38.0
2.0 ------ 17.0
2.0 ------ 7.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 36.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 18.0
3.0 ------ 29.0
4.0 ------ 4.0
4.0 ------ 33.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 37.0
4.0 ------ 24.0
5.0 ------ 5.0
5.0 ------ 24.0
5.0 ------ 24.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 24.0
5.0 ------ 17.0
5.0 ------ 7.0
6.0 ------ 6.0
6.0 ------ 6.0
6.0 ------ 6.0
6.0 -----

In [10]:
training(arr,1) #Linear Regression

Processing LinearSVC...
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 46.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 9.0
0.0 ------ 30.0
0.0 ------ 4.0
0.0 ------ 46.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 46.0
1.0 ------ 1.0
1.0 ------ 33.0
1.0 ------ 44.0
1.0 ------ 7.0
1.0 ------ 1.0
1.0 ------ 1.0
2.0 ------ 2.0
2.0 ------ 13.0
2.0 ------ 43.0
2.0 ------ 7.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 5.0
2.0 ------ 38.0
2.0 ------ 27.0
2.0 ------ 7.0
3.0 ------ 45.0
3.0 ------ 45.0
3.0 ------ 45.0
3.0 ------ 34.0
3.0 ------ 36.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 45.0
3.0 ------ 3.0
4.0 ------ 4.0
4.0 ------ 37.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 33.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 23.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 37.0
5.0 ------ 42.0
5.0 ------ 22.0
6.0 ------ 2.0
6.0 ------ 6.0
6.0 ------ 6.0
6.0 -----

In [16]:
training(arr,3) #NaiveBayes

Processing Gaussian Native Bayes...
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 27.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 27.0
0.0 ------ 27.0
0.0 ------ 15.0
0.0 ------ 0.0
1.0 ------ 18.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 43.0
2.0 ------ 7.0
2.0 ------ 7.0
2.0 ------ 2.0
2.0 ------ 30.0
2.0 ------ 27.0
2.0 ------ 18.0
3.0 ------ 45.0
3.0 ------ 45.0
3.0 ------ 21.0
3.0 ------ 21.0
3.0 ------ 45.0
3.0 ------ 33.0
3.0 ------ 21.0
3.0 ------ 21.0
3.0 ------ 21.0
3.0 ------ 21.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 12.0
4.0 ------ 12.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 33.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 27.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 31.0
5.0 ------ 31.0
5.0 ------ 5.0
5.0 ------ 31.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 21.0
6.0 ------ 44.0
6.0 ------ 34.0
6.0 -

In [17]:
training(arr,4) #DecisionTree

Processing Decision Tree...
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 24.0
0.0 ------ 30.0
1.0 ------ 24.0
1.0 ------ 36.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 15.0
2.0 ------ 2.0
2.0 ------ 30.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 7.0
2.0 ------ 34.0
2.0 ------ 43.0
2.0 ------ 2.0
3.0 ------ 21.0
3.0 ------ 45.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 36.0
3.0 ------ 3.0
3.0 ------ 36.0
3.0 ------ 3.0
3.0 ------ 45.0
3.0 ------ 3.0
4.0 ------ 4.0
4.0 ------ 33.0
4.0 ------ 33.0
4.0 ------ 12.0
4.0 ------ 4.0
4.0 ------ 1.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
5.0 ------ 32.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 27.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
6.0 ------ 16.0
6.0 ------ 6.0
6.0 ------ 6.0
6.0 ----

In [18]:
training(arr,5) #Perceptron

Processing Perceptron...
0.0 ------ 0.0
0.0 ------ 46.0
0.0 ------ 46.0
0.0 ------ 0.0
0.0 ------ 46.0
0.0 ------ 0.0
0.0 ------ 9.0
0.0 ------ 0.0
0.0 ------ 0.0
0.0 ------ 46.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 46.0
1.0 ------ 1.0
1.0 ------ 33.0
1.0 ------ 3.0
1.0 ------ 1.0
1.0 ------ 1.0
1.0 ------ 7.0
2.0 ------ 2.0
2.0 ------ 13.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 2.0
2.0 ------ 5.0
2.0 ------ 38.0
2.0 ------ 27.0
2.0 ------ 43.0
3.0 ------ 45.0
3.0 ------ 45.0
3.0 ------ 45.0
3.0 ------ 34.0
3.0 ------ 36.0
3.0 ------ 45.0
3.0 ------ 3.0
3.0 ------ 3.0
3.0 ------ 45.0
3.0 ------ 3.0
4.0 ------ 4.0
4.0 ------ 37.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 4.0
4.0 ------ 23.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 4.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 5.0
5.0 ------ 37.0
5.0 ------ 29.0
5.0 ------ 22.0
6.0 ------ 2.0
6.0 ------ 6.0
6.0 ------ 6.0
6.0 ----

In [None]:
training(arr,2) #Logistic Regression

Processing Logistic Regression...


