In [15]:
# Author: Ron Weiss <ronweiss@gmail.com>, Gael Varoquaux
# Modified by Thierry Guillemot <thierry.guillemot.work@gmail.com>
# License: BSD 3 clause

import matplotlib as mpl
import matplotlib.pyplot as plt

import numpy as np

from sklearn import datasets
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import StratifiedKFold


import pandas as pandas



def readExcelSheet1(excelfile):
    """Read a workbook with pandas' default ``read_excel`` settings.

    Only ``excelfile`` is forwarded to ``read_excel``; every other option keeps
    its pandas default.

    Args:
        excelfile: Workbook location, passed to ``pandas.read_excel`` unchanged.

    Returns:
        The ``.values`` array of the DataFrame that ``read_excel`` produced.
    """
    import pandas as pd

    frame = pd.read_excel(excelfile)
    return frame.values


def readExcelRange(excelfile, sheetname="Data", startrow=1, endrow=1, startcol=1, endcol=1):
    """Return a rectangular cell range of one worksheet as an array.

    The sheet is read without a header row, so row 1 / column 1 address the
    first cell of the sheet. All four bounds are 1-based and inclusive.

    Args:
        excelfile: Workbook location, passed to ``pandas.read_excel`` unchanged.
        sheetname: Worksheet to read.
        startrow: First row of the range.
        endrow: Last row of the range.
        startcol: First column of the range.
        endcol: Last column of the range.

    Returns:
        The 2-D slice of the sheet's ``.values`` covering the requested range.
    """
    import pandas as pd

    grid = pd.read_excel(excelfile, sheetname, header=None).values
    # Translate the 1-based inclusive bounds into 0-based half-open slices.
    row_window = slice(startrow - 1, endrow)
    col_window = slice(startcol - 1, endcol)
    return grid[row_window, col_window]

def readExcel(excelfile, **args):
    """Read a workbook, optionally restricted to a cell range.

    With keyword arguments the call is delegated to ``readExcelRange``;
    without any it is delegated to ``readExcelSheet1``.

    Args:
        excelfile: Workbook location handed to the delegate reader.
        **args: Optional keywords forwarded verbatim to ``readExcelRange``.

    Returns:
        A scalar when the result is a single cell, a 1-D array when it is a
        single row, otherwise the array exactly as the delegate returned it.
    """
    loaded = readExcelRange(excelfile, **args) if args else readExcelSheet1(excelfile)

    # A lone cell collapses to its value, a single row to a 1-D array.
    if loaded.shape == (1, 1):
        return loaded[0, 0]
    if loaded.shape[0] == 1:
        return loaded[0]
    return loaded

    

# Workbook holding the measurements to cluster. readExcel is called without
# keyword arguments, so it takes the readExcelSheet1 path (pandas' default
# read_excel settings).
excelfile = "Assignment_5_Data_and_Template.xlsx"
data=readExcel(excelfile)
# Sanity check on what was loaded.
print(data.shape)

#print(__doc__)

# One colour per mixture component; make_ellipses and the scatter plot both
# iterate over this list.
colors = ['navy', 'turquoise', 'darkorange']


def make_ellipses(gmm, ax):
    """Draw one covariance ellipse per mixture component on ``ax``.

    Only the first two feature dimensions are drawn. Components are paired
    with the module-level ``colors`` list, so at most ``len(colors)`` ellipses
    are added.

    Args:
        gmm: Fitted mixture model exposing ``covariance_type``,
            ``covariances_`` and ``means_``.
        ax: Axes that receives the ellipse patches.

    Raises:
        ValueError: If ``gmm.covariance_type`` is not 'full', 'tied', 'diag'
            or 'spherical'.
    """
    n_components = gmm.means_.shape[0]
    # zip stops at the shorter sequence, so a model with fewer components than
    # colours no longer indexes past the end of means_/covariances_.
    for n, color in zip(range(n_components), colors):
        if gmm.covariance_type == 'full':
            covariances = gmm.covariances_[n][:2, :2]
        elif gmm.covariance_type == 'tied':
            covariances = gmm.covariances_[:2, :2]
        elif gmm.covariance_type == 'diag':
            covariances = np.diag(gmm.covariances_[n][:2])
        elif gmm.covariance_type == 'spherical':
            covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n]
        else:
            raise ValueError(
                "unsupported covariance_type: %r" % (gmm.covariance_type,)
            )

        # eigh returns eigenvalues in ascending order and the matching
        # eigenvectors as the COLUMNS of w, so the axis belonging to v[0]
        # points along w[:, 0] (not the row w[0]).
        v, w = np.linalg.eigh(covariances)
        u = w[:, 0] / np.linalg.norm(w[:, 0])
        angle = np.degrees(np.arctan2(u[1], u[0]))
        # Scale the eigenvalues (variances) to full axis lengths.
        v = 2. * np.sqrt(2.) * np.sqrt(v)
        # `angle` must be passed by keyword: recent matplotlib releases accept
        # only xy, width and height positionally.
        ell = mpl.patches.Ellipse(gmm.means_[n, :2], v[0], v[1],
                                  angle=180 + angle, color=color)
        ell.set_clip_box(ax.bbox)
        ell.set_alpha(0.5)
        ax.add_artist(ell)

# Iris data set and a train/test split of it.
# NOTE(review): in the code visible in this section these variables are only
# referenced from commented-out lines; the active clustering below works on
# `data` instead. Confirm nothing else needs them before removing.
iris = datasets.load_iris()

# Split into non-overlapping training (75%) and testing (25%) sets and keep
# only the first of the four stratified folds.
skf = StratifiedKFold(n_splits=4)
train_index, test_index = next(iter(skf.split(iris.data, iris.target)))

# X_* hold the feature vectors, y_* the matching class labels.
X_train, y_train = iris.data[train_index], iris.target[train_index]
X_test, y_test = iris.data[test_index], iris.target[test_index]

# Letter assigned to each mixture-component index: 0 -> F, 1 -> M, 2 -> C.
# NOTE(review): component indices come out of the fitting procedure, not from
# any property of the rows, so this index-to-letter mapping is a convention.
# Confirm it against the fitted means before trusting the letters.
labelTarget = ['F','M','C']
n_classes = len(np.unique(labelTarget))

# Fixed seed so that the k-means initialisation, and therefore the numbering
# of the fitted components, is the same on every run. With random_state=None
# the F/M/C letters could be permuted between runs.
RANDOM_SEED = 0

# One GaussianMixture per covariance structure, all sharing the same settings.
estimators = {
    cov_type: GaussianMixture(
        n_components=n_classes,
        covariance_type=cov_type,
        tol=0.001,
        max_iter=100,
        n_init=1,
        init_params='kmeans',
        random_state=RANDOM_SEED,
    )
    for cov_type in ['spherical', 'diag', 'tied', 'full']
}

n_estimators = len(estimators)

# One figure with a 2-row grid of subplots, one panel per estimator.
plt.figure(figsize=(3 * n_estimators // 2, 6))
plt.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05, left=.01, right=.99)

# Per-estimator output buffer: one row per row of `data`, column 0 for the
# predicted component index and column 1 for that component's probability.
# Sized from the inputs rather than hard-coded, so a workbook with a different
# number of rows (or a different set of estimators) still fits.
result = [np.zeros((data.shape[0], 2)) for _ in range(n_estimators)]

    
for index, (name, estimator) in enumerate(estimators.items()):
    # Fit with EM on the unlabeled rows; no supervised initialisation is used.
    estimator.fit(data)

    # Column 0: predicted component per row.
    # Column 1: the largest posterior probability in that row, i.e. the
    # probability of the most likely component.
    y_train_pred = estimator.predict(data)
    y_train_pred_prob = estimator.predict_proba(data)
    result[index][:, 0] = y_train_pred
    result[index][:, 1] = y_train_pred_prob.max(axis=1)

    h = plt.subplot(2, n_estimators // 2, index + 1)
    make_ellipses(estimator, h)

    # Draw each predicted component in its own colour. Plotting the complete
    # data set once per colour would paint every point in the last colour and
    # make the legend meaningless.
    for n, color in enumerate(colors):
        members = data[y_train_pred == n]
        plt.scatter(members[:, 0], members[:, 1], s=0.8, color=color, label=labelTarget[n])

    plt.xticks(())
    plt.yticks(())
    plt.title(name)

# The legend is attached to the last subplot drawn.
plt.legend(scatterpoints=1, loc='lower right', prop=dict(size=12))

# Final labels come from the 'full'-covariance model. Looking the estimator up
# by name avoids relying on it being the fourth entry of `estimators`.
full_result = result[list(estimators).index('full')]

# Column 0 holds the predicted component index (stored as float), column 1 the
# probability of that component.
components = full_result[:, 0].astype(int)

# label: one letter per row, kept as a column vector.
# lprob: the matching probability.
label = np.asarray(labelTarget, dtype=str)[components].reshape(-1, 1)
lprob = full_result[:, 1].copy()

# Number of rows assigned to each letter.
fCount = int(np.count_nonzero(label == 'F'))
mCount = int(np.count_nonzero(label == 'M'))
cCount = int(np.count_nonzero(label == 'C'))
print(fCount)
print(mCount)
print(cCount)

(950, 2)
422
243
285


In [16]:
def readExcelSheet1(excelfile):
    """Read a workbook with pandas' default ``read_excel`` settings.

    Only ``excelfile`` is forwarded to ``read_excel``; every other option keeps
    its pandas default.

    Args:
        excelfile: Workbook location, passed to ``pandas.read_excel`` unchanged.

    Returns:
        The ``.values`` array of the DataFrame that ``read_excel`` produced.
    """
    # NOTE(review): a helper with this name is also defined in the previous
    # cell; consider keeping a single definition.
    import pandas as pd

    frame = pd.read_excel(excelfile)
    return frame.values


def readExcelRange(excelfile, sheetname="Data", startrow=1, endrow=1, startcol=1, endcol=1):
    """Return a rectangular cell range of one worksheet as an array.

    The sheet is read without a header row, so row 1 / column 1 address the
    first cell of the sheet. All four bounds are 1-based and inclusive.

    Args:
        excelfile: Workbook location, passed to ``pandas.read_excel`` unchanged.
        sheetname: Worksheet to read.
        startrow: First row of the range.
        endrow: Last row of the range.
        startcol: First column of the range.
        endcol: Last column of the range.

    Returns:
        The 2-D slice of the sheet's ``.values`` covering the requested range.
    """
    # NOTE(review): a helper with this name is also defined in the previous
    # cell; consider keeping a single definition.
    import pandas as pd

    grid = pd.read_excel(excelfile, sheetname, header=None).values
    # Translate the 1-based inclusive bounds into 0-based half-open slices.
    row_window = slice(startrow - 1, endrow)
    col_window = slice(startcol - 1, endcol)
    return grid[row_window, col_window]

def readExcel(excelfile, **args):
    """Read a workbook, optionally restricted to a cell range.

    With keyword arguments the call is delegated to ``readExcelRange``;
    without any it is delegated to ``readExcelSheet1``.

    Args:
        excelfile: Workbook location handed to the delegate reader.
        **args: Optional keywords forwarded verbatim to ``readExcelRange``.

    Returns:
        A scalar when the result is a single cell, a 1-D array when it is a
        single row, otherwise the array exactly as the delegate returned it.
    """
    # NOTE(review): a helper with this name is also defined in the previous
    # cell; consider keeping a single definition.
    loaded = readExcelRange(excelfile, **args) if args else readExcelSheet1(excelfile)

    # A lone cell collapses to its value, a single row to a 1-D array.
    if loaded.shape == (1, 1):
        return loaded[0, 0]
    if loaded.shape[0] == 1:
        return loaded[0]
    return loaded

    
def writeExcelData(x,excelfile,sheetname,startrow,startcol):
    """Write ``x`` into an existing workbook, keeping its other content.

    ``x`` is wrapped in a DataFrame and written without header or index, with
    its top-left value landing on the 1-based cell (``startrow``,
    ``startcol``) of ``sheetname``.

    The workbook is opened in append mode with ``if_sheet_exists='overlay'``,
    which writes over the target cells of an existing sheet and leaves the
    rest of the workbook in place. This replaces assigning to ``writer.book``
    and calling ``writer.save()``, neither of which is available in
    pandas 2.x. The ``overlay`` option needs pandas 1.4 or newer.

    Args:
        x: Data accepted by the ``pandas.DataFrame`` constructor.
        excelfile: Path of an existing ``.xlsx`` workbook; modified in place.
        sheetname: Worksheet to write to.
        startrow: 1-based row of the first written cell.
        startcol: 1-based column of the first written cell.
    """
    from pandas import DataFrame, ExcelWriter

    df = DataFrame(x)
    # The context manager saves and closes the workbook even if to_excel raises.
    with ExcelWriter(excelfile, engine='openpyxl', mode='a',
                     if_sheet_exists='overlay') as writer:
        # to_excel counts rows/columns from 0, hence the -1 on both offsets.
        df.to_excel(writer, sheet_name=sheetname, startrow=startrow-1,
                    startcol=startcol-1, header=False, index=False)


def getSheetNames(excelfile):
    """Return the worksheet names of a workbook.

    The workbook is opened through a context manager so its file handle is
    released before returning; the same file is written to right afterwards.

    Args:
        excelfile: Workbook location, passed to ``pandas.ExcelFile`` unchanged.

    Returns:
        The ``sheet_names`` list reported by ``pandas.ExcelFile``.
    """
    from pandas import ExcelFile

    with ExcelFile(excelfile) as workbook:
        return workbook.sheet_names
    

# Path of the .xlsx workbook that receives the results.
excelfile = "Assignment_5_Data_and_Template.xlsx"

# Sheet names present in the workbook.
sheets = getSheetNames(excelfile)

# (values, first row, first column) for every write into the 'Results' sheet,
# in the order the writes are performed. `label`, `lprob` and the three counts
# are expected to exist already from the previous cell.
for payload, row, col in (
    (label, 2, 1),      # label
    (lprob, 2, 2),      # lprob
    ([fCount], 3, 6),   # fCount
    ([mCount], 2, 6),   # mCount
    ([cCount], 4, 6),   # cCount
):
    writeExcelData(payload, excelfile, 'Results', row, col)
