# In [4]:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression

from ml_pipeline import *

# Code common to both datasets (MNIST and Million Song)

# In [5]:
class BernNB(MLModel):
    """Pipeline stage that fits a scikit-learn Bernoulli naive Bayes classifier.

    Declares "Xtrain", "ytrain", "Xtest" and "ytest" as inputs and "model"
    as its output. Set ``nsamples_list`` to a list of training-set sizes to
    fit (and report test accuracy for) one classifier per size; leave it as
    ``None`` to fit once on every training row.
    """

    def __init__(self):
        super(BernNB, self).__init__()
        self.class_name = "BernNB"
        # Training-set sizes to try; None (or empty) means "all training rows".
        self.nsamples_list = None

        # We don't need to check for features because
        # a) Our parent checks for it
        # b) We don't need it and we shouldn't have that knowledge encoded
        #    here; let our parent handle it in true OOP fashion.
        self.istr_jsons = json.dumps(
            {"Xtrain": "", "ytrain": "", "Xtest": "", "ytest": ""}
        )
        self.ostr_jsons = json.dumps({"model": ""})

    def do_run(self, input_data, traversal):
        """Fit the classifier and store it under ``input_data["model"]``.

        Args:
            input_data: Mapping whose "Xtrain", "ytrain", "Xtest" and "ytest"
                entries hold the names of the datasets to load. The entries
                are replaced in place by the loaded data.
            traversal: Traversal phase. On "POST" nothing is trained; the
                mapping is returned with a placeholder "model" entry added
                if it has none.

        Returns:
            The same ``input_data`` mapping. When several sizes are listed in
            ``nsamples_list``, "model" is the classifier fit for the last one.
        """
        if traversal == "POST":
            if "model" not in input_data:
                input_data["model"] = ""
            return input_data

        # Resolve every dataset name up front so a missing key fails before
        # anything is read or deleted.
        data_keys = ("Xtrain", "ytrain", "Xtest", "ytest")
        data_names = [input_data[key] for key in data_keys]
        for key, data_name in zip(data_keys, data_names):
            input_data[key] = self.do_read(data_name)
            # NOTE(review): presumably releases the stored copy once it has
            # been loaded -- confirm against MLModel.do_delete.
            self.do_delete(data_name)

        Xtrain, ytrain = input_data["Xtrain"], input_data["ytrain"]
        Xtest, ytest = input_data["Xtest"], input_data["ytest"]

        dprint(DPRINT_INFO, "Bernoulli Naive Bayes: ")

        # Keep the default local: writing it back to self.nsamples_list would
        # pin this dataset's row count for every later run of this instance.
        # An empty list is treated like None so a model is always produced.
        nsamples_list = self.nsamples_list
        if nsamples_list is None or len(nsamples_list) == 0:
            nsamples_list = [Xtrain.shape[0]]

        model = None
        for nsamples in nsamples_list:
            # binarize=0.5 thresholds the features into booleans before fitting.
            model = BernoulliNB(binarize=0.5)
            model.fit(Xtrain[0:nsamples], ytrain[0:nsamples])
            accuracy = model.score(Xtest, ytest)
            dprint(DPRINT_INFO, "\t\tAccuracy for: " + str(nsamples) + " samples is: " + str(accuracy))

        input_data["model"] = model

        return input_data
        
class Logistic(MLModel):
    """Pipeline stage that fits a scikit-learn logistic regression classifier.

    Declares "Xtrain", "ytrain", "Xtest" and "ytest" as inputs and "model"
    as its output. Set ``nsamples_list`` to a list of training-set sizes to
    fit (and report test accuracy for) one classifier per size; leave it as
    ``None`` to fit once on every training row.
    """

    def __init__(self):
        super(Logistic, self).__init__()
        self.class_name = "Logistic"
        # Training-set sizes to try; None (or empty) means "all training rows".
        self.nsamples_list = None

        self.istr_jsons = json.dumps(
            {"Xtrain": "", "ytrain": "", "Xtest": "", "ytest": ""}
        )
        self.ostr_jsons = json.dumps({"model": ""})

    def do_run(self, input_data, traversal):
        """Fit the classifier and store it under ``input_data["model"]``.

        Args:
            input_data: Mapping whose "Xtrain", "ytrain", "Xtest" and "ytest"
                entries hold the names of the datasets to load. The entries
                are replaced in place by the loaded data.
            traversal: Traversal phase. On "POST" nothing is trained; the
                mapping is returned with a placeholder "model" entry added
                if it has none.

        Returns:
            The same ``input_data`` mapping. When several sizes are listed in
            ``nsamples_list``, "model" is the classifier fit for the last one.
        """
        if traversal == "POST":
            if "model" not in input_data:
                input_data["model"] = ""
            return input_data

        # Resolve every dataset name up front so a missing key fails before
        # anything is read or deleted.
        data_keys = ("Xtrain", "ytrain", "Xtest", "ytest")
        data_names = [input_data[key] for key in data_keys]
        for key, data_name in zip(data_keys, data_names):
            input_data[key] = self.do_read(data_name)
            # NOTE(review): presumably releases the stored copy once it has
            # been loaded -- confirm against MLModel.do_delete.
            self.do_delete(data_name)

        Xtrain, ytrain = input_data["Xtrain"], input_data["ytrain"]
        Xtest, ytest = input_data["Xtest"], input_data["ytest"]

        dprint(DPRINT_INFO, "Logistic Regression Classifier: ")

        # Keep the default local: writing it back to self.nsamples_list would
        # pin this dataset's row count for every later run of this instance.
        # An empty list is treated like None so a model is always produced.
        nsamples_list = self.nsamples_list
        if nsamples_list is None or len(nsamples_list) == 0:
            nsamples_list = [Xtrain.shape[0]]

        model = None
        for nsamples in nsamples_list:
            model = LogisticRegression()
            model.fit(Xtrain[0:nsamples], ytrain[0:nsamples])
            accuracy = model.score(Xtest, ytest)
            dprint(DPRINT_INFO, "Accuracy for: " + str(nsamples) + " samples is: " + str(accuracy))

        input_data["model"] = model

        return input_data

class GaussNB(MLModel):
    """Pipeline stage that fits a scikit-learn Gaussian naive Bayes classifier.

    Declares "Xtrain", "ytrain", "Xtest" and "ytest" as inputs and "model"
    as its output. Set ``nsamples_list`` to a list of training-set sizes to
    fit (and report test accuracy for) one classifier per size; leave it as
    ``None`` to fit once on every training row.
    """

    def __init__(self):
        super(GaussNB, self).__init__()
        self.class_name = "GaussNB"
        # Training-set sizes to try; None (or empty) means "all training rows".
        self.nsamples_list = None

        self.istr_jsons = json.dumps(
            {"Xtrain": "", "ytrain": "", "Xtest": "", "ytest": ""}
        )
        self.ostr_jsons = json.dumps({"model": ""})

    def do_run(self, input_data, traversal):
        """Fit the classifier and store it under ``input_data["model"]``.

        Args:
            input_data: Mapping whose "Xtrain", "ytrain", "Xtest" and "ytest"
                entries hold the names of the datasets to load. The entries
                are replaced in place by the loaded data.
            traversal: Traversal phase. On "POST" nothing is trained; the
                mapping is returned with a placeholder "model" entry added
                if it has none.

        Returns:
            The same ``input_data`` mapping. When several sizes are listed in
            ``nsamples_list``, "model" is the classifier fit for the last one.
        """
        if traversal == "POST":
            if "model" not in input_data:
                input_data["model"] = ""
            return input_data

        # Resolve every dataset name up front so a missing key fails before
        # anything is read or deleted.
        data_keys = ("Xtrain", "ytrain", "Xtest", "ytest")
        data_names = [input_data[key] for key in data_keys]
        for key, data_name in zip(data_keys, data_names):
            input_data[key] = self.do_read(data_name)
            # NOTE(review): presumably releases the stored copy once it has
            # been loaded -- confirm against MLModel.do_delete.
            self.do_delete(data_name)

        Xtrain, ytrain = input_data["Xtrain"], input_data["ytrain"]
        Xtest, ytest = input_data["Xtest"], input_data["ytest"]

        dprint(DPRINT_INFO, "Gaussian Naive Bayes: ")

        # Keep the default local: writing it back to self.nsamples_list would
        # pin this dataset's row count for every later run of this instance.
        # An empty list is treated like None so a model is always produced.
        nsamples_list = self.nsamples_list
        if nsamples_list is None or len(nsamples_list) == 0:
            nsamples_list = [Xtrain.shape[0]]

        model = None
        for nsamples in nsamples_list:
            model = GaussianNB()
            model.fit(Xtrain[0:nsamples], ytrain[0:nsamples])
            accuracy = model.score(Xtest, ytest)
            dprint(DPRINT_INFO, "\t\tAccuracy for: " + str(nsamples) + " samples is: " + str(accuracy))

        input_data["model"] = model

        return input_data
            
