In [7]:

import pandas as pd
import numpy as np
import pickle
from sklearn import ensemble
from sklearn import model_selection, tree, linear_model
from sklearn.feature_selection import SelectFromModel
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [8]:
# Load the raw table from disk.
data = pd.read_csv("data/data2.csv")

# Columns left out of the feature matrix. "PerformanceRating" is excluded
# because it is the prediction target (see `y` below).
excluded_columns = [
    "Attrition",
    "BusinessTravel",
    "Department",
    "EducationField",
    "Gender",
    "PerformanceRating",
]

# Feature matrix: every remaining column, as a plain NumPy array.
feature_frame = data.drop(excluded_columns, axis=1)
X = feature_frame.values

# Target vector: the value the classifiers are trained to predict.
y = data["PerformanceRating"].values

In [9]:
# Fixed seed so that the split, the selected features and every downstream
# score are reproducible after a kernel restart.
RANDOM_STATE = 42

# Split BEFORE selecting features. Fitting the selector on the full data set
# lets the held-out rows influence which columns are kept, which leaks test
# information into training and inflates the reported test score.
X_train_raw, X_test_raw, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.30, random_state=RANDOM_STATE
)

# Rank features with an extra-trees ensemble fitted on the training rows only,
# then wrap it in SelectFromModel (prefit=True: the estimator is used as-is).
feature_selector = ensemble.ExtraTreesClassifier(random_state=RANDOM_STATE).fit(
    X_train_raw, y_train
)
model = SelectFromModel(feature_selector, prefit=True)

# Apply the same column selection to both partitions.
X_train = model.transform(X_train_raw)
X_test = model.transform(X_test_raw)

# Reduced view of the full matrix, kept under its original name.
X_new = model.transform(X)

# Number of selected columns; used as the upper bound for `max_features`
# in the hyper-parameter grids.
feature_count = X_train.shape[1]

In [18]:

# Maps each tuned estimator to its score on the held-out split; populated by
# the grid-search loop.
model_results = {}

# Candidate values shared by the tree-based estimators.
depth_grid = list(range(1, 20))                   # max_depth: 1..19
feature_grid = list(range(1, feature_count + 1))  # max_features: 1..feature_count

# Estimator instance -> hyper-parameter grid to search for it.
models_params = {}

# Naive Bayes has nothing to tune here: an empty grid means a single fit
# with default settings.
models_params[GaussianNB()] = {}

models_params[tree.DecisionTreeClassifier()] = {
    'max_depth': list(depth_grid),
    'max_features': list(feature_grid),
}

models_params[MLPClassifier()] = {
    'activation': ['tanh', 'relu', 'logistic'],
    'solver': ['sgd', 'adam'],
    'alpha': [1e-4, 1e-3, 1e-2],
    'max_iter': [50, 100, 150],
}

models_params[ensemble.RandomForestClassifier()] = {
    'n_estimators': [10, 100, 1000],
    'max_depth': list(depth_grid),
    'max_features': list(feature_grid),
}

In [None]:
# Tune every candidate estimator and record how its best configuration scores
# on the held-out split.
#
# GridSearchCV is taken from `sklearn.model_selection` (already imported in
# the first cell). The old `sklearn.grid_search` module was deprecated and
# later removed from scikit-learn, and `model_selection` is the module this
# notebook already uses for `train_test_split`.
#
# `.items()` replaces the Python-2-only `.iteritems()`; it behaves the same
# for this loop on both Python 2 and Python 3.
for model, params in models_params.items():
    # Cross-validated exhaustive search over `params`, using 4 parallel jobs.
    grid = model_selection.GridSearchCV(estimator=model, param_grid=params, n_jobs=4)
    grid.fit(X_train, y_train)

    # Score the refitted best estimator on data the search never saw.
    test_score = grid.best_estimator_.score(X_test, y_test)
    model_results[grid.best_estimator_] = test_score



In [17]:
# Pick the tuned estimator with the highest held-out score.
# Raises ValueError if `model_results` is empty, i.e. the grid-search cell
# has not been run yet.
best_model = max(model_results, key=model_results.get)

# Single-argument print(...) calls produce the same output on Python 2 and
# Python 3, and match the print(...) style used in the prediction cell.
print(best_model)
print(model_results[best_model])

GaussianNB(priors=None)
1.0


In [None]:
# Persist the winning estimator to disk so it can be reloaded without
# re-running the grid search. `pickle` is already imported in the first cell.
with open('data/malware_detector.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [None]:
# Reload the persisted estimator.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that this notebook (or another trusted source) produced.
with open('data/malware_detector.pkl', 'rb') as f:
    model = pickle.load(f)

# Row 550 is an arbitrary demonstration sample. Clamp it to the last
# available row so the cell does not raise IndexError when the held-out
# split has 550 rows or fewer.
sample_index = min(550, X_test.shape[0] - 1)

# Estimators expect a 2-D array, so reshape the single row to (1, n_features).
sample = X_test[sample_index, :].reshape(1, -1)

# The user-facing messages are in Turkish: "Predicted class" and
# "Confidence values" (per-class probabilities).
print(u'Tahmin edilen sınıf: %d' % model.predict(sample)[0])
print(u'Confidence değerleri: %s' % model.predict_proba(sample)[0])