In [1]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from matplotlib import style
style.use('seaborn-white')
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
from sklearn.externals import joblib 

# Column labels for the UCI "Breast Cancer Wisconsin (Original)" file, which has
# no header row. The file has exactly 11 comma-separated fields per record, so
# this list must contain exactly 11 names in file order.
#
# Two mistakes in the previous list cancelled each other out in length but
# mislabelled the columns: 'Single Epithelial Cell Size' was split into two
# names, and a missing comma fused 'Normal Nucleoli' and 'Mitoses' into one
# string via implicit string-literal concatenation.
columns = [
    'id',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class',
]
# Fetch the header-less CSV straight from the UCI repository and label its
# fields with `columns`.
dataset_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data'
raw_data = pd.read_csv(dataset_url, names=columns, sep=',')

# '?' entries are swapped for the numeric sentinel -99999 (the same value the
# imputation step later treats as "missing").
data = raw_data.replace('?', -99999)

# Quick look at the feature distributions: one histogram panel per numeric
# feature, all sharing axes; the identifier and the target are left out.
feature_frame = data.drop(['id', 'Class'], axis=1)
feature_frame.plot(kind='hist', subplots=True, layout=(4, 2), sharex=True, sharey=True)
plt.show()

In [2]:
# Column-wise (axis=0) mean imputation: every -99999 sentinel is replaced by
# the mean of the remaining values in its column.
# NOTE(review): the imputer is fit on the whole frame (including 'id' and
# 'Class') before the train/test split, so test rows contribute to the means.
imp = preprocessing.Imputer(strategy='mean', missing_values=-99999, axis=0)
imputed_values = imp.fit_transform(data)

# fit_transform hands back a bare array; rebuild the DataFrame with the
# original row index and column labels.
data = pd.DataFrame(imputed_values, columns=data.columns, index=data.index)

In [3]:
# Target is the 'Class' column; features are everything except the target and
# the sample identifier.
y = data.Class
X = data.drop(['Class','id'], axis=1)

# Hold out 20% of the rows for evaluation. The seed is fixed so that the split
# (and therefore every score reported below) is reproducible after
# Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Standardise the features, then fit a 100-tree random forest using all
# available cores. random_state is fixed so that repeated runs build the same
# forest and the grid search selects the same hyperparameters.
pipeline = make_pipeline(
    preprocessing.StandardScaler(),
    RandomForestRegressor(n_estimators=100, verbose=0, n_jobs=-1, random_state=42),
)

# Search grid. Keys use make_pipeline's '<lowercased class name>__<param>'
# convention to address the forest step inside the pipeline.
hyperparameters = {
    'randomforestregressor__max_features': ['auto', 'sqrt', 'log2'],
    'randomforestregressor__max_depth': [None, 7, 5, 3],
}

In [5]:
# Grid search over `hyperparameters` with 10-fold cross-validation on the
# training split, parallelised across all cores.
model = GridSearchCV(estimator=pipeline, param_grid=hyperparameters, cv=10, n_jobs=-1, verbose=0)
model.fit(X_train, y_train)

# Predictions for the held-out rows, kept in a one-column frame that shares
# X_test's index so they can be lined up with y_test.
test_predictions = model.predict(X_test)
pred = pd.DataFrame(test_predictions, columns=['prediction'], index=X_test.index)

In [6]:
# Show which grid point won, then the R^2 of the predictions on the test split.
print(model.best_params_)
test_r2 = r2_score(y_test, pred)
print("r2 score:" + str(test_r2))

{'randomforestregressor__max_depth': 5, 'randomforestregressor__max_features': 'sqrt'}
r2 score:0.924239466423


In [7]:
# Persist the fitted grid-search object to disk in the working directory.
model_path = 'rf_regressor.pkl'
joblib.dump(model, model_path)

['rf_regressor.pkl']