In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split

### Testing Regression

In [14]:
# Load the car dataset. Selling_Price is the regression target; Car_Name is
# removed from the feature matrix together with it.
dataset = pd.read_csv('data/regression/car_dataset.csv')
X = dataset.drop(columns=['Car_Name', 'Selling_Price'])
Y = dataset['Selling_Price']

# Hold out 10% of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=2,
)

In [15]:
from ubml.train_test import regression_train_test

# Per the saved outputs, this returns a metrics table with one row per
# regressor (train and test scores) and the name of the best model.
metrics_performance, best_model = regression_train_test(
    x_train=X_train,
    y_train=Y_train,
    x_test=X_test,
    y_test=Y_test,
)

In [16]:
# Display the per-model metrics table returned by regression_train_test.
metrics_performance

Unnamed: 0,r2 score train,r2 score test,mean squared error train,mean squared error test,root mean squared error train,root mean squared error test,mean absolute error train,mean absolute error test,explained variance score train,explained variance score test,max error train,max error test
Linear Regression,0.88,0.837,3.259,2.15,1.805,1.466,1.217,1.152,0.88,0.846,11.078,3.975
Lasso Regression,0.843,0.871,4.268,1.698,2.066,1.303,1.286,1.051,0.843,0.871,12.38,3.281
Ridge Regression,0.88,0.84,3.26,2.104,1.806,1.45,1.213,1.143,0.88,0.849,11.113,3.884
Support Vector Regression,-0.078,0.109,29.252,11.729,5.409,3.425,3.236,2.679,-0.003,0.161,31.218,9.118
Decision Tree Regression,1.0,0.943,0.0,0.75,0.0,0.866,0.0,0.495,1.0,0.943,0.0,3.5
Random Forest Regression,0.985,0.983,0.405,0.222,0.636,0.471,0.249,0.321,0.985,0.984,6.946,1.161


In [17]:
# Display the name of the model that regression_train_test reported as best.
best_model

'Random Forest Regression'

### Exporting best model

In [18]:
# Same comparison as before, now with export_best=True.
# NOTE(review): this re-runs the whole comparison and rebinds
# metrics_performance / best_model. Presumably it also writes the winning
# model to "best_model.pkl" (the path loaded in the next section) — confirm
# against the ubml documentation.
metrics_performance, best_model = regression_train_test(
    x_train=X_train,
    y_train=Y_train,
    x_test=X_test,
    y_test=Y_test,
    export_best=True,
)

### Loading and Predicting

In [19]:
from ubml.train_test import load_model

# Load the exported model object from disk.
# NOTE(review): `model` is not referenced again in the cells shown here; the
# prediction cell passes the file path to predict() instead.
# NOTE(review): a .pkl file is presumably a pickle — only load artifacts that
# come from a trusted source.
model = load_model("best_model.pkl")

In [20]:
# Preview the first rows; row 1 supplies the feature values used in the
# prediction cell that follows.
dataset.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,0,0,0,0
1,sx4,2013,4.75,9.54,43000,1,0,0,0
2,ciaz,2017,7.25,9.85,6900,0,0,0,0
3,wagon r,2011,2.85,4.15,5200,0,0,0,0
4,swift,2014,4.6,6.87,42450,1,0,0,0


In [21]:
from ubml.train_test import predict

# Feature order follows the columns of X (the dataset without Car_Name and
# Selling_Price): Year, Present_Price, Kms_Driven, Fuel_Type, Seller_Type,
# Transmission, Owner.
# The values below are row 1 of dataset.head(), whose actual Selling_Price
# is 4.75.
prediction = predict(
    "best_model.pkl",
    input=[2013, 9.54, 43000, 1, 0, 0, 0],
)
prediction

5.122599999999998

## Testing `model_train_test()`

In [10]:
from ubml.train_test import model_train_test

# Generic entry point: the task is selected through `mode` instead of calling
# a task-specific function.
# NOTE(review): in the saved outputs the Decision Tree and Random Forest rows
# differ slightly from the earlier regression_train_test run on the same
# split, which suggests those models are not seeded — confirm in ubml.
metrics_performance, best_model = model_train_test(
    mode="regression",
    x_train=X_train,
    y_train=Y_train,
    x_test=X_test,
    y_test=Y_test,
)
print(best_model)
metrics_performance

Random Forest Regression


Unnamed: 0,r2 score train,r2 score test,mean squared error train,mean squared error test,root mean squared error train,root mean squared error test,mean absolute error train,mean absolute error test,explained variance score train,explained variance score test,max error train,max error test
Linear Regression,0.88,0.837,3.259,2.15,1.805,1.466,1.217,1.152,0.88,0.846,11.078,3.975
Lasso Regression,0.843,0.871,4.268,1.698,2.066,1.303,1.286,1.051,0.843,0.871,12.38,3.281
Ridge Regression,0.88,0.84,3.26,2.104,1.806,1.45,1.213,1.143,0.88,0.849,11.113,3.884
Support Vector Regression,-0.078,0.109,29.252,11.729,5.409,3.425,3.236,2.679,-0.003,0.161,31.218,9.118
Decision Tree Regression,1.0,0.927,0.0,0.958,0.0,0.979,0.0,0.524,1.0,0.928,0.0,4.25
Random Forest Regression,0.986,0.977,0.37,0.297,0.609,0.545,0.241,0.351,0.986,0.979,6.704,1.931


### Testing Classification

In [23]:
# Load the heart dataset; `target` is the label column.
# NOTE: this rebinds dataset, X, Y and the train/test splits that the
# regression cells used, so those cells must be re-run before revisiting them.
dataset = pd.read_csv('data/classification/heart.csv')
# `columns=` already selects the axis; the redundant `axis=1` (ignored by
# pandas when `columns` is given) has been removed.
X = dataset.drop(columns='target')
Y = dataset['target']
# Stratify on the label so the 80/20 split keeps the class proportions, and
# fix random_state so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [24]:
from ubml.train_test import classification_train_test

# Per the saved output, this returns a metrics table with one row per
# classifier and the name of the best model.
# NOTE(review): the saved output reports "Logistic Regression" as best even
# though Decision Tree and Random Forest score 1.0 on every train and test
# metric. Confirm the selection criterion in ubml, and check heart.csv for
# duplicate rows that could leak between the train and test splits.
metrics_performance, best_model = classification_train_test(
    x_train=X_train,
    y_train=Y_train,
    x_test=X_test,
    y_test=Y_test,
)
print(best_model)
metrics_performance

Logistic Regression


Unnamed: 0,accuracy score train,accuracy score test,f1 score train,f1 score test,recall score train,recall score test,jaccard score train,jaccard score test,precision score train,precision score test
Logistic Regression,0.852,0.805,0.863,0.823,0.903,0.886,0.758,0.699,0.826,0.769
KNN Classifier,0.906,0.722,0.907,0.722,0.893,0.705,0.83,0.565,0.922,0.74
SVM Classifier,0.721,0.707,0.741,0.732,0.777,0.781,0.588,0.577,0.708,0.689
Naive Bayes Classifier,0.839,0.78,0.848,0.791,0.874,0.81,0.736,0.654,0.823,0.773
Decision Tree Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
