In [2]:
import pandas as pd
from sklearn.datasets import load_digits
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split

digits = load_digits()
df = pd.DataFrame(digits.data,columns = digits.feature_names)
df["target"] = digits.target

x = df.drop(columns = "target",axis = 1)
y = df["target"]

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

def get_score_func(model,x_train,x_test,y_train,y_test):
    reg = model
    reg.fit(x_train,y_train)
    return reg.score(x_test,y_test)

score_lr = []
score_svm = []
score_rf = []

from sklearn.model_selection import KFold
kf = KFold(n_splits = 3,random_state = 1206,shuffle = True)
for train_index,test_index in kf.split(digits.data):
    x_train,x_test,y_train,y_test = \
        digits.data[train_index],digits.data[test_index],digits.target[train_index],digits.target[test_index]
    score_lr.append(get_score_func(LinearRegression(),x_train,x_test,y_train,y_test))
    score_svm.append(get_score_func(SVC(),x_train,x_test,y_train,y_test))
    score_rf.append(get_score_func(RandomForestClassifier(),x_train,x_test,y_train,y_test))
print(score_lr)
print(score_svm)
print(score_rf)

[0.592559907163533, 0.5425525700249691, 0.5411495709180241]
[0.989983305509182, 0.989983305509182, 0.9833055091819699]
[0.9699499165275459, 0.9766277128547579, 0.9666110183639399]


In [4]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits = 3,shuffle = True,random_state = 10)
for train_index,test_index in skf.split(digits.data,digits.target):
    x_train,x_test,y_train,y_test = \
        digits.data[train_index],digits.data[test_index],digits.target[train_index],digits.target[test_index]
    score_lr.append(get_score_func(LinearRegression(),x_train,x_test,y_train,y_test))
    score_svm.append(get_score_func(SVC(),x_train,x_test,y_train,y_test))
    score_rf.append(get_score_func(RandomForestClassifier(),x_train,x_test,y_train,y_test))
print(score_lr)
print(score_svm)
print(score_rf)

[0.592559907163533, 0.5425525700249691, 0.5411495709180241, 0.5509164429011628, 0.5714456068477223, 0.55641674791555]
[0.989983305509182, 0.989983305509182, 0.9833055091819699, 0.986644407345576, 0.986644407345576, 0.9849749582637729]
[0.9699499165275459, 0.9766277128547579, 0.9666110183639399, 0.9716193656093489, 0.9732888146911519, 0.9816360601001669]


In [10]:
from sklearn.model_selection import cross_val_score
lr_score = (-1)*cross_val_score(LinearRegression(),x_train,y_train,
                           cv = kf,scoring = "neg_mean_squared_error")
print(lr_score)
svm_score = (-1)*cross_val_score(SVC(),x_train,y_train,
                           cv = kf,scoring = "neg_mean_squared_error")
print(svm_score)
rdc_score = (-1)*cross_val_score(RandomForestClassifier(),x_train,y_train,
                           cv = kf,scoring = "neg_mean_squared_error")
print(rdc_score)

[3.72198601 3.89527711 3.50093108]
[0.095      0.16541353 0.75438596]
[1.055      0.60902256 1.32581454]


In [18]:
from sklearn.metrics import mean_squared_error

num_folds = 10
seed = 1005
scoring = "neg_mean_squared_error"
models = []
models.append(("LR",LinearRegression()))
models.append(("SVC",SVC()))
models.append(("RFC",RandomForestClassifier()))
names = []
kfold_results = []
train_results = []
test_results = []
for name,model in models:
    names.append(name)
    kfold = KFold(n_splits = num_folds,random_state = seed,shuffle = True)
    cv_results = (-1)*cross_val_score(model,x_train,y_train,
                                      cv = kfold,scoring = scoring)
    kfold_results.append(cv_results)
    res = model.fit(x_train,y_train)
    train_result = mean_squared_error(res.predict(x_train),y_train)
    train_results.append(train_result)
    
    test_result = mean_squared_error(res.predict(x_test),y_test)
    test_results.append(test_result)
    message = "%s: %f (%f) %f %f" % (name,cv_results.mean(),
                                     cv_results.std(),
                                     train_result,test_result)
    print(message)

LR: 3.612053 (0.375607) 3.225317 3.638802
SVC: 0.343824 (0.352901) 0.101002 0.370618
RFC: 0.585343 (0.405001) 0.000000 0.293823
