In [1]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.svm import SVR,SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import category_encoders as ce
import matplotlib as plt

Zbiór apartments
====

In [2]:
data = pd.read_csv('apartments.cvs')

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
Unnamed: 0           1000 non-null int64
m2.price             1000 non-null int64
construction.year    1000 non-null int64
surface              1000 non-null int64
floor                1000 non-null int64
no.rooms             1000 non-null int64
district             1000 non-null object
dtypes: int64(6), object(1)
memory usage: 54.8+ KB


In [4]:
# Jako zmienną celu potraktuję zmienną m2.price toworząc w ten sposób zadanie Regresi

# kolumna pierwsza to indeksy można usunąć 
data = data.drop('Unnamed: 0',axis=1)


In [5]:
# Podziła na zbiór testowy/ternignowy 
X_train, X_test, y_train, y_test = train_test_split(data.drop('m2.price', axis=1), data['m2.price'])

In [6]:
#Użyję One-hote encodingu ponieważ Target nie da się w prosty sposób zastosować dla regresi
data.head()

Unnamed: 0,m2.price,construction.year,surface,floor,no.rooms,district
0,5897,1953,25,3,1,Srodmiescie
1,1818,1992,143,9,5,Bielany
2,3643,1937,56,1,2,Praga
3,3517,1995,93,7,3,Ochota
4,3013,1992,144,6,5,Mokotow


In [8]:

# SVM bez skalowania 


svm_ = SVR()
ohe = ce.OneHotEncoder()

# OHE
svm_.fit(ohe.fit_transform(X_train),y_train)
y_pred_ohe = svm_.predict(ohe.transform(X_test))



print(f'RMSE : {mean_squared_error(y_test,y_pred_ohe,squared=False)}')
print(f'R2 score : {r2_score(y_test,y_pred_ohe)}')




RMSE : 971.2846913916203
R2 score : -0.03483033372047473


Bez skalowania wynik tregiczne gorsze niż podawanie stałej wartosci dla każdego przypadku.

In [7]:
#
scal = StandardScaler(copy=False)

svm_ = SVR()
ohe = ce.OneHotEncoder()

columns_to_sclae =X_train.columns[:-1]

scaled_X_train = X_train.copy()
scaled_X_test = X_test.copy()

scaled_X_train = scal.fit_transform(scaled_X_train[columns_to_sclae])
scaled_X_test = scal.transform(scaled_X_test[columns_to_sclae])

# OHE
svm_.fit(ohe.fit_transform(scaled_X_train),y_train)
y_pred_ohe = svm_.predict(ohe.transform(scaled_X_test))



print(f'RMSE_scaled : {mean_squared_error(y_test,y_pred_ohe,squared=False)}')
print(f'R2 score_scaled : {r2_score(y_test,y_pred_ohe)}')

RMSE_scaled : 950.3485344536111
R2 score_scaled : 0.009300641771301965


Po przeskalowaniu wynik jest wyraznie lepszy choć nie zbyt zadowalający. Może pomoże dobranie własciwych parametrów.

In [46]:
from scipy.stats import uniform

param_distribution = {
    'kernel':['linear', 'poly', 'rbf', 'sigmoid',],
    'gamma': uniform(),
    'degree':[i for i in range(1,11)],
    'C':uniform(0,10),
    'epsilon':uniform(0,1)
    
}
rps = RandomizedSearchCV(SVR(),param_distributions=param_distribution,cv=5,n_jobs=1,n_iter=100,verbose=6)
# Poniważ używam one hota nie ma potrzeby użyć piplinu z kodowaniem


In [10]:
rps.fit(ohe.fit_transform(scaled_X_train),y_train)


Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] C=7.660833571733553, degree=1, epsilon=0.8643131141465902, gamma=0.8962483792945437, kernel=poly 
[CV]  C=7.660833571733553, degree=1, epsilon=0.8643131141465902, gamma=0.8962483792945437, kernel=poly, score=0.266, total=   0.0s
[CV] C=7.660833571733553, degree=1, epsilon=0.8643131141465902, gamma=0.8962483792945437, kernel=poly 
[CV]  C=7.660833571733553, degree=1, epsilon=0.8643131141465902, gamma=0.8962483792945437, kernel=poly, score=0.382, total=   0.0s
[CV] C=7.660833571733553, degree=1, epsilon=0.8643131141465902, gamma=0.8962483792945437, kernel=poly 
[CV]  C=7.660833571733553, degree=1, epsilon=0.8643131141465902, gamma=0.8962483792945437, kernel=poly, score=0.293, total=   0.0s
[CV] C=7.660833571733553, degree=1, epsilon=0.8643131141465902, gamma=0.8962483792945437, kernel=poly 
[CV]  C=7.660833571733553, degree=1, epsilon=0.8643131141465902, gamma=0.8962483792945437, kernel=poly, score=0.360, total=   0.0s
[

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s


[CV]  C=0.5542623903543065, degree=4, epsilon=0.5710468601133875, gamma=0.9046219564299464, kernel=rbf, score=0.003, total=   0.0s
[CV] C=0.5542623903543065, degree=4, epsilon=0.5710468601133875, gamma=0.9046219564299464, kernel=rbf 
[CV]  C=0.5542623903543065, degree=4, epsilon=0.5710468601133875, gamma=0.9046219564299464, kernel=rbf, score=-0.041, total=   0.0s
[CV] C=3.017357945562341, degree=1, epsilon=0.003468331412044834, gamma=0.6648401478221985, kernel=linear 
[CV]  C=3.017357945562341, degree=1, epsilon=0.003468331412044834, gamma=0.6648401478221985, kernel=linear, score=0.204, total=   0.0s
[CV] C=3.017357945562341, degree=1, epsilon=0.003468331412044834, gamma=0.6648401478221985, kernel=linear 
[CV]  C=3.017357945562341, degree=1, epsilon=0.003468331412044834, gamma=0.6648401478221985, kernel=linear, score=0.315, total=   0.0s
[CV] C=3.017357945562341, degree=1, epsilon=0.003468331412044834, gamma=0.6648401478221985, kernel=linear 
[CV]  C=3.017357945562341, degree=1, epsilo

[CV]  C=7.844574687345442, degree=4, epsilon=0.8413019090066624, gamma=0.3802700531378562, kernel=sigmoid, score=0.237, total=   0.1s
[CV] C=9.176544581856124, degree=8, epsilon=0.7608430831479172, gamma=0.839796750555392, kernel=sigmoid 
[CV]  C=9.176544581856124, degree=8, epsilon=0.7608430831479172, gamma=0.839796750555392, kernel=sigmoid, score=0.186, total=   0.1s
[CV] C=9.176544581856124, degree=8, epsilon=0.7608430831479172, gamma=0.839796750555392, kernel=sigmoid 
[CV]  C=9.176544581856124, degree=8, epsilon=0.7608430831479172, gamma=0.839796750555392, kernel=sigmoid, score=0.289, total=   0.1s
[CV] C=9.176544581856124, degree=8, epsilon=0.7608430831479172, gamma=0.839796750555392, kernel=sigmoid 
[CV]  C=9.176544581856124, degree=8, epsilon=0.7608430831479172, gamma=0.839796750555392, kernel=sigmoid, score=0.274, total=   0.0s
[CV] C=9.176544581856124, degree=8, epsilon=0.7608430831479172, gamma=0.839796750555392, kernel=sigmoid 
[CV]  C=9.176544581856124, degree=8, epsilon=0.

[CV]  C=4.111034380550197, degree=7, epsilon=0.556409541618503, gamma=0.4437894790666017, kernel=linear, score=0.325, total=   0.0s
[CV] C=4.111034380550197, degree=7, epsilon=0.556409541618503, gamma=0.4437894790666017, kernel=linear 
[CV]  C=4.111034380550197, degree=7, epsilon=0.556409541618503, gamma=0.4437894790666017, kernel=linear, score=0.292, total=   0.0s
[CV] C=8.797716839754962, degree=4, epsilon=0.7060717782306047, gamma=0.37243742086568243, kernel=linear 
[CV]  C=8.797716839754962, degree=4, epsilon=0.7060717782306047, gamma=0.37243742086568243, kernel=linear, score=0.272, total=   0.0s
[CV] C=8.797716839754962, degree=4, epsilon=0.7060717782306047, gamma=0.37243742086568243, kernel=linear 
[CV]  C=8.797716839754962, degree=4, epsilon=0.7060717782306047, gamma=0.37243742086568243, kernel=linear, score=0.398, total=   0.0s
[CV] C=8.797716839754962, degree=4, epsilon=0.7060717782306047, gamma=0.37243742086568243, kernel=linear 
[CV]  C=8.797716839754962, degree=4, epsilon=0

[CV]  C=1.4662138440281158, degree=3, epsilon=0.5557699603648528, gamma=0.9666170579794359, kernel=poly, score=0.302, total=   0.0s
[CV] C=1.4662138440281158, degree=3, epsilon=0.5557699603648528, gamma=0.9666170579794359, kernel=poly 
[CV]  C=1.4662138440281158, degree=3, epsilon=0.5557699603648528, gamma=0.9666170579794359, kernel=poly, score=0.213, total=   0.0s
[CV] C=1.4662138440281158, degree=3, epsilon=0.5557699603648528, gamma=0.9666170579794359, kernel=poly 
[CV]  C=1.4662138440281158, degree=3, epsilon=0.5557699603648528, gamma=0.9666170579794359, kernel=poly, score=0.290, total=   0.0s
[CV] C=1.4662138440281158, degree=3, epsilon=0.5557699603648528, gamma=0.9666170579794359, kernel=poly 
[CV]  C=1.4662138440281158, degree=3, epsilon=0.5557699603648528, gamma=0.9666170579794359, kernel=poly, score=0.277, total=   0.0s
[CV] C=5.889111521734022, degree=8, epsilon=0.8456392840740129, gamma=0.37269023338556573, kernel=poly 
[CV]  C=5.889111521734022, degree=8, epsilon=0.845639284

[CV]  C=2.8592025941623636, degree=8, epsilon=0.5686128760866389, gamma=0.00815160368292045, kernel=sigmoid, score=0.014, total=   0.0s
[CV] C=2.8592025941623636, degree=8, epsilon=0.5686128760866389, gamma=0.00815160368292045, kernel=sigmoid 
[CV]  C=2.8592025941623636, degree=8, epsilon=0.5686128760866389, gamma=0.00815160368292045, kernel=sigmoid, score=0.013, total=   0.0s
[CV] C=2.8592025941623636, degree=8, epsilon=0.5686128760866389, gamma=0.00815160368292045, kernel=sigmoid 
[CV]  C=2.8592025941623636, degree=8, epsilon=0.5686128760866389, gamma=0.00815160368292045, kernel=sigmoid, score=0.007, total=   0.0s
[CV] C=2.8592025941623636, degree=8, epsilon=0.5686128760866389, gamma=0.00815160368292045, kernel=sigmoid 
[CV]  C=2.8592025941623636, degree=8, epsilon=0.5686128760866389, gamma=0.00815160368292045, kernel=sigmoid, score=-0.035, total=   0.0s
[CV] C=4.600996371174768, degree=10, epsilon=0.6512565330685859, gamma=0.26162181427661824, kernel=linear 
[CV]  C=4.60099637117476

[CV]  C=3.3903936412977553, degree=9, epsilon=0.4334587696652026, gamma=0.3197582291518043, kernel=linear, score=0.273, total=   0.0s
[CV] C=2.220550811848164, degree=7, epsilon=0.4186895343558785, gamma=0.9911503644554672, kernel=rbf 
[CV]  C=2.220550811848164, degree=7, epsilon=0.4186895343558785, gamma=0.9911503644554672, kernel=rbf, score=-0.027, total=   0.0s
[CV] C=2.220550811848164, degree=7, epsilon=0.4186895343558785, gamma=0.9911503644554672, kernel=rbf 
[CV]  C=2.220550811848164, degree=7, epsilon=0.4186895343558785, gamma=0.9911503644554672, kernel=rbf, score=0.031, total=   0.0s
[CV] C=2.220550811848164, degree=7, epsilon=0.4186895343558785, gamma=0.9911503644554672, kernel=rbf 
[CV]  C=2.220550811848164, degree=7, epsilon=0.4186895343558785, gamma=0.9911503644554672, kernel=rbf, score=0.031, total=   0.0s
[CV] C=2.220550811848164, degree=7, epsilon=0.4186895343558785, gamma=0.9911503644554672, kernel=rbf 
[CV]  C=2.220550811848164, degree=7, epsilon=0.4186895343558785, ga

[CV]  C=6.2452562966349126, degree=3, epsilon=0.46996890481906795, gamma=0.7288223220150779, kernel=poly, score=0.296, total=   0.0s
[CV] C=6.2452562966349126, degree=3, epsilon=0.46996890481906795, gamma=0.7288223220150779, kernel=poly 
[CV]  C=6.2452562966349126, degree=3, epsilon=0.46996890481906795, gamma=0.7288223220150779, kernel=poly, score=0.292, total=   0.0s
[CV] C=2.599686182794417, degree=10, epsilon=0.2458820196859598, gamma=0.22018679514312323, kernel=poly 
[CV]  C=2.599686182794417, degree=10, epsilon=0.2458820196859598, gamma=0.22018679514312323, kernel=poly, score=0.017, total=   0.0s
[CV] C=2.599686182794417, degree=10, epsilon=0.2458820196859598, gamma=0.22018679514312323, kernel=poly 
[CV]  C=2.599686182794417, degree=10, epsilon=0.2458820196859598, gamma=0.22018679514312323, kernel=poly, score=-0.337, total=   0.0s
[CV] C=2.599686182794417, degree=10, epsilon=0.2458820196859598, gamma=0.22018679514312323, kernel=poly 
[CV]  C=2.599686182794417, degree=10, epsilon=0

[CV]  C=3.9521399606009466, degree=7, epsilon=0.8827863245032482, gamma=0.5777673604939588, kernel=sigmoid, score=0.232, total=   0.0s
[CV] C=3.9521399606009466, degree=7, epsilon=0.8827863245032482, gamma=0.5777673604939588, kernel=sigmoid 
[CV]  C=3.9521399606009466, degree=7, epsilon=0.8827863245032482, gamma=0.5777673604939588, kernel=sigmoid, score=0.230, total=   0.0s
[CV] C=3.9521399606009466, degree=7, epsilon=0.8827863245032482, gamma=0.5777673604939588, kernel=sigmoid 
[CV]  C=3.9521399606009466, degree=7, epsilon=0.8827863245032482, gamma=0.5777673604939588, kernel=sigmoid, score=0.188, total=   0.0s
[CV] C=9.054306479938614, degree=1, epsilon=0.47080354082001696, gamma=0.14653222134654453, kernel=rbf 
[CV]  C=9.054306479938614, degree=1, epsilon=0.47080354082001696, gamma=0.14653222134654453, kernel=rbf, score=0.107, total=   0.0s
[CV] C=9.054306479938614, degree=1, epsilon=0.47080354082001696, gamma=0.14653222134654453, kernel=rbf 
[CV]  C=9.054306479938614, degree=1, epsi

[CV]  C=2.7751797235571987, degree=7, epsilon=0.09531304110079919, gamma=0.14459159746096917, kernel=linear, score=0.305, total=   0.0s
[CV] C=2.7751797235571987, degree=7, epsilon=0.09531304110079919, gamma=0.14459159746096917, kernel=linear 
[CV]  C=2.7751797235571987, degree=7, epsilon=0.09531304110079919, gamma=0.14459159746096917, kernel=linear, score=0.249, total=   0.0s
[CV] C=1.56899877758938, degree=7, epsilon=0.7491974263455657, gamma=0.5748485372100789, kernel=sigmoid 
[CV]  C=1.56899877758938, degree=7, epsilon=0.7491974263455657, gamma=0.5748485372100789, kernel=sigmoid, score=0.077, total=   0.0s
[CV] C=1.56899877758938, degree=7, epsilon=0.7491974263455657, gamma=0.5748485372100789, kernel=sigmoid 
[CV]  C=1.56899877758938, degree=7, epsilon=0.7491974263455657, gamma=0.5748485372100789, kernel=sigmoid, score=0.172, total=   0.0s
[CV] C=1.56899877758938, degree=7, epsilon=0.7491974263455657, gamma=0.5748485372100789, kernel=sigmoid 
[CV]  C=1.56899877758938, degree=7, eps

[CV]  C=6.4093309463825126, degree=4, epsilon=0.7959387028782969, gamma=0.5107633521462852, kernel=rbf, score=0.131, total=   0.0s
[CV] C=6.4093309463825126, degree=4, epsilon=0.7959387028782969, gamma=0.5107633521462852, kernel=rbf 
[CV]  C=6.4093309463825126, degree=4, epsilon=0.7959387028782969, gamma=0.5107633521462852, kernel=rbf, score=0.077, total=   0.0s
[CV] C=7.1528762505607615, degree=10, epsilon=0.10527947983718833, gamma=0.405510172802798, kernel=linear 
[CV]  C=7.1528762505607615, degree=10, epsilon=0.10527947983718833, gamma=0.405510172802798, kernel=linear, score=0.266, total=   0.0s
[CV] C=7.1528762505607615, degree=10, epsilon=0.10527947983718833, gamma=0.405510172802798, kernel=linear 
[CV]  C=7.1528762505607615, degree=10, epsilon=0.10527947983718833, gamma=0.405510172802798, kernel=linear, score=0.386, total=   0.0s
[CV] C=7.1528762505607615, degree=10, epsilon=0.10527947983718833, gamma=0.405510172802798, kernel=linear 
[CV]  C=7.1528762505607615, degree=10, epsil

[CV]  C=5.612738072128549, degree=6, epsilon=0.371687592944664, gamma=0.4949739653461287, kernel=rbf, score=0.063, total=   0.0s
[CV] C=6.5277076146012005, degree=2, epsilon=0.5656682241044497, gamma=0.5326267727136055, kernel=linear 
[CV]  C=6.5277076146012005, degree=2, epsilon=0.5656682241044497, gamma=0.5326267727136055, kernel=linear, score=0.264, total=   0.0s
[CV] C=6.5277076146012005, degree=2, epsilon=0.5656682241044497, gamma=0.5326267727136055, kernel=linear 
[CV]  C=6.5277076146012005, degree=2, epsilon=0.5656682241044497, gamma=0.5326267727136055, kernel=linear, score=0.375, total=   0.0s
[CV] C=6.5277076146012005, degree=2, epsilon=0.5656682241044497, gamma=0.5326267727136055, kernel=linear 
[CV]  C=6.5277076146012005, degree=2, epsilon=0.5656682241044497, gamma=0.5326267727136055, kernel=linear, score=0.293, total=   0.0s
[CV] C=6.5277076146012005, degree=2, epsilon=0.5656682241044497, gamma=0.5326267727136055, kernel=linear 
[CV]  C=6.5277076146012005, degree=2, epsilon

[CV]  C=1.5018051796992882, degree=10, epsilon=0.4002777518565317, gamma=0.22529328253100633, kernel=sigmoid, score=0.125, total=   0.0s
[CV] C=1.5018051796992882, degree=10, epsilon=0.4002777518565317, gamma=0.22529328253100633, kernel=sigmoid 
[CV]  C=1.5018051796992882, degree=10, epsilon=0.4002777518565317, gamma=0.22529328253100633, kernel=sigmoid, score=0.120, total=   0.0s
[CV] C=1.5018051796992882, degree=10, epsilon=0.4002777518565317, gamma=0.22529328253100633, kernel=sigmoid 
[CV]  C=1.5018051796992882, degree=10, epsilon=0.4002777518565317, gamma=0.22529328253100633, kernel=sigmoid, score=0.070, total=   0.0s
[CV] C=7.670416196172548, degree=7, epsilon=0.3835170088361447, gamma=0.4731864527052012, kernel=rbf 
[CV]  C=7.670416196172548, degree=7, epsilon=0.3835170088361447, gamma=0.4731864527052012, kernel=rbf, score=0.064, total=   0.0s
[CV] C=7.670416196172548, degree=7, epsilon=0.3835170088361447, gamma=0.4731864527052012, kernel=rbf 
[CV]  C=7.670416196172548, degree=7, 

[CV]  C=3.653469115213511, degree=5, epsilon=0.6669287450979801, gamma=0.7225672283989506, kernel=poly, score=0.219, total=   0.1s
[CV] C=3.653469115213511, degree=5, epsilon=0.6669287450979801, gamma=0.7225672283989506, kernel=poly 
[CV]  C=3.653469115213511, degree=5, epsilon=0.6669287450979801, gamma=0.7225672283989506, kernel=poly, score=0.236, total=   0.2s
[CV] C=3.653469115213511, degree=5, epsilon=0.6669287450979801, gamma=0.7225672283989506, kernel=poly 
[CV]  C=3.653469115213511, degree=5, epsilon=0.6669287450979801, gamma=0.7225672283989506, kernel=poly, score=0.243, total=   0.1s
[CV] C=4.968847044751148, degree=3, epsilon=0.8009359670282854, gamma=0.5193268950794381, kernel=rbf 
[CV]  C=4.968847044751148, degree=3, epsilon=0.8009359670282854, gamma=0.5193268950794381, kernel=rbf, score=0.029, total=   0.0s
[CV] C=4.968847044751148, degree=3, epsilon=0.8009359670282854, gamma=0.5193268950794381, kernel=rbf 
[CV]  C=4.968847044751148, degree=3, epsilon=0.8009359670282854, ga

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed: 10.6min finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                                 epsilon=0.1, gamma='scale', kernel='rbf',
                                 max_iter=-1, shrinking=True, tol=0.001,
                                 verbose=False),
                   iid='deprecated', n_iter=100, n_jobs=1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f300a088290>,
                                        'degree': [1, 2, 3, 4, 5, 6, 7, 8, 9,
                                                   10],
                                        'epsilon': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f300a85f350>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f300a85f1d0>,
                                        'kernel': ['linear', 'poly', 'rbf',
                                                   'si

In [15]:
print(f'Best score: {rps.best_score_}')
rps.best_params_ 
# Co ciekawe najwyższy wynik uzyskujemy przy krenelu liniowym który jest najprostszy



Best score: 0.3383690233467386


{'C': 9.597329703009706,
 'degree': 4,
 'epsilon': 0.9404971670674875,
 'gamma': 0.8336685232275718,
 'kernel': 'linear'}

In [19]:

svm_best=rps.best_estimator_
svm_best.fit(ohe.fit_transform(scaled_X_train),y_train)
y_pred_ohe = svm_best.predict(ohe.transform(scaled_X_test))



print(f'RMSE : {mean_squared_error(y_test,y_pred_ohe,squared=False)}')
print(f'R2 score : {r2_score(y_test,y_pred_ohe)}')

# Wynik zbilrzony do CV co oznacza stabilność algorytmu 

RMSE : 703.8020388332371
R2 score : 0.33070594847605306


Drugi zbiór 
====
Z biblioteki OpenMl https://www.openml.org/d/1462. Zadanie polega na klasyfikacij binarnej. Klasyfikujemy czy dana soba ma cukrzycę czy nie.

In [20]:
data2 = pd.read_csv('dataset_37_diabetes.csv')
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
preg     768 non-null int64
plas     768 non-null int64
pres     768 non-null int64
skin     768 non-null int64
insu     768 non-null int64
mass     768 non-null float64
pedi     768 non-null float64
age      768 non-null int64
class    768 non-null object
dtypes: float64(2), int64(6), object(1)
memory usage: 54.1+ KB


In [36]:

#Żadna z danych nie jest bardzo skośna (nie potrzeba dodatkowych przekształceń)
from collections import Counter

print(Counter(data2['class']))
# Klasy nie są idelanie zbalansowane ale miary takie jak acc powinny mieć sens 
data2.skew()

Counter({'tested_negative': 500, 'tested_positive': 268})


preg    0.901674
plas    0.173754
pres   -1.843608
skin    0.109372
insu    2.272251
mass   -0.428982
pedi    1.919911
age     1.129597
dtype: float64

In [39]:
# Trzeba poprawić labele 
data2['class'] = np.where(data2['class']=='tested_negative',0,1)

In [42]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(data2.drop('class', axis=1), data2['class'])

In [43]:
# skalowanie 
scal = StandardScaler(copy=False)

X_train2 = pd.DataFrame(scal.fit_transform(X_train2))
X_test2 = pd.DataFrame(scal.transform(X_test2))

In [44]:
# test bez CV i z podstawowymi parametrami 
svm_ = SVC(probability=True)
# Tym razem nie ma potrzeby kodowania 
svm_.fit(X_train2,y_train2)

from sklearn.metrics import accuracy_score,auc,roc_curve,f1_score,recall_score


y_pred = svm_.predict(X_test2)





print(f'Accuracy : {accuracy_score(y_test2,y_pred)}')
print(f'F1_score : {f1_score(y_test2,y_pred)}')
print(f'Reccal : {recall_score(y_test2,y_pred)}')

Accuracy : 0.8020833333333334
F1_score : 0.6481481481481481
Reccal : 0.5645161290322581


Wyniki zwłaszcza reccal nie są zbyt dobre.

In [71]:
param_distribution = {
    'kernel':['linear', 'poly', 'rbf', 'sigmoid',],
    'gamma': uniform(),
    'degree':[i for i in range(1,11)],
    'C':uniform(0,10),
    
}
rps_c = RandomizedSearchCV(SVC(),param_distributions=param_distribution,cv=5,n_jobs=1,n_iter=200,verbose=6)

In [72]:
rps_c.fit(X_train2,y_train2)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits
[CV] C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf 
[CV]  C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf, score=0.655, total=   0.0s
[CV] C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf 
[CV]  C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf, score=0.730, total=   0.0s
[CV] C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf 
[CV]  C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf, score=0.722, total=   0.0s
[CV] C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf 
[CV]  C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf, score=0.713, total=   0.0s
[CV] C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf 
[CV]  C=1.300717294414977, degree=2, gamma=0.4483790669125597, kernel=rbf, score=0.748, total=   0.0s
[CV] C=8.155340995378964, degree=10, gamma=0.35894981784

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s


[CV]  C=5.419829621202723, degree=6, gamma=0.24616389537128402, kernel=poly, score=0.609, total=   0.0s
[CV] C=5.419829621202723, degree=6, gamma=0.24616389537128402, kernel=poly 
[CV]  C=5.419829621202723, degree=6, gamma=0.24616389537128402, kernel=poly, score=0.704, total=   0.0s
[CV] C=5.419829621202723, degree=6, gamma=0.24616389537128402, kernel=poly 
[CV]  C=5.419829621202723, degree=6, gamma=0.24616389537128402, kernel=poly, score=0.643, total=   0.0s
[CV] C=2.304885825993198, degree=10, gamma=0.6904521528904972, kernel=sigmoid 
[CV]  C=2.304885825993198, degree=10, gamma=0.6904521528904972, kernel=sigmoid, score=0.612, total=   0.0s
[CV] C=2.304885825993198, degree=10, gamma=0.6904521528904972, kernel=sigmoid 
[CV]  C=2.304885825993198, degree=10, gamma=0.6904521528904972, kernel=sigmoid, score=0.643, total=   0.0s
[CV] C=2.304885825993198, degree=10, gamma=0.6904521528904972, kernel=sigmoid 
[CV]  C=2.304885825993198, degree=10, gamma=0.6904521528904972, kernel=sigmoid, score

[CV]  C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf, score=0.672, total=   0.0s
[CV] C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf 
[CV]  C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf, score=0.730, total=   0.0s
[CV] C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf 
[CV]  C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf, score=0.687, total=   0.0s
[CV] C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf 
[CV]  C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf, score=0.704, total=   0.0s
[CV] C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf 
[CV]  C=5.456617375363159, degree=9, gamma=0.6088365315877902, kernel=rbf, score=0.730, total=   0.0s
[CV] C=7.344723782804735, degree=4, gamma=0.00033673529446309747, kernel=sigmoid 
[CV]  C=7.344723782804735, degree=4, gamma=0.00033673529446309747, kernel=sigmoid, score=0.638, total=   0.0s
[C

[CV]  C=4.470914176805236, degree=7, gamma=0.5080447732873026, kernel=linear, score=0.783, total=   0.0s
[CV] C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid 
[CV]  C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid, score=0.655, total=   0.0s
[CV] C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid 
[CV]  C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid, score=0.661, total=   0.0s
[CV] C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid 
[CV]  C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid, score=0.722, total=   0.0s
[CV] C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid 
[CV]  C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid, score=0.652, total=   0.0s
[CV] C=0.1392927422800072, degree=10, gamma=0.6214406046750626, kernel=sigmoid 
[CV]  C=0.1392927422800072, degree=10, gamma=0.6214406046750626

[CV]  C=3.2390675753489018, degree=2, gamma=0.14172889065256655, kernel=sigmoid, score=0.661, total=   0.0s
[CV] C=3.2390675753489018, degree=2, gamma=0.14172889065256655, kernel=sigmoid 
[CV]  C=3.2390675753489018, degree=2, gamma=0.14172889065256655, kernel=sigmoid, score=0.722, total=   0.0s
[CV] C=3.2390675753489018, degree=2, gamma=0.14172889065256655, kernel=sigmoid 
[CV]  C=3.2390675753489018, degree=2, gamma=0.14172889065256655, kernel=sigmoid, score=0.643, total=   0.0s
[CV] C=3.2390675753489018, degree=2, gamma=0.14172889065256655, kernel=sigmoid 
[CV]  C=3.2390675753489018, degree=2, gamma=0.14172889065256655, kernel=sigmoid, score=0.661, total=   0.0s
[CV] C=2.0575161570658818, degree=1, gamma=0.9243876419467226, kernel=linear 
[CV]  C=2.0575161570658818, degree=1, gamma=0.9243876419467226, kernel=linear, score=0.707, total=   0.0s
[CV] C=2.0575161570658818, degree=1, gamma=0.9243876419467226, kernel=linear 
[CV]  C=2.0575161570658818, degree=1, gamma=0.9243876419467226, ke

[CV]  C=7.395197090754753, degree=5, gamma=0.7142793039863164, kernel=poly, score=0.687, total=   0.1s
[CV] C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf 
[CV]  C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf, score=0.707, total=   0.0s
[CV] C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf 
[CV]  C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf, score=0.687, total=   0.0s
[CV] C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf 
[CV]  C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf, score=0.687, total=   0.0s
[CV] C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf 
[CV]  C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf, score=0.670, total=   0.0s
[CV] C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf 
[CV]  C=3.469569010692799, degree=6, gamma=0.9124157420554064, kernel=rbf, score=0.730, total=   0.0s
[CV] C=9.39646109

[CV]  C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid, score=0.655, total=   0.0s
[CV] C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid 
[CV]  C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid, score=0.643, total=   0.0s
[CV] C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid 
[CV]  C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid, score=0.765, total=   0.0s
[CV] C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid 
[CV]  C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid, score=0.696, total=   0.0s
[CV] C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid 
[CV]  C=5.332563477944037, degree=7, gamma=0.07000595164893342, kernel=sigmoid, score=0.678, total=   0.0s
[CV] C=6.1685162084350935, degree=8, gamma=0.9005209547116805, kernel=linear 
[CV]  C=6.1685162084350935, degree=8, gamma=0.9005209547116805, kernel=

[CV]  C=2.4911584313824306, degree=9, gamma=0.47355538505051387, kernel=poly, score=0.652, total=   0.0s
[CV] C=2.4911584313824306, degree=9, gamma=0.47355538505051387, kernel=poly 
[CV]  C=2.4911584313824306, degree=9, gamma=0.47355538505051387, kernel=poly, score=0.678, total=   0.0s
[CV] C=2.4911584313824306, degree=9, gamma=0.47355538505051387, kernel=poly 
[CV]  C=2.4911584313824306, degree=9, gamma=0.47355538505051387, kernel=poly, score=0.722, total=   0.0s
[CV] C=2.4911584313824306, degree=9, gamma=0.47355538505051387, kernel=poly 
[CV]  C=2.4911584313824306, degree=9, gamma=0.47355538505051387, kernel=poly, score=0.748, total=   0.0s
[CV] C=9.950513415678943, degree=7, gamma=0.4950380443136101, kernel=poly 
[CV]  C=9.950513415678943, degree=7, gamma=0.4950380443136101, kernel=poly, score=0.681, total=   0.0s
[CV] C=9.950513415678943, degree=7, gamma=0.4950380443136101, kernel=poly 
[CV]  C=9.950513415678943, degree=7, gamma=0.4950380443136101, kernel=poly, score=0.687, total= 

[CV]  C=2.440899402964841, degree=5, gamma=0.7074708094128989, kernel=poly, score=0.652, total=   0.0s
[CV] C=2.440899402964841, degree=5, gamma=0.7074708094128989, kernel=poly 
[CV]  C=2.440899402964841, degree=5, gamma=0.7074708094128989, kernel=poly, score=0.739, total=   0.0s
[CV] C=2.2290261863060943, degree=4, gamma=0.9079338901228297, kernel=linear 
[CV]  C=2.2290261863060943, degree=4, gamma=0.9079338901228297, kernel=linear, score=0.707, total=   0.0s
[CV] C=2.2290261863060943, degree=4, gamma=0.9079338901228297, kernel=linear 
[CV]  C=2.2290261863060943, degree=4, gamma=0.9079338901228297, kernel=linear, score=0.730, total=   0.0s
[CV] C=2.2290261863060943, degree=4, gamma=0.9079338901228297, kernel=linear 
[CV]  C=2.2290261863060943, degree=4, gamma=0.9079338901228297, kernel=linear, score=0.783, total=   0.0s
[CV] C=2.2290261863060943, degree=4, gamma=0.9079338901228297, kernel=linear 
[CV]  C=2.2290261863060943, degree=4, gamma=0.9079338901228297, kernel=linear, score=0.77

[CV]  C=1.021117578909696, degree=7, gamma=0.24999393059039154, kernel=rbf, score=0.757, total=   0.0s
[CV] C=1.021117578909696, degree=7, gamma=0.24999393059039154, kernel=rbf 
[CV]  C=1.021117578909696, degree=7, gamma=0.24999393059039154, kernel=rbf, score=0.765, total=   0.0s
[CV] C=6.241579555763773, degree=5, gamma=0.455739951717237, kernel=rbf 
[CV]  C=6.241579555763773, degree=5, gamma=0.455739951717237, kernel=rbf, score=0.664, total=   0.0s
[CV] C=6.241579555763773, degree=5, gamma=0.455739951717237, kernel=rbf 
[CV]  C=6.241579555763773, degree=5, gamma=0.455739951717237, kernel=rbf, score=0.713, total=   0.0s
[CV] C=6.241579555763773, degree=5, gamma=0.455739951717237, kernel=rbf 
[CV]  C=6.241579555763773, degree=5, gamma=0.455739951717237, kernel=rbf, score=0.678, total=   0.0s
[CV] C=6.241579555763773, degree=5, gamma=0.455739951717237, kernel=rbf 
[CV]  C=6.241579555763773, degree=5, gamma=0.455739951717237, kernel=rbf, score=0.730, total=   0.0s
[CV] C=6.24157955576377

[CV]  C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly, score=0.655, total=   0.0s
[CV] C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly 
[CV]  C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly, score=0.609, total=   0.0s
[CV] C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly 
[CV]  C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly, score=0.635, total=   0.0s
[CV] C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly 
[CV]  C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly, score=0.687, total=   0.0s
[CV] C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly 
[CV]  C=4.876608958234617, degree=8, gamma=0.2509378524739878, kernel=poly, score=0.661, total=   0.0s
[CV] C=5.877163854640902, degree=2, gamma=0.9607060364723474, kernel=linear 
[CV]  C=5.877163854640902, degree=2, gamma=0.9607060364723474, kernel=linear, score=0.707, total=   0.0s
[CV

[CV]  C=5.469774553732139, degree=9, gamma=0.9426136598858849, kernel=linear, score=0.783, total=   0.0s
[CV] C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf 
[CV]  C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf, score=0.698, total=   0.0s
[CV] C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf 
[CV]  C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf, score=0.713, total=   0.0s
[CV] C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf 
[CV]  C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf, score=0.704, total=   0.0s
[CV] C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf 
[CV]  C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf, score=0.687, total=   0.0s
[CV] C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf 
[CV]  C=1.8587869874973073, degree=3, gamma=0.8461672817681373, kernel=rbf, score=0.730, total=   0.0s
[CV] 

[CV]  C=4.27366644579712, degree=4, gamma=0.750817232053329, kernel=sigmoid, score=0.643, total=   0.0s
[CV] C=4.27366644579712, degree=4, gamma=0.750817232053329, kernel=sigmoid 
[CV]  C=4.27366644579712, degree=4, gamma=0.750817232053329, kernel=sigmoid, score=0.722, total=   0.0s
[CV] C=4.27366644579712, degree=4, gamma=0.750817232053329, kernel=sigmoid 
[CV]  C=4.27366644579712, degree=4, gamma=0.750817232053329, kernel=sigmoid, score=0.557, total=   0.0s
[CV] C=4.27366644579712, degree=4, gamma=0.750817232053329, kernel=sigmoid 
[CV]  C=4.27366644579712, degree=4, gamma=0.750817232053329, kernel=sigmoid, score=0.670, total=   0.0s
[CV] C=1.547869462692203, degree=5, gamma=0.4706113420115581, kernel=sigmoid 
[CV]  C=1.547869462692203, degree=5, gamma=0.4706113420115581, kernel=sigmoid, score=0.621, total=   0.0s
[CV] C=1.547869462692203, degree=5, gamma=0.4706113420115581, kernel=sigmoid 
[CV]  C=1.547869462692203, degree=5, gamma=0.4706113420115581, kernel=sigmoid, score=0.661, to

[CV]  C=4.282605772498277, degree=7, gamma=0.7980149155016862, kernel=linear, score=0.774, total=   0.0s
[CV] C=4.282605772498277, degree=7, gamma=0.7980149155016862, kernel=linear 
[CV]  C=4.282605772498277, degree=7, gamma=0.7980149155016862, kernel=linear, score=0.783, total=   0.0s
[CV] C=7.346707247796955, degree=3, gamma=0.3707706428353802, kernel=sigmoid 
[CV]  C=7.346707247796955, degree=3, gamma=0.3707706428353802, kernel=sigmoid, score=0.655, total=   0.0s
[CV] C=7.346707247796955, degree=3, gamma=0.3707706428353802, kernel=sigmoid 
[CV]  C=7.346707247796955, degree=3, gamma=0.3707706428353802, kernel=sigmoid, score=0.643, total=   0.0s
[CV] C=7.346707247796955, degree=3, gamma=0.3707706428353802, kernel=sigmoid 
[CV]  C=7.346707247796955, degree=3, gamma=0.3707706428353802, kernel=sigmoid, score=0.687, total=   0.0s
[CV] C=7.346707247796955, degree=3, gamma=0.3707706428353802, kernel=sigmoid 
[CV]  C=7.346707247796955, degree=3, gamma=0.3707706428353802, kernel=sigmoid, scor

[CV]  C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly, score=0.647, total=   0.2s
[CV] C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly 
[CV]  C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly, score=0.696, total=   0.2s
[CV] C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly 
[CV]  C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly, score=0.704, total=   0.4s
[CV] C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly 
[CV]  C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly, score=0.765, total=   0.1s
[CV] C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly 
[CV]  C=8.239421672128568, degree=2, gamma=0.6998591719246152, kernel=poly, score=0.670, total=   0.4s
[CV] C=2.8853476699327962, degree=3, gamma=0.7462215757724159, kernel=sigmoid 
[CV]  C=2.8853476699327962, degree=3, gamma=0.7462215757724159, kernel=sigmoid, score=0.612, total=   0.0s

[CV]  C=8.668719889945255, degree=1, gamma=0.021893183380886305, kernel=sigmoid, score=0.774, total=   0.0s
[CV] C=8.668719889945255, degree=1, gamma=0.021893183380886305, kernel=sigmoid 
[CV]  C=8.668719889945255, degree=1, gamma=0.021893183380886305, kernel=sigmoid, score=0.757, total=   0.0s
[CV] C=8.668719889945255, degree=1, gamma=0.021893183380886305, kernel=sigmoid 
[CV]  C=8.668719889945255, degree=1, gamma=0.021893183380886305, kernel=sigmoid, score=0.783, total=   0.0s
[CV] C=3.6774286676732304, degree=10, gamma=0.8578406868899918, kernel=rbf 
[CV]  C=3.6774286676732304, degree=10, gamma=0.8578406868899918, kernel=rbf, score=0.707, total=   0.0s
[CV] C=3.6774286676732304, degree=10, gamma=0.8578406868899918, kernel=rbf 
[CV]  C=3.6774286676732304, degree=10, gamma=0.8578406868899918, kernel=rbf, score=0.696, total=   0.0s
[CV] C=3.6774286676732304, degree=10, gamma=0.8578406868899918, kernel=rbf 
[CV]  C=3.6774286676732304, degree=10, gamma=0.8578406868899918, kernel=rbf, sco

[CV]  C=1.7660309848826772, degree=6, gamma=0.49330810298741845, kernel=poly, score=0.670, total=   0.0s
[CV] C=1.7660309848826772, degree=6, gamma=0.49330810298741845, kernel=poly 
[CV]  C=1.7660309848826772, degree=6, gamma=0.49330810298741845, kernel=poly, score=0.626, total=   0.0s
[CV] C=0.2340651347440037, degree=3, gamma=0.8115805620969722, kernel=linear 
[CV]  C=0.2340651347440037, degree=3, gamma=0.8115805620969722, kernel=linear, score=0.707, total=   0.0s
[CV] C=0.2340651347440037, degree=3, gamma=0.8115805620969722, kernel=linear 
[CV]  C=0.2340651347440037, degree=3, gamma=0.8115805620969722, kernel=linear, score=0.730, total=   0.0s
[CV] C=0.2340651347440037, degree=3, gamma=0.8115805620969722, kernel=linear 
[CV]  C=0.2340651347440037, degree=3, gamma=0.8115805620969722, kernel=linear, score=0.783, total=   0.0s
[CV] C=0.2340651347440037, degree=3, gamma=0.8115805620969722, kernel=linear 
[CV]  C=0.2340651347440037, degree=3, gamma=0.8115805620969722, kernel=linear, scor

[CV]  C=4.097110536933348, degree=5, gamma=0.6948081726585573, kernel=poly, score=0.583, total=   0.0s
[CV] C=4.097110536933348, degree=5, gamma=0.6948081726585573, kernel=poly 
[CV]  C=4.097110536933348, degree=5, gamma=0.6948081726585573, kernel=poly, score=0.670, total=   0.0s
[CV] C=4.097110536933348, degree=5, gamma=0.6948081726585573, kernel=poly 
[CV]  C=4.097110536933348, degree=5, gamma=0.6948081726585573, kernel=poly, score=0.713, total=   0.0s
[CV] C=9.21047295473799, degree=1, gamma=0.9122637093835879, kernel=rbf 
[CV]  C=9.21047295473799, degree=1, gamma=0.9122637093835879, kernel=rbf, score=0.698, total=   0.0s
[CV] C=9.21047295473799, degree=1, gamma=0.9122637093835879, kernel=rbf 
[CV]  C=9.21047295473799, degree=1, gamma=0.9122637093835879, kernel=rbf, score=0.696, total=   0.0s
[CV] C=9.21047295473799, degree=1, gamma=0.9122637093835879, kernel=rbf 
[CV]  C=9.21047295473799, degree=1, gamma=0.9122637093835879, kernel=rbf, score=0.687, total=   0.0s
[CV] C=9.2104729547

[CV]  C=1.9482988946242186, degree=5, gamma=0.47635388463550754, kernel=sigmoid, score=0.652, total=   0.0s
[CV] C=1.9482988946242186, degree=5, gamma=0.47635388463550754, kernel=sigmoid 
[CV]  C=1.9482988946242186, degree=5, gamma=0.47635388463550754, kernel=sigmoid, score=0.678, total=   0.0s
[CV] C=1.9482988946242186, degree=5, gamma=0.47635388463550754, kernel=sigmoid 
[CV]  C=1.9482988946242186, degree=5, gamma=0.47635388463550754, kernel=sigmoid, score=0.609, total=   0.0s
[CV] C=1.9482988946242186, degree=5, gamma=0.47635388463550754, kernel=sigmoid 
[CV]  C=1.9482988946242186, degree=5, gamma=0.47635388463550754, kernel=sigmoid, score=0.652, total=   0.0s
[CV] C=8.518014766520295, degree=4, gamma=0.07110703122764117, kernel=rbf 
[CV]  C=8.518014766520295, degree=4, gamma=0.07110703122764117, kernel=rbf, score=0.672, total=   0.0s
[CV] C=8.518014766520295, degree=4, gamma=0.07110703122764117, kernel=rbf 
[CV]  C=8.518014766520295, degree=4, gamma=0.07110703122764117, kernel=rbf,

[CV]  C=6.166556016604491, degree=9, gamma=0.9155944041008112, kernel=linear, score=0.783, total=   0.0s
[CV] C=6.166556016604491, degree=9, gamma=0.9155944041008112, kernel=linear 
[CV]  C=6.166556016604491, degree=9, gamma=0.9155944041008112, kernel=linear, score=0.774, total=   0.0s
[CV] C=6.166556016604491, degree=9, gamma=0.9155944041008112, kernel=linear 
[CV]  C=6.166556016604491, degree=9, gamma=0.9155944041008112, kernel=linear, score=0.783, total=   0.0s
[CV] C=1.3617991002851448, degree=8, gamma=0.6336133925949222, kernel=rbf 
[CV]  C=1.3617991002851448, degree=8, gamma=0.6336133925949222, kernel=rbf, score=0.655, total=   0.0s
[CV] C=1.3617991002851448, degree=8, gamma=0.6336133925949222, kernel=rbf 
[CV]  C=1.3617991002851448, degree=8, gamma=0.6336133925949222, kernel=rbf, score=0.730, total=   0.0s
[CV] C=1.3617991002851448, degree=8, gamma=0.6336133925949222, kernel=rbf 
[CV]  C=1.3617991002851448, degree=8, gamma=0.6336133925949222, kernel=rbf, score=0.704, total=   0.

[CV]  C=7.318384667434713, degree=2, gamma=0.3592012007294024, kernel=sigmoid, score=0.600, total=   0.0s
[CV] C=7.318384667434713, degree=2, gamma=0.3592012007294024, kernel=sigmoid 
[CV]  C=7.318384667434713, degree=2, gamma=0.3592012007294024, kernel=sigmoid, score=0.643, total=   0.0s
[CV] C=9.592855299275637, degree=1, gamma=0.999857942282362, kernel=linear 
[CV]  C=9.592855299275637, degree=1, gamma=0.999857942282362, kernel=linear, score=0.707, total=   0.0s
[CV] C=9.592855299275637, degree=1, gamma=0.999857942282362, kernel=linear 
[CV]  C=9.592855299275637, degree=1, gamma=0.999857942282362, kernel=linear, score=0.730, total=   0.0s
[CV] C=9.592855299275637, degree=1, gamma=0.999857942282362, kernel=linear 
[CV]  C=9.592855299275637, degree=1, gamma=0.999857942282362, kernel=linear, score=0.783, total=   0.0s
[CV] C=9.592855299275637, degree=1, gamma=0.999857942282362, kernel=linear 
[CV]  C=9.592855299275637, degree=1, gamma=0.999857942282362, kernel=linear, score=0.774, tota

[Parallel(n_jobs=1)]: Done 1000 out of 1000 | elapsed:   36.0s finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                                 class_weight=None, coef0=0.0,
                                 decision_function_shape='ovr', degree=3,
                                 gamma='scale', kernel='rbf', max_iter=-1,
                                 probability=False, random_state=None,
                                 shrinking=True, tol=0.001, verbose=False),
                   iid='deprecated', n_iter=200, n_jobs=1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f3222648050>,
                                        'degree': [1, 2, 3, 4, 5, 6, 7, 8, 9,
                                                   10],
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f3222648e10>,
                                        'kernel': ['linear', 'poly', 'rbf',
                  

In [73]:
print(rps_c.best_score_)
# Najlepszy wynik nie różni się znacznie od parametrów domyślnych
rps_c.best_params_
# jądro liniowe  


0.757031484257871


{'C': 0.4412095386638015,
 'degree': 7,
 'gamma': 0.3264357187566156,
 'kernel': 'linear'}

In [74]:
svm_c_best = rps_c.best_estimator_

svm_c_best.fit(X_train2,y_train2)



y_pred = svm_c_best.predict(X_test2)





print(f'Accuracy : {accuracy_score(y_test2,y_pred)}')
print(f'F1_score : {f1_score(y_test2,y_pred)}')
print(f'Reccal : {recall_score(y_test2,y_pred)}')
# Wynik podobny jak  przypadku parametrów domyślnych oznacza ,że domyślne parametry były dobre a zbiór jest prawdowpodobnie  trudny 

Accuracy : 0.796875
F1_score : 0.6355140186915887
Reccal : 0.5483870967741935


Algorytm ma niski reccal co onzacza ,że dużo ludzi jest mylnie klasyfikowanych jako zdrowych.