In [1]:
##  Q1. What is the relationship between polynomial functions and kernel functions in machine learning
## algorithms?

## the relationship between polynomial functions and kernel functions is encapsulated in the concept of the polynomial kernel.
## A polynomial kernel is a type of kernel function that represents the similarity of vectors (training samples) in a feature
## The polynomial kernel function takes the form:
## K(x,y)=(x⋅y+c)d
## Here:

##( x ) and ( y ) are vectors in the input space, i.e., data points.
##( c ) is a constant term that allows adjusting the influence of higher-order versus lower-order terms in the polynomial.
#( d ) is the degree of the polynomial.

In [6]:
## Q2. How can we implement an SVM with a polynomial kernel in Python using Scikit-learn?

from sklearn import svm
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

x, y = make_classification(n_samples=100, n_features=4, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

clf = svm.SVC(kernel='poly', degree=3, C=1.0)
clf.fit(x_train,y_train)

In [9]:
y_pred=clf.predict(x_test)

In [10]:
print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.62      1.00      0.77         5
           1       1.00      0.80      0.89        15

    accuracy                           0.85        20
   macro avg       0.81      0.90      0.83        20
weighted avg       0.91      0.85      0.86        20



In [11]:
## Q5. Assignment:
from sklearn.datasets import load_breast_cancer
cancer=load_breast_cancer()
    
    

In [12]:
print(cancer.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [13]:
import pandas as pd
x=pd.DataFrame(cancer.data,columns=cancer.feature_names)
y=cancer.target

In [15]:
from sklearn.svm import SVC
svc=SVC(kernel='linear')

In [16]:
svc.fit(x_train,y_train)

In [17]:
svc.coef_

array([[ 1.37054852e+00,  2.59247874e-01, -2.77551335e-01,
         1.43482304e-03, -2.03866416e-01, -2.78008596e-01,
        -6.50956924e-01, -3.66965917e-01, -3.58274378e-01,
        -5.02742663e-02, -1.60308055e-01,  1.86945957e+00,
         5.54507782e-01, -1.26006116e-01, -2.67583590e-02,
         3.95984593e-02, -1.09584158e-02, -3.23554941e-02,
        -6.92577492e-02,  7.58005476e-03, -2.83148462e-01,
        -4.39851275e-01,  3.30823444e-03,  4.04421359e-03,
        -4.08424934e-01, -8.74892874e-01, -1.60355839e+00,
        -5.95155045e-01, -1.32923064e+00, -1.47466887e-01]])

In [18]:
y_pred=svc.predict(x_test)

In [19]:
y_pred

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1])

In [21]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [22]:
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))
print(accuracy_score(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94        67
           1       0.97      0.97      0.97       121

    accuracy                           0.96       188
   macro avg       0.95      0.95      0.95       188
weighted avg       0.96      0.96      0.96       188

[[ 63   4]
 [  4 117]]
0.9574468085106383


In [36]:
paam_grid={
    'C':[0.1,1,10,100,1000],
    'gamma':[1,0.1,0.01,0.001,0.0001],
    'kernel':['linear']}

In [39]:
from sklearn.model_selection import GridSearchCV
grid= GridSearchCV(SVC(),param_grid=paam_grid,refit=True,cv=5,verbose=3)

In [42]:
grid.fit(x_train,y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.961 total time=   0.1s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.934 total time=   0.1s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.974 total time=   0.4s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.934 total time=   0.1s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.895 total time=   0.1s
[CV 1/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.961 total time=   0.1s
[CV 2/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.934 total time=   0.1s
[CV 3/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.974 total time=   0.4s
[CV 4/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.934 total time=   0.1s
[CV 5/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.895 total time=   0.1s
[CV 1/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.961 total time=   0.1s
[CV 2/5] END ..C=0.1, gamma=0.01, kernel=linear

In [44]:
grid.best_params_

{'C': 100, 'gamma': 1, 'kernel': 'linear'}

In [45]:
y_pred4=grid.predict(x_test)

In [None]:
print(classification_report(y_pred4,y_test))
print(confusion_matrix(y_pred4,y_test))
print(accuracy_score(y_pred4,y_test))