In [1]:
import pandas as pd

### Load Dataset

In [2]:
df = pd.read_excel('Raisin_Dataset.xlsx')
df.sample(5)

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
13,76792,338.857545,291.359202,0.510584,78842,0.772322,1042.77,Kecimen
461,64391,449.555901,187.122378,0.909256,68813,0.634269,1177.714,Besni
62,78571,443.822291,228.757615,0.856934,81718,0.642193,1157.33,Kecimen
695,86852,456.478688,248.606869,0.838684,90550,0.607854,1207.534,Besni
184,81546,381.348654,273.810099,0.696038,82807,0.71172,1057.448,Kecimen


In [3]:
X = df.drop('Class', axis=1)
X.sample()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter
449,41995,259.208878,210.350798,0.584337,43443,0.729701,801.526


In [4]:
y = df['Class']
y.sample()

324    Kecimen
Name: Class, dtype: object

### Split Dataset

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12)

### Train model using SVC: Kernel='rbf' without scaling Data

In [26]:
from sklearn.svm import SVC

model = SVC()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

model.n_iter_

array([193], dtype=int32)

### Evaluating Model

In [8]:
from sklearn.metrics import classification_report

result = classification_report(y_test, y_pred)
print(result)

              precision    recall  f1-score   support

       Besni       0.87      0.71      0.78       140
     Kecimen       0.74      0.88      0.81       130

    accuracy                           0.80       270
   macro avg       0.81      0.80      0.80       270
weighted avg       0.81      0.80      0.80       270



In [9]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred, y_test)
cm

array([[100,  15],
       [ 40, 115]])

### Train model using SVC: Kernel='linear' without scaling Data

In [10]:
from sklearn.svm import SVC

model_l = SVC(kernel='linear')
model_l.fit(X_train, y_train)

y_pred_l = model_l.predict(X_test)

model_l.n_iter_

array([182181822], dtype=int32)

### Evaluating Model

In [11]:
from sklearn.metrics import classification_report

result = classification_report(y_test, y_pred_l)
print(result)

              precision    recall  f1-score   support

       Besni       0.83      0.80      0.81       140
     Kecimen       0.79      0.82      0.81       130

    accuracy                           0.81       270
   macro avg       0.81      0.81      0.81       270
weighted avg       0.81      0.81      0.81       270



In [12]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred_l, y_test)
cm

array([[112,  23],
       [ 28, 107]])

### So, here above by using linear kernel the computational cost increased as no. of iteration increased  but there is no significant growth in accuracy

### Scaling Data

In [13]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
X_train_scaled

array([[-0.41582724, -0.37613189, -0.35501501, ..., -0.42067499,
         0.35608803, -0.35136095],
       [ 2.1055657 ,  1.87956427,  1.69417725, ...,  2.1102089 ,
         0.26682251,  1.8082907 ],
       [-1.45116708, -1.45366875, -1.99079575, ..., -1.43869773,
        -1.6017914 , -1.63391715],
       ...,
       [-0.97441512, -0.96445004, -1.04584597, ..., -0.96589794,
        -0.40294609, -0.94470807],
       [ 0.42536393,  0.49984721,  0.46427322, ...,  0.41748013,
         1.35972891,  0.54798863],
       [ 0.23385762,  0.14370149,  0.45931268, ...,  0.22330458,
        -0.2292172 ,  0.23392001]])

### Model Training using Scaled Data

In [24]:
from sklearn.svm import SVC

model = SVC(kernel='linear')
model.fit(X_train_scaled, y_train)

y_pred_ = model.predict(X_test_scaled)

model.n_iter_

array([682], dtype=int32)

In [25]:
from sklearn.metrics import classification_report

result = classification_report(y_test, y_pred_)
print(result)

              precision    recall  f1-score   support

       Besni       0.88      0.79      0.83       140
     Kecimen       0.80      0.88      0.84       130

    accuracy                           0.84       270
   macro avg       0.84      0.84      0.84       270
weighted avg       0.84      0.84      0.84       270



### Uisng Pipeline

In [29]:
from sklearn.pipeline import Pipeline

pipeline = Pipeline([
    ('scale', StandardScaler()),('svc', SVC())
])

pipeline.fit(X_train, y_train)

In [31]:
y_pred_pipe = pipeline.predict(X_test)

from sklearn.metrics import classification_report

result = classification_report(y_test, y_pred_pipe)

print(result)

              precision    recall  f1-score   support

       Besni       0.90      0.79      0.84       140
     Kecimen       0.80      0.91      0.85       130

    accuracy                           0.84       270
   macro avg       0.85      0.85      0.84       270
weighted avg       0.85      0.84      0.84       270

