# IMPORTS

In [34]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, matthews_corrcoef

# PREPROCESSING
Importing dataset

In [35]:
# Load the letter-recognition dataset (expected in the notebook's working directory).
# The CSV header appears to pad several column names with trailing spaces
# (e.g. 'xbox '), which shows up as uneven header spacing when the frame is
# printed. Strip them so columns can be selected by their plain names;
# 'letter' carries no padding, so later cells are unaffected.
letterdf = pd.read_csv('letter-recognition.csv').rename(columns=str.strip)

Previewing the first ten rows

In [36]:
# Preview the first ten rows as plain text to sanity-check the parsed columns.
first_ten_rows = letterdf.head(10)
print(first_ten_rows)

  letter  xbox   ybox   width   height  onpix   xbar   ybar   x2bar  y2bar   \
0      T      2      8       3       5       1      8     13      0       6   
1      I      5     12       3       7       2     10      5      5       4   
2      D      4     11       6       8       6     10      6      2       6   
3      N      7     11       6       6       3      5      9      4       6   
4      G      2      1       3       1       1      8      6      6       6   
5      S      4     11       5       8       3      8      8      6       9   
6      B      4      2       5       4       4      8      7      6       6   
7      A      1      1       3       2       1      8      2      2       2   
8      J      2      2       4       4       2     10      6      2       6   
9      M     11     15      13       9       7     13      2      6       2   

   xybar   x2ybar  xy2bar  xedge   xedgey  yedge   yedgex  
0       6      10       8       0       8       0       8  
1      13 

Checking null values

In [37]:
# Count missing values per column; the resulting Series is the cell's output.
missing_per_column = letterdf.isna().sum()
missing_per_column

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64

Encoding the categorical target column (`letter`) as integer class labels

In [38]:
# Encode the target letters as integer class labels, overwriting the
# 'letter' column in place.
# Guarded so the cell is safe to re-run: once 'letter' already holds integer
# codes, refitting would replace the encoder's letter-to-code mapping
# (encoder.classes_) with the codes themselves, making the labels
# impossible to translate back to letters.
if not pd.api.types.is_integer_dtype(letterdf['letter']):
    encoder = LabelEncoder()
    encoded_data = encoder.fit_transform(letterdf['letter'])
    letterdf['letter'] = encoded_data

# MODEL TRAINING AND EVALUATION
Checking shape

In [39]:
# Dataset dimensions, displayed as a (rows, columns) tuple.
row_count, column_count = letterdf.shape
(row_count, column_count)

(20000, 17)

Separating the features (X) from the target (y)

In [40]:
# Target: the 'letter' column; features: every remaining column.
y = letterdf['letter']
X = letterdf.drop(columns='letter')

In [41]:
# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.33, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [42]:
# Support-vector classifier with a radial-basis-function kernel;
# every other hyperparameter is left at the library default.
svm_options = {'kernel': 'rbf'}
svm_classifier = SVC(**svm_options)

In [43]:
# Fit the classifier on the training split; the fitted estimator is the cell's output.
svm_classifier.fit(X=X_train, y=y_train)

In [44]:
# Predict class labels for the held-out test split.
predictions = svm_classifier.predict(X=X_test)

In [45]:
# Confusion matrix for the test split: rows are true classes, columns are predicted classes.
test_confusion = confusion_matrix(y_test, predictions)
print('Confusion Matrix:', test_confusion, sep='\n')

Confusion Matrix:
[[257   0   2   0   0   0   1   0   0   0   1   0   2   0   0   0   0   0
    0   0   1   0   0   0   0   0]
 [  1 239   0   5   0   1   0   0   0   0   0   0   0   0   0   0   0   7
    0   0   0   0   0   0   0   0]
 [  0   0 186   0   6   0   6   1   0   0   1   0   1   0   8   0   0   1
    0   0   2   0   1   0   0   0]
 [  0   5   0 269   0   0   0   3   0   0   0   0   0   2   2   0   0   1
    0   0   0   0   0   0   0   0]
 [  0   1   2   0 235   0  10   0   0   0   1   0   0   0   0   0   2   0
    0   0   0   0   0   1   0   5]
 [  0   2   0   0   1 220   0   0   1   1   0   0   0   0   0   0   0   0
    2   5   0   0   0   0   0   0]
 [  1   1   1   5   0   2 225   0   0   0   2   0   0   0   1   0   3   2
    0   0   0   2   0   0   0   0]
 [  0   8   0  12   0   0   4 166   0   0   6   0   0   2   6   2   6  21
    0   0   3   0   0   0   2   0]
 [  0   1   1   2   0   4   0   0 212  12   0   0   0   0   0   0   0   0
    4   0   0   0   0   2   0   1]
 

In [46]:
# Per-class precision, recall and F1 score on the test split.
test_report = classification_report(y_test, predictions)
print('Classification Report:', test_report, sep='\n')

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       264
           1       0.84      0.94      0.89       253
           2       0.95      0.87      0.91       213
           3       0.85      0.95      0.90       282
           4       0.91      0.91      0.91       257
           5       0.87      0.95      0.91       232
           6       0.85      0.92      0.88       245
           7       0.91      0.70      0.79       238
           8       0.98      0.89      0.93       239
           9       0.93      0.89      0.91       245
          10       0.87      0.86      0.87       203
          11       0.99      0.92      0.95       248
          12       0.94      0.97      0.96       274
          13       0.95      0.91      0.93       254
          14       0.86      0.91      0.88       248
          15       0.99      0.84      0.91       277
          16       0.93      0.94      0.94       279
    

In [47]:
# Single-number summary of test-set agreement, rounded to two decimals.
mcc_score = matthews_corrcoef(y_test, predictions)
print(f'Matthews Correlation Coefficient: {mcc_score:.2f}')

Matthews Correlation Coefficient: 0.91
