In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
# from sklearn.externals import joblib  # Use 'import joblib' if this line fails
import os
import cv2
from sklearn.decomposition import PCA
from sklearn.svm import SVC

# Path and classes
# Names of the entries directly under the training directory, as returned by
# os.listdir (arbitrary order).
# NOTE(review): `path` is not referenced anywhere else in the visible code;
# confirm whether it is still needed before removing it.
path = os.listdir('/kaggle/input/gujarati-language-hand-signs/data/train')
# Class folder name -> integer label used as the training target.
#
# NOTE(review): the previous literal listed the keys 'ta' and 'tha' twice
# (first as 8 and 9, later as 13 and 14). Python keeps the last value for a
# repeated key, so the effective mapping always had 32 entries and labels 8
# and 9 were never produced. The dead duplicate entries are removed here; the
# resulting dict (values AND iteration order) is identical to what the old
# literal evaluated to. If labels 8 and 9 were meant for two further signs
# (presumably with their own dataset folders), add them under distinct keys
# -- TODO confirm against the dataset's folder names.
classes = {
    'ka': 0, 'kha': 1, 'ga': 2, 'gha': 3, 'cha': 4, 'chha': 5, 'ja': 6, 'za': 7,
    # Kept in this position on purpose: a dict remembers the position of a
    # key's first insertion, and the data loader iterates in that order.
    'ta': 13, 'tha': 14,
    'dda': 10, 'dhha': 11, 'nna': 12,
    'da': 15, 'dha': 16, 'na': 17, 'pa': 18, 'fa': 19, 'ba': 20, 'bha': 21,
    'ma': 22, 'ya': 23, 'ra': 24, 'la': 25, 'va': 26, 'sha': 27, 'sh': 28,
    'sa': 29, 'ha': 30, 'lla': 31, 'ksha': 32, 'gna': 33,
}

# Load data
# For every class name in `classes`, read each file in that class's training
# folder as a grayscale image, resize it to 200x200 and store it in X with the
# class's integer label at the same position in Y.
train_root = '/kaggle/input/gujarati-language-hand-signs/data/train'
X = []
Y = []
for cls, cls_label in classes.items():
    pth = os.path.join(train_root, cls)
    for j in os.listdir(pth):
        img_path = os.path.join(pth, j)
        img = cv2.imread(img_path, 0)  # flag 0 -> load as grayscale
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising; passing that to cv2.resize would fail
            # with an opaque error, so report the file and move on.
            print("Skipping unreadable image:", img_path)
            continue
        X.append(cv2.resize(img, (200, 200)))
        Y.append(cls_label)

# Convert to numpy arrays
X = np.array(X)
Y = np.array(Y)

# Reshape data
# Flatten every sample into a single row: one row per sample, with all
# remaining axes collapsed into the feature axis.
X_updated = X.reshape((X.shape[0], -1))

# Train-test split
# Hold out 20% of the samples for testing; the seed is fixed at 10.
xtrain, xtest, ytrain, ytest = train_test_split(
    X_updated,
    Y,
    test_size=0.20,
    random_state=10,
)

# Normalize data
# Divide by 255 (maps values to [0, 1] assuming 8-bit pixel intensities).
xtrain, xtest = xtrain / 255, xtest / 255

# Dimensionality reduction with PCA (optional)
# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share (here 98%) of the variance. The projection is fitted
# on the training split only and then applied to the test split.
# NOTE(review): pca_train / pca_test are not consumed by the SVM code visible
# in this file (it is fitted on xtrain) -- confirm whether that is intended.
pca = PCA(n_components=.98)
pca_train = pca.fit_transform(xtrain)
pca_test = pca.transform(xtest)

# Train SVM model
# Linear-kernel support vector classifier with C=0.1, fitted on the scaled
# (non-PCA) training features.
svm = SVC(kernel='linear', C=0.1)  # Adjust kernel as needed
svm.fit(xtrain, ytrain)

# Print accuracy
# Score and report each split in turn: training first, then testing.
for score_label, score_X, score_y in (
    ("Training Score:", xtrain, ytrain),
    ("Testing Score:", xtest, ytest),
):
    print(score_label, svm.score(score_X, score_y))

# Save the model
# Persist the fitted classifier to the current working directory.
joblib.dump(svm, 'svm_model.pkl')
print("Model saved as 'svm_model.pkl'")


Training Score: 0.9569779643231899
Testing Score: 0.8797202797202798
Model saved as 'svm_model.pkl'
