In [1]:
import joblib
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder

In [2]:
# Read the plate/category CSV (latin-1 encoded) and preview its first five rows
df = pd.read_csv('training_dataset_4.csv', encoding='latin-1')
df.head(n=5)

Unnamed: 0,plateNumber,category,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,parameters,count
0,DHQ243,Army,1,,,,,Regular Plate,1497.0
1,REP221FL,House of Rep,1,,,,,Federal Government,1108.0
2,270UN56,Diplomatic,1,,,,,Local Government,649.0
3,DHQ386,Army,1,,,,,Diplomatic,411.0
4,FG40U48,Federal Government,1,,,,,Police,360.0


In [3]:
# Represent every plate string as counts of its character 1-, 2- and 3-grams.
# NOTE(review): the vocabulary is learned from all rows here, i.e. before the
# train/test split performed later — confirm this is acceptable for evaluation.
vectorizer = CountVectorizer(ngram_range=(1, 3), analyzer='char')
X = vectorizer.fit_transform(raw_documents=df['plateNumber'])


In [4]:
# Convert the category names into integer class ids; the fitted encoder is
# kept so ids can be mapped back to names later.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y=df['category'])


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [6]:
# Fit a multinomial Naive Bayes model on the training n-gram counts
classifier = MultinomialNB()
classifier.fit(X=X_train, y=y_train)

MultinomialNB()

In [7]:
# Run the trained model on the held-out rows
y_pred = classifier.predict(X_test)

# Share of held-out rows whose predicted class id equals the true class id
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Category names for the predictions (not used further in this cell)
y_pred_decoded = label_encoder.inverse_transform(y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 93.50%


In [8]:
# Function to predict a category for a given license plate number
def predict_category(plate_number=None):
    """Predict, print and return the category for a license plate number.

    Uses the notebook-level ``vectorizer``, ``classifier`` and
    ``label_encoder`` objects.

    Parameters
    ----------
    plate_number : str, optional
        Plate to classify. When omitted (``None``) the user is prompted for
        it, so the original zero-argument call keeps working.

    Returns
    -------
    str
        The decoded category label predicted for the plate.

    Raises
    ------
    ValueError
        If the plate number is empty once surrounding whitespace is removed.
    """
    if plate_number is None:
        plate_number = input("Enter plate number: ")

    # Typed input often carries stray leading/trailing spaces; with a
    # character analyzer they would otherwise become spurious n-grams.
    plate_number = str(plate_number).strip()
    if not plate_number:
        # An empty string yields an all-zero feature vector, so the model's
        # answer would carry no information about the plate.
        raise ValueError("plate number must not be empty")

    plate_vector = vectorizer.transform([plate_number])
    category_encoded = classifier.predict(plate_vector)[0]
    category = label_encoder.inverse_transform([category_encoded])[0]
    print(f'Input Plate Number: {plate_number}')
    print(f"Predicted Category: {category}")
    return category

# # Test the predict_category function
# input_plate_number = 'FRSC2XK'
# predicted_category = predict_category(input_plate_number)
# print(f'Input Plate Number: {input_plate_number}')
# print(f'Predicted Category: {predicted_category}')

In [25]:
# Interactive check: prompts for a plate number and prints the predicted category
predict_category()

Enter plate number: REP385FU
Input Plate Number: REP385FU
Predicted Category: House of Rep


In [26]:
# Persist the trained Naive Bayes classifier to disk so it can be reloaded for inference
joblib.dump(classifier, 'plateNum_classifier_NBM_v2.joblib')


['plateNum_classifier_NBM_v2.joblib']

In [27]:
# Persist the fitted CountVectorizer; inference has to reuse the vocabulary learned here
joblib.dump(vectorizer, 'vectorizer2.joblib')




['vectorizer2.joblib']

In [None]:
# Persist the fitted LabelEncoder so predicted class ids can be mapped back to category names
joblib.dump(value=label_encoder, filename='label_encoder.joblib')