In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

In [2]:
# Load data from the CSV file.
# The original absolute path is kept as the default so the existing setup keeps
# working; set the LANGUAGE_CSV environment variable (e.g. `%env LANGUAGE_CSV=...`)
# to point at the file on another machine without editing this cell.
DEFAULT_CSV_PATH = r"C:\Users\ASUS\OneDrive\Desktop\Language Detection Model\language.csv"
csv_path = os.environ.get("LANGUAGE_CSV", DEFAULT_CSV_PATH)
data = pd.read_csv(csv_path)

In [3]:
# Check for null values before any modelling is attempted.
null_values = data.isnull().sum()  # per-column count of missing entries
if null_values.any():
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which throws away all state. An exception simply stops
    # execution at this cell and reports which columns need cleaning.
    raise ValueError(
        "The dataset contains null values. Please clean the data.\n"
        f"{null_values[null_values > 0]}"
    )

In [None]:
# Show how many rows the dataset holds for each value of the "language" column
language_counts = data["language"].value_counts()
print(language_counts)

In [5]:
# Pull the "Text" column (features, x) and the "language" column (labels, y)
# out of the DataFrame as NumPy arrays
x, y = (np.array(data[column]) for column in ("Text", "language"))

In [6]:
# Split the raw text into training and test sets FIRST, then learn the
# vocabulary from the training part only. Fitting CountVectorizer on the whole
# corpus before splitting lets test-set words leak into the feature space,
# which biases the accuracy measured on the test set.
TEST_SIZE = 0.33  # fraction of samples held out for evaluation
SEED = 42         # fixed seed so the split is reproducible
text_train, text_test, y_train, y_test = train_test_split(
    x, y, test_size=TEST_SIZE, random_state=SEED
)

# Transform text data into numerical format (token-count vectors)
cv = CountVectorizer()
X_train = cv.fit_transform(text_train)  # vocabulary comes from training text only
X_test = cv.transform(text_test)        # test text is encoded with that same vocabulary

In [None]:
# Fit a multinomial Naive Bayes classifier on the training split
model = MultinomialNB().fit(X_train, y_train)
model  # last expression, so the fitted estimator is still shown as the cell output


In [None]:
# Measure mean accuracy on the held-out test split and report it as a percentage
score = model.score(X_test, y_test)
accuracy_report = f"Model accuracy: {score * 100:.2f}%"
print(accuracy_report)


In [None]:
# Predict the language of text typed by the user until they ask to quit
while True:
    user = input("Enter a Text (or type 'exit' to quit): ")
    # Ignore surrounding whitespace so inputs such as " exit " still quit.
    if user.strip().lower() == 'exit':
        print("Exiting program.")
        break
    if not user.strip():
        # Blank input contains no tokens, so a prediction for it would be
        # meaningless; ask again instead of printing one.
        print("Please enter some text.")
        continue
    # Use a dedicated name here: rebinding `data` would clobber the DataFrame
    # loaded earlier and break re-running the cells that read it.
    # The sparse row goes straight to predict(); no dense copy is needed.
    user_features = cv.transform([user])
    output = model.predict(user_features)
    print(f"Predicted Language: {output[0]}")
