In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load the dataset
file_path = '/content/Datasetprojpowerbi.csv'
dataset = pd.read_csv(file_path)

# Encode the target variable ('Genre') as integer class ids for classification
label_encoder = LabelEncoder()
dataset['Genre_encoded'] = label_encoder.fit_transform(dataset['Genre'])

# Prepare features and target
X_text = dataset['Reports']  # complaint text data
y = dataset['Genre_encoded']  # target variable (encoded genres)

# Split the RAW text before vectorizing. Fitting TF-IDF on the full dataset would let
# the test rows influence the vocabulary and IDF weights (data leakage) and make the
# reported scores optimistic. The split depends only on the number of rows and the
# seed, so the same rows land in train/test as when splitting the vectorized matrix.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X_text, y, test_size=0.3, random_state=42
)

# Learn the TF-IDF vocabulary from the training rows only, then apply it to the test rows
tfidf_vectorizer = TfidfVectorizer(max_features=500)  # limit features to top 500 terms for simplicity
X_train = tfidf_vectorizer.fit_transform(X_train_text)
X_test = tfidf_vectorizer.transform(X_test_text)

# Full-dataset feature matrix, kept under its original name for any later cell that
# uses it; it is produced with the train-fitted vectorizer and is not used for evaluation.
X_text_tfidf = tfidf_vectorizer.transform(X_text)

# Initialize and train K-Nearest Neighbors classifier
knn = KNeighborsClassifier(n_neighbors=5)  # using k=5 neighbors as a starting point
knn.fit(X_train, y_train)

# Predict on the test set and evaluate the classifier
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Pass the full list of encoded labels explicitly: without it, a genre that is missing
# from both y_test and y_pred makes the label count differ from target_names and
# classification_report raises a ValueError.
all_labels = list(range(len(label_encoder.classes_)))
report = classification_report(
    y_test, y_pred, labels=all_labels, target_names=label_encoder.classes_
)

# Print the results
print("KNN Classifier Accuracy:", accuracy)
print("\nClassification Report:\n", report)


ModuleNotFoundError: No module named 'flask'

In [None]:
import pandas as pd

# Read the complaints to classify (this path currently points at the same CSV that was
# loaded for training).
new_file_path = '/content/Datasetprojpowerbi.csv'
new_complaints = pd.read_csv(new_file_path)

# Vectorize the report text with the already-fitted TF-IDF vectorizer (transform only,
# so the vocabulary learned earlier is reused rather than refitted).
report_texts = new_complaints['Reports']
X_new_text_tfidf = tfidf_vectorizer.transform(report_texts)

# Predict the encoded genre for each complaint, then map the codes back to genre labels
predicted_codes = knn.predict(X_new_text_tfidf)
new_complaints['Predicted_Genre'] = label_encoder.inverse_transform(predicted_codes)

# Tally how many complaints fall into each predicted genre
complaint_counts = new_complaints['Predicted_Genre'].value_counts()

# Show the heading followed by the tally, one per line
print("Complaint counts per class:", complaint_counts, sep="\n")