In [None]:
import nltk
import random
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report

# Sample data (you should replace this with your own dataset)
# Each entry is a (text, label) pair; the two labels below are the only
# classes the classifier is trained to distinguish.
_HATE = "hate"
_NON_HATE = "non-hate"

data = [
    ("I hate you, go away", _HATE),
    ("Love and peace to all", _NON_HATE),
    ("I hope you rot in hell", _HATE),
    ("Spread kindness and love", _NON_HATE),
    ("Kill all [ethnic group]", _HATE),
    ("Respect diversity and inclusion", _NON_HATE),
]

# Preprocess data
# Build the English stop-word set once so membership tests in
# preprocess_text are O(1). The stop-word corpus is distributed separately
# from the nltk package, so fetch it on first use instead of crashing with
# LookupError on a fresh environment.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Return a normalized, space-joined version of *text*.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens
    that are not purely alphabetic or that appear in the module-level
    ``stop_words`` set are dropped. The surviving tokens are joined with
    single spaces, so the result may be an empty string.
    """
    kept_tokens = []
    for token in word_tokenize(text.lower()):
        if not token.isalpha():
            # Punctuation, numbers and mixed tokens carry no signal here
            continue
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)

# Apply preprocessing to data
# Clean every sample once, keeping texts (X) and labels (y) as parallel lists
X = [preprocess_text(raw_text) for raw_text, _ in data]
y = [tag for _, tag in data]
preprocessed_data = list(zip(X, y))

# Split data into training and testing sets
# Hold out 20% of the samples; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Vectorize text data using TF-IDF
# The vectorizer is fitted on the training split only; the test split is
# merely projected onto the already-fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Train a Support Vector Machine (SVM) classifier
# fit() returns the estimator itself, so construction and training chain
svm_classifier = LinearSVC().fit(X_train_vectorized, y_train)

# Evaluate the model
# Predict labels for the held-out split and print the report under a heading
y_pred = svm_classifier.predict(X_test_vectorized)
print(
    "Classification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

# Test the model with user input
# Interactive loop: classify each line the user types until they enter
# 'exit' (case-insensitive, surrounding whitespace ignored) or stdin ends.
while True:
    try:
        user_input = input("Enter a text for classification (or 'exit' to quit): ")
    except EOFError:
        # stdin was closed (piped input, Ctrl-D): finish the prompt line and
        # leave the loop instead of aborting the script with a traceback.
        print()
        break
    if user_input.strip().lower() == 'exit':
        break

    # Apply the same preprocessing and fitted vectorizer used for training
    user_input_preprocessed = preprocess_text(user_input)
    user_input_vectorized = vectorizer.transform([user_input_preprocessed])
    prediction = svm_classifier.predict(user_input_vectorized)

    print(f"Text: {user_input} | Prediction: {prediction}")
    
# Evaluate the model with zero_division parameter set to 1
# Same predictions as before; only the zero_division setting passed to
# classification_report differs from the earlier report.
final_report = classification_report(y_test, y_pred, zero_division=1)
print("Classification Report:")
print(final_report)

Classification Report:
              precision    recall  f1-score   support

        hate       1.00      1.00      1.00         1
    non-hate       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



Enter a text for classification (or 'exit' to quit):  hate


Text: hate | Prediction: ['hate']
