In [2]:
import pandas as pd

# Load training data
train_data = pd.read_csv('train.csv')  # replace 'train.csv' with your actual file path
prompts_train = train_data['Prompt'].tolist()
answers_train = train_data['Answer'].tolist()
labels_train = train_data['Target'].tolist()  # Target is present in train.csv

# Load test data (this does not have 'Target')
test_data = pd.read_csv('test.csv')  # replace 'test.csv' with your actual file path
prompts_test = test_data['Prompt'].tolist()
answers_test = test_data['Answer'].tolist()

# Check the first few rows of both datasets
print("Training Data:")
print(train_data.head())
print("\nTest Data:")
print(test_data.head())


Training Data:
      Id                                             Prompt  \
0  11527  [INST] You are an AI assistant that helps peop...   
1   7322  [INST] You are an AI assistant. You will be gi...   
2  11742  [INST] You are an AI assistant. You will be gi...   
3  20928  [INST] You are an AI assistant. User will you ...   
4  25830  [INST] You are an AI assistant. User will you ...   

                                              Answer  Target  
0  Step-by-step reasoning process:\n1. Randy spen...       0  
1  What is the temperature at which hypothermia b...       0  
2  Answer: c) No. \n\nThe hypothesis is false bec...       0  
3                                         Prismatoid       0  
4                                             Case B       0  

Test Data:
      Id                                             Prompt  \
0  20568  [INST] You are an AI assistant. You will be gi...   
1  17686  question:Question: This article: According to ...   
2  13035  [INST] You are an

In [3]:
from transformers import BertTokenizer, BertModel
import torch

# Load BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Set the model to evaluation mode (disable gradient calculation)
model.eval()


  from .autonotebook import tqdm as notebook_tqdm


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
  

In [4]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Load data (train.csv)
data = pd.read_csv('train.csv')

# Load the precomputed combined embeddings
combined_embeddings_train = np.load('combined_embeddings_train.npy')

# Get labels (Targets) from your dataset
labels_train = data['Target'].tolist()

# Filter labels to match filtered prompts and answers
filtered_prompts = data['Prompt'].dropna().str.strip().tolist()
filtered_answers = data['Answer'].dropna().str.strip().tolist()

filtered_labels = []
for i, (prompt, answer) in enumerate(zip(filtered_prompts, filtered_answers)):
    if isinstance(prompt, str) and isinstance(answer, str) and prompt and answer:
        filtered_labels.append(labels_train[i])

# Convert the filtered labels to a NumPy array
labels_train_np = np.array(filtered_labels)
print("Filtered Labels shape:", labels_train_np.shape)

# Split the combined embeddings and labels into training and validation sets (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(combined_embeddings_train, labels_train_np, test_size=0.2, random_state=42)

# Print shapes to confirm
print("Training data shape:", X_train.shape)
print("Validation data shape:", X_val.shape)
print("Training labels shape:", y_train.shape)
print("Validation labels shape:", y_val.shape)


Filtered Labels shape: (16668,)
Training data shape: (13334, 1536)
Validation data shape: (3334, 1536)
Training labels shape: (13334,)
Validation labels shape: (3334,)


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Logistic Regression model
clf = LogisticRegression(max_iter=1000)  # Increase max_iter to ensure convergence

# Train the classifier on the training set
print("Training the Logistic Regression classifier...")
clf.fit(X_train, y_train)

# Predict on the validation set
y_pred = clf.predict(X_val)

# Evaluate the model
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report:")
print(classification_report(y_val, y_pred, target_names=['No Hallucination', 'Hallucination']))


Training the Logistic Regression classifier...
Validation Accuracy: 94.69%
Classification Report:
                  precision    recall  f1-score   support

No Hallucination       0.95      0.99      0.97      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334



In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Random Forest model
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)  # Set random_state for reproducibility

# Train the classifier on the training set
print("Training the Random Forest classifier...")
rf_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_rf = rf_clf.predict(X_val)

# Evaluate the model
accuracy_rf = accuracy_score(y_val, y_pred_rf)
print(f"Validation Accuracy (Random Forest): {accuracy_rf * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (Random Forest):")
print(classification_report(y_val, y_pred_rf, target_names=['No Hallucination', 'Hallucination']))


Training the Random Forest classifier...
Validation Accuracy (Random Forest): 95.29%
Classification Report (Random Forest):
                  precision    recall  f1-score   support

No Hallucination       0.95      1.00      0.98      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Support Vector Machine model
svm_clf = SVC(kernel='linear', max_iter=1000, random_state=42)  # Use a linear kernel and set max_iter for convergence

# Train the classifier on the training set
print("Training the SVM classifier...")
svm_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_svm = svm_clf.predict(X_val)

# Evaluate the model
accuracy_svm = accuracy_score(y_val, y_pred_svm)
print(f"Validation Accuracy (SVM): {accuracy_svm * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (SVM):")
print(classification_report(y_val, y_pred_svm, target_names=['No Hallucination', 'Hallucination']))


Training the SVM classifier...




Validation Accuracy (SVM): 63.41%
Classification Report (SVM):
                  precision    recall  f1-score   support

No Hallucination       0.95      0.65      0.77      3177
   Hallucination       0.04      0.31      0.07       157

        accuracy                           0.63      3334
       macro avg       0.50      0.48      0.42      3334
    weighted avg       0.91      0.63      0.74      3334



In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the k-Nearest Neighbors model
knn_clf = KNeighborsClassifier(n_neighbors=5)  # Set k (number of neighbors) to 5

# Train the classifier on the training set
print("Training the k-Nearest Neighbors classifier...")
knn_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_knn = knn_clf.predict(X_val)

# Evaluate the model
accuracy_knn = accuracy_score(y_val, y_pred_knn)
print(f"Validation Accuracy (k-NN): {accuracy_knn * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (k-NN):")
print(classification_report(y_val, y_pred_knn, target_names=['No Hallucination', 'Hallucination']))


Training the k-Nearest Neighbors classifier...
Validation Accuracy (k-NN): 95.17%
Classification Report (k-NN):
                  precision    recall  f1-score   support

No Hallucination       0.95      1.00      0.98      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334



In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Decision Tree model
dt_clf = DecisionTreeClassifier(random_state=42)  # Set random_state for reproducibility

# Train the classifier on the training set
print("Training the Decision Tree classifier...")
dt_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_dt = dt_clf.predict(X_val)

# Evaluate the model
accuracy_dt = accuracy_score(y_val, y_pred_dt)
print(f"Validation Accuracy (Decision Tree): {accuracy_dt * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (Decision Tree):")
print(classification_report(y_val, y_pred_dt, target_names=['No Hallucination', 'Hallucination']))


Training the Decision Tree classifier...
Validation Accuracy (Decision Tree): 88.78%
Classification Report (Decision Tree):
                  precision    recall  f1-score   support

No Hallucination       0.95      0.93      0.94      3177
   Hallucination       0.06      0.09      0.07       157

        accuracy                           0.89      3334
       macro avg       0.51      0.51      0.50      3334
    weighted avg       0.91      0.89      0.90      3334



In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Naive Bayes model
nb_clf = GaussianNB()

# Train the classifier on the training set
print("Training the Naive Bayes classifier...")
nb_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_nb = nb_clf.predict(X_val)

# Evaluate the model
accuracy_nb = accuracy_score(y_val, y_pred_nb)
print(f"Validation Accuracy (Naive Bayes): {accuracy_nb * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (Naive Bayes):")
print(classification_report(y_val, y_pred_nb, target_names=['No Hallucination', 'Hallucination']))


Training the Naive Bayes classifier...
Validation Accuracy (Naive Bayes): 70.94%
Classification Report (Naive Bayes):
                  precision    recall  f1-score   support

No Hallucination       0.96      0.73      0.83      3177
   Hallucination       0.06      0.34      0.10       157

        accuracy                           0.71      3334
       macro avg       0.51      0.54      0.46      3334
    weighted avg       0.92      0.71      0.79      3334



In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Gradient Boosting model
gb_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)

# Train the classifier on the training set
print("Training the Gradient Boosting classifier...")
gb_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_gb = gb_clf.predict(X_val)

# Evaluate the model
accuracy_gb = accuracy_score(y_val, y_pred_gb)
print(f"Validation Accuracy (Gradient Boosting): {accuracy_gb * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (Gradient Boosting):")
print(classification_report(y_val, y_pred_gb, target_names=['No Hallucination', 'Hallucination']))


Training the Gradient Boosting classifier...
Validation Accuracy (Gradient Boosting): 95.11%
Classification Report (Gradient Boosting):
                  precision    recall  f1-score   support

No Hallucination       0.95      1.00      0.97      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334



In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the AdaBoost model
ada_clf = AdaBoostClassifier(n_estimators=50, learning_rate=1.0, random_state=42)

# Train the classifier on the training set
print("Training the AdaBoost classifier...")
ada_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_ada = ada_clf.predict(X_val)

# Evaluate the model
accuracy_ada = accuracy_score(y_val, y_pred_ada)
print(f"Validation Accuracy (AdaBoost): {accuracy_ada * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (AdaBoost):")
print(classification_report(y_val, y_pred_ada, target_names=['No Hallucination', 'Hallucination']))


Training the AdaBoost classifier...




Validation Accuracy (AdaBoost): 95.23%
Classification Report (AdaBoost):
                  precision    recall  f1-score   support

No Hallucination       0.95      1.00      0.98      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334



In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the MLP (Neural Network) model
mlp_clf = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)

# Train the classifier on the training set
print("Training the MLP classifier...")
mlp_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_mlp = mlp_clf.predict(X_val)

# Evaluate the model
accuracy_mlp = accuracy_score(y_val, y_pred_mlp)
print(f"Validation Accuracy (MLP): {accuracy_mlp * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (MLP):")
print(classification_report(y_val, y_pred_mlp, target_names=['No Hallucination', 'Hallucination']))


Training the MLP classifier...
Validation Accuracy (MLP): 93.43%
Classification Report (MLP):
                  precision    recall  f1-score   support

No Hallucination       0.95      0.98      0.97      3177
   Hallucination       0.06      0.03      0.04       157

        accuracy                           0.93      3334
       macro avg       0.51      0.50      0.50      3334
    weighted avg       0.91      0.93      0.92      3334



In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Linear Discriminant Analysis model
lda_clf = LinearDiscriminantAnalysis()

# Train the classifier on the training set
print("Training the LDA classifier...")
lda_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_lda = lda_clf.predict(X_val)

# Evaluate the model
accuracy_lda = accuracy_score(y_val, y_pred_lda)
print(f"Validation Accuracy (LDA): {accuracy_lda * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (LDA):")
print(classification_report(y_val, y_pred_lda, target_names=['No Hallucination', 'Hallucination']))


Training the LDA classifier...
Validation Accuracy (LDA): 93.91%
Classification Report (LDA):
                  precision    recall  f1-score   support

No Hallucination       0.95      0.98      0.97      3177
   Hallucination       0.06      0.02      0.03       157

        accuracy                           0.94      3334
       macro avg       0.51      0.50      0.50      3334
    weighted avg       0.91      0.94      0.92      3334



In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Quadratic Discriminant Analysis model
qda_clf = QuadraticDiscriminantAnalysis()

# Train the classifier on the training set
print("Training the QDA classifier...")
qda_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_qda = qda_clf.predict(X_val)

# Evaluate the model
accuracy_qda = accuracy_score(y_val, y_pred_qda)
print(f"Validation Accuracy (QDA): {accuracy_qda * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (QDA):")
print(classification_report(y_val, y_pred_qda, target_names=['No Hallucination', 'Hallucination']))


Training the QDA classifier...




Validation Accuracy (QDA): 95.29%
Classification Report (QDA):
                  precision    recall  f1-score   support

No Hallucination       0.95      1.00      0.98      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Extra Trees model
et_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)

# Train the classifier on the training set
print("Training the Extra Trees classifier...")
et_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_et = et_clf.predict(X_val)

# Evaluate the model
accuracy_et = accuracy_score(y_val, y_pred_et)
print(f"Validation Accuracy (Extra Trees): {accuracy_et * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (Extra Trees):")
print(classification_report(y_val, y_pred_et, target_names=['No Hallucination', 'Hallucination']))


Training the Extra Trees classifier...
Validation Accuracy (Extra Trees): 95.29%
Classification Report (Extra Trees):
                  precision    recall  f1-score   support

No Hallucination       0.95      1.00      0.98      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier

# Initialize the base classifier (e.g., DecisionTree)
base_clf = DecisionTreeClassifier(random_state=42)

# Initialize the Bagging model using the base classifier
bagging_clf = BaggingClassifier(base_clf, n_estimators=100, random_state=42)

# Train the classifier on the training set
print("Training the Bagging classifier...")
bagging_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_bagging = bagging_clf.predict(X_val)

# Evaluate the model
accuracy_bagging = accuracy_score(y_val, y_pred_bagging)
print(f"Validation Accuracy (Bagging): {accuracy_bagging * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (Bagging):")
print(classification_report(y_val, y_pred_bagging, target_names=['No Hallucination', 'Hallucination']))


Training the Bagging classifier...


KeyboardInterrupt: 

In [7]:
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Ridge Classifier model
ridge_clf = RidgeClassifier(alpha=1.0)

# Train the classifier on the training set
print("Training the Ridge Classifier...")
ridge_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_ridge = ridge_clf.predict(X_val)

# Evaluate the model
accuracy_ridge = accuracy_score(y_val, y_pred_ridge)
print(f"Validation Accuracy (Ridge Classifier): {accuracy_ridge * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (Ridge Classifier):")
print(classification_report(y_val, y_pred_ridge, target_names=['No Hallucination', 'Hallucination']))


Training the Ridge Classifier...
Validation Accuracy (Ridge Classifier): 95.29%
Classification Report (Ridge Classifier):
                  precision    recall  f1-score   support

No Hallucination       0.95      1.00      0.98      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, classification_report

# Initialize the Perceptron model
perceptron_clf = Perceptron(max_iter=1000, random_state=42)

# Train the classifier on the training set
print("Training the Perceptron classifier...")
perceptron_clf.fit(X_train, y_train)

# Predict on the validation set
y_pred_perceptron = perceptron_clf.predict(X_val)

# Evaluate the model
accuracy_perceptron = accuracy_score(y_val, y_pred_perceptron)
print(f"Validation Accuracy (Perceptron): {accuracy_perceptron * 100:.2f}%")

# Get a detailed classification report (precision, recall, F1-score)
print("Classification Report (Perceptron):")
print(classification_report(y_val, y_pred_perceptron, target_names=['No Hallucination', 'Hallucination']))


Training the Perceptron classifier...
Validation Accuracy (Perceptron): 95.11%
Classification Report (Perceptron):
                  precision    recall  f1-score   support

No Hallucination       0.95      1.00      0.97      3177
   Hallucination       0.00      0.00      0.00       157

        accuracy                           0.95      3334
       macro avg       0.48      0.50      0.49      3334
    weighted avg       0.91      0.95      0.93      3334

