# AI Detector - Evaluation

## 1. Import Necessary Dependencies

In [6]:
import os
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sentence_transformers import SentenceTransformer, util

We also select the `device` so that evaluation runs on the GPU when one is available and falls back to the CPU otherwise.

In [7]:
# Use CUDA when PyTorch reports a usable GPU; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cpu


## 2. Define `evaluate_model()` function

- **Params:**
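  - `model` -> Loaded fine-tuned model (an object exposing `encode()`, e.g. a `SentenceTransformer`)
  - `df` -> Preprocessed data with `candidate_combined`, `ai_combined` and `similarity_score` columns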
- **Returns:**
  - `mse` -> Mean squared error
  - `rmse` -> Root mean squared error
  - `mae` -> Mean absolute error
  - `r2` -> R-squared score (coefficient of determination)

In [8]:
def evaluate_model(model, df):
    """Compare a model's pairwise cosine similarities with labelled scores.

    The `candidate_combined` and `ai_combined` texts of every row are embedded
    with `model.encode`, the cosine similarity of each (candidate, ai) pair is
    computed, and the similarities are scored against `similarity_score`.
    The four metrics are printed and returned.

    Args:
        model: Object exposing `encode(list_of_str)` that returns one
            embedding per input text (e.g. a loaded `SentenceTransformer`).
        df: Data with `candidate_combined`, `ai_combined` and
            `similarity_score` columns.

    Returns:
        Tuple `(mse, rmse, mae, r2)`.

    Raises:
        KeyError: If one of the required columns is missing from `df`.
        ValueError: If `df` contains no rows.
    """
    candidate_texts = df['candidate_combined'].tolist()
    ai_texts = df['ai_combined'].tolist()
    targets = df['similarity_score']

    # Fail early with a clear message instead of deep inside the metric call.
    if not candidate_texts:
        raise ValueError("Cannot evaluate the model on an empty dataset")

    # Encode all sentences (as_tensor accepts both ndarray and tensor output)
    candidate_embeddings = torch.as_tensor(model.encode(candidate_texts))
    ai_embeddings = torch.as_tensor(model.encode(ai_texts))

    # Calculate cosine similarities for all rows at once: L2-normalise each
    # embedding, then take the row-wise dot product. This replaces a Python
    # loop that issued one similarity call per pair.
    candidate_unit = torch.nn.functional.normalize(candidate_embeddings, p=2, dim=1)
    ai_unit = torch.nn.functional.normalize(ai_embeddings, p=2, dim=1)
    cosine_scores = (candidate_unit * ai_unit).sum(dim=1).cpu().tolist()

    # Calculate evaluation metrics
    mse = mean_squared_error(targets, cosine_scores)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(targets, cosine_scores)
    r2 = r2_score(targets, cosine_scores)

    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Root Mean Squared Error: {rmse:.4f}")
    print(f"Mean Absolute Error: {mae:.4f}")
    print(f"R-squared Score: {r2:.4f}")

    return mse, rmse, mae, r2

## 3. Evaluate the Model
Load the preprocessed data and define the path of the exported model

In [9]:
# The dataset and the exported model are looked up in the `data` and `models`
# folders one level above the current working directory.
data_dir = os.path.join(os.path.abspath(''), os.pardir, 'data')
model_dir = os.path.join(os.path.abspath(''), os.pardir, 'models')

# Define model export/output path
model_path = os.path.join(model_dir, 'fine-tuned_all-MiniLM-L6-v2')

# Load the preprocessed data
df = pd.read_csv(os.path.join(data_dir, 'preprocessed_data.csv'))

Evaluate the Model

In [10]:
# Load the model stored at `model_path` onto the selected device
model = SentenceTransformer(model_path, device=device)

# Print the evaluation metrics and keep them for later use
mse, rmse, mae, r2 = evaluate_model(model=model, df=df)

Mean Squared Error: 0.0168
Root Mean Squared Error: 0.1296
Mean Absolute Error: 0.0854
R-squared Score: 0.7651
