# AI Detector - Inference

## 1. Import Necessary Dependencies

In [29]:
import os
import torch
from sentence_transformers import SentenceTransformer, util

We should also specify `device` so that inference runs on the GPU when one is available (falling back to the CPU otherwise).

In [30]:
# Pick the compute device once so later cells can reuse it:
# CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cpu


## 2. Define `predict_similarity()` Function

- **Params:**
  - `model` -> Loaded fine-tuned `SentenceTransformer` model (the object, not its path)
  - `question` -> An unseen coding question
  - `candidate_answer` -> An unseen candidate answer
  - `ai_answer` -> An unseen AI answer
- **Returns:**
  - `similarity_score`: Predicted plagiarism score, i.e. the cosine similarity between the embeddings of the candidate answer and the AI answer

In [31]:
def predict_similarity(model, question, candidate_answer, ai_answer):
    """Score how similar a candidate's answer is to an AI-written answer.

    Both answers are prefixed with the same question text, embedded in a
    single ``model.encode`` call, and compared with cosine similarity.

    Args:
        model: Object exposing ``encode(list_of_str)`` that returns one
            embedding per input (the notebook passes a loaded
            ``SentenceTransformer``).
        question: The coding question both answers respond to.
        candidate_answer: The candidate's answer text.
        ai_answer: The AI-generated answer text.

    Returns:
        The cosine similarity of the two embeddings as a plain Python
        number (extracted with ``.item()``).
    """
    # Same template for both texts so only the answer part differs.
    prompts = [
        f"Question: {question} Answer: {answer}"
        for answer in (candidate_answer, ai_answer)
    ]

    # Two inputs go in, so exactly two embeddings come back.
    candidate_embedding, ai_embedding = model.encode(prompts)

    # pytorch_cos_sim returns a tensor holding the single pairwise score;
    # .item() unwraps it.
    return util.pytorch_cos_sim(candidate_embedding, ai_embedding).item()

## 3. Perform Inference with Unseen Data

Load the fine-tuned model

In [32]:
# Load the trained model.
# The model directory is resolved relative to the current working directory:
# <cwd>/../models/fine-tuned_all-MiniLM-L6-v2
model_dir = os.path.join(os.path.abspath(''), os.pardir, 'models')
model_path = os.path.join(model_dir, 'fine-tuned_all-MiniLM-L6-v2')

# Fail fast with a readable message if the fine-tuned weights are missing,
# rather than handing a non-existent local path to the loader.
if not os.path.isdir(model_path):
    raise FileNotFoundError(
        f"Fine-tuned model directory not found: {model_path}")

# Place the model on the device selected earlier in the notebook.
model = SentenceTransformer(model_path, device=device)

Prepare unseen data

In [35]:
# Coding prompt that both answers below respond to.
question = "Write a function to find the largest element in an array."
# Candidate submission: explicit-loop solution. The newlines right after the
# opening and before the closing triple quotes are part of the string.
candidate_answer = """
def get_largest(arr):
    if not arr:
        return None
    max_element = arr[0]
    for element in arr[1:]:
        if element > max_element:
            max_element = element
    return max_element
"""
# AI-style reference answer: the same task solved with the built-in max().
ai_answer = """
def find_largest(arr):
    return max(arr) if arr else None
"""

Predict similarity

In [34]:
# Compare the candidate's answer with the AI answer for the same question.
similarity = predict_similarity(
    model=model,
    question=question,
    candidate_answer=candidate_answer,
    ai_answer=ai_answer,
)
print(f"Similarity score: {similarity:.4f}")

Similarity score: 0.7903
