In [1]:
!pip install torch
!pip install transformers
!pip install numpy



In [16]:
import requests
import torch
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from torch.nn.functional import softmax

# AI Detector API
def ai_detector(generated_text):
    gptzero_api_key = ''
    url = "https://api.gptzero.me/v2/predict/text"
    payload = {"document": generated_text, "version": "2024-04-04"}
    headers = {
        "x-api-key": gptzero_api_key,
        "Content-Type": "application/json",
        "Accept": "application/json"
    }
    response = requests.post(url, json=payload, headers=headers)
    if response.status_code == 200:
        data = response.json()
        if data['documents']:
            class_probabilities = data['documents'][0]['class_probabilities']
            return class_probabilities
        else:
            return {'ai': 0, 'human': 0, 'mixed': 0}
    else:
        return {'ai': 0, 'human': 0, 'mixed': 0}

# Load pre-trained models
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
detector_model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
mlm_model = pipeline("fill-mask", model="bert-base-uncased")

# Compute gradients for word importance
def compute_gradients(text, model, tokenizer):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    # Convert input IDs to embeddings
    embeddings = model.bert.embeddings.word_embeddings(inputs['input_ids'])
    embeddings.retain_grad()  # Retain gradients for embeddings
    outputs = model(inputs_embeds=embeddings, attention_mask=inputs['attention_mask'])
    loss = torch.nn.CrossEntropyLoss()(outputs.logits, torch.tensor([1]).to(outputs.logits.device))  # Ensure target tensor is on the same device
    loss.backward()
    gradients = embeddings.grad
    return gradients

# Calculate word importance scores
def calculate_word_importance(gradients, tokenizer, input_ids):
    token_importance = gradients.abs().sum(dim=-1).squeeze().cpu().numpy()
    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().cpu().numpy())
    return dict(zip(tokens, token_importance))

# Generate adversarial example
def generate_adversarial_example(text, importance_scores, mlm_model, num_replacements=5):
    tokens = tokenizer.tokenize(text)
    sorted_importance = sorted(importance_scores.items(), key=lambda x: x[1], reverse=True)
    top_tokens = [word for word, _ in sorted_importance[:num_replacements]]

    # Replace only non-special tokens
    for token in top_tokens:
        if token in tokens:
            mask_index = tokens.index(token)
            tokens[mask_index] = tokenizer.mask_token
            masked_text = tokenizer.convert_tokens_to_string(tokens)
            suggestions = mlm_model(masked_text)
            best_replacement = suggestions[0]['token_str']
            tokens[mask_index] = best_replacement

    return tokenizer.convert_tokens_to_string(tokens)

# Main script
text = "Biodiversity is vital for ecosystem health and stability, providing resilience against environmental changes and supporting a wide range of ecosystem services. It ensures food security, medicinal resources, and maintains natural cycles, contributing to overall planetary well-being."
gradients = compute_gradients(text, detector_model, tokenizer)
input_ids = tokenizer(text, return_tensors='pt')['input_ids']
importance_scores = calculate_word_importance(gradients, tokenizer, input_ids)

# Generate adversarial example
adversarial_example = generate_adversarial_example(text, importance_scores, mlm_model)

# Evaluate original and adversarial examples using the AI detector API
original_score = ai_detector(generated_text=text)
adversarial_score = ai_detector(generated_text=adversarial_example)

print("Original Text:", text)
print("Original Score:", original_score)
print("Adversarial Text:", adversarial_example)
print("Adversarial Score:", adversarial_score)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Original Text: Biodiversity is vital for ecosystem health and stability, providing resilience against environmental changes and supporting a wide range of ecosystem services. It ensures food security, medicinal resources, and maintains natural cycles, contributing to overall planetary well-being.
Original Score: {'ai': 1, 'human': 0, 'mixed': 0}
Adversarial Text: it is responsible for maintaining health and stability , providing resilience against environmental changes and supporting a wide range of ecosystem services . it ensures food security , medicinal resources , and maintains natural cycles , contributing to overall human well - being .
Adversarial Score: {'ai': 0.4200164068908942, 'human': 0.5225594749794913, 'mixed': 0.05742411812961444}
