In [None]:
import torch
print(torch.__version__)


2.6.0+cu124


In [None]:
!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0

Collecting torch==2.1.0
  Downloading torch-2.1.0-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)
Collecting torchvision==0.16.0
  Downloading torchvision-0.16.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting torchaudio==2.1.0
  Downloading torchaudio-2.1.0-cp311-cp311-manylinux1_x86_64.whl.metadata (5.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.1.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.1.0)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
!pip install torch-xla -f https://storage.googleapis.com/tpu-pytorch/wheels/colab.html

Looking in links: https://storage.googleapis.com/tpu-pytorch/wheels/colab.html
Collecting torch-xla
  Downloading torch_xla-2.6.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (21 kB)
Downloading torch_xla-2.6.0-cp311-cp311-manylinux_2_28_x86_64.whl (93.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.6/93.6 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-xla
Successfully installed torch-xla-2.6.0


In [None]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [2]:
from google.colab import drive
drive.mount('/content/drive')

!pip install transformers torch scikit-learn pandas

import pandas as pd

# ✅ Update paths based on Google Drive location
human_dataset_path = "/content/drive/MyDrive/Mini Project/HumanDataset.csv"
ai_dataset_path = "/content/drive/MyDrive/Mini Project/AiDataset.csv"

# ✅ Load datasets
human_texts = pd.read_csv(human_dataset_path)
ai_texts = pd.read_csv(ai_dataset_path)

# ✅ Ensure correct column names
human_texts.columns = human_texts.columns.str.strip()
ai_texts.columns = ai_texts.columns.str.strip()

# ✅ Check if "text" column exists
if "text" not in human_texts.columns or "text" not in ai_texts.columns:
    raise KeyError("Column 'text' not found in dataset. Check CSV files.")

# ✅ Assign labels
human_texts["label"] = 0  # Human = 0
ai_texts["label"] = 1  # AI = 1

# ✅ Merge and shuffle dataset
df = pd.concat([human_texts, ai_texts]).sample(frac=1).reset_index(drop=True)

# ✅ Print first 5 rows to confirm
print(df.head())

import torch
from transformers import BertTokenizer
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# ✅ Load tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# ✅ Ensure df["text"] is a list of strings
df["text"] = df["text"].astype(str).fillna("")  # Convert NaN to empty strings

# ✅ Split dataset
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df["text"].tolist(), df["label"].tolist(), test_size=0.2, random_state=42
)

# ✅ Create Dataset Class
class TextDataset(Dataset):
    def __init__(self, texts, labels):
        self.encodings = tokenizer(
            texts, padding=True, truncation=True, max_length=512, return_tensors="pt"
        )
        self.labels = torch.tensor(labels, dtype=torch.long)  # Ensure labels are LongTensor

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["labels"] = self.labels[idx]
        return item

# ✅ Convert to PyTorch Dataset
train_dataset = TextDataset(train_texts, train_labels)
test_dataset = TextDataset(test_texts, test_labels)

# ✅ Create DataLoaders (Optimized for GPU)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, pin_memory=True)

print("✅ Data preprocessing completed successfully!")

!pip install opencv-python

import os
from transformers import BertForSequenceClassification
from torch.optim import AdamW

# ✅ Check if a saved model exists
model_path = "/content/drive/MyDrive/Mini Project/saved_model"

if os.path.exists(model_path):
    print("✅ Loading pre-trained model...")
    model = BertForSequenceClassification.from_pretrained(model_path)
else:
    print("🚀 No trained model found! Initializing new model.")
    model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print("✅ Model is ready!")

# ✅ Initialize optimizer
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-2)

# ✅ Training function
def train(model, train_loader, optimizer, epochs=3):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        correct = 0
        total = 0

        for batch in train_loader:
            batch = {key: val.to(device) for key, val in batch.items()}

            optimizer.zero_grad()
            outputs = model(**batch)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            preds = torch.argmax(outputs.logits, dim=1)
            correct += (preds == batch["labels"]).sum().item()
            total += batch["labels"].size(0)

        train_accuracy = correct / total
        print(f"✅ Epoch {epoch+1} completed | Loss: {total_loss / len(train_loader):.4f} | Training Accuracy: {train_accuracy:.4f}")

    model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)
    print(f"✅ Model training completed and saved in: {model_path}")

# ✅ Evaluation function
def evaluate(model, test_loader):
    model.eval()
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in test_loader:
            batch = {key: val.to(device) for key, val in batch.items()}

            outputs = model(**batch)
            preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
            labels = batch["labels"].cpu().numpy()

            predictions.extend(preds)
            true_labels.extend(labels)

    test_accuracy = accuracy_score(true_labels, predictions)
    print(f"✅ Model Testing Accuracy: {test_accuracy:.4f}")
    print("📊 Classification Report:\n", classification_report(true_labels, predictions))

# ✅ Train the model if no saved model exists
if not os.path.exists(model_path):
    train(model, train_loader, optimizer)

# ✅ Always evaluate the model
evaluate(model, test_loader)

# ✅ Prediction function
def predict_text(text):
    model.eval()
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)

    with torch.no_grad():
        output = model(**inputs)
        prediction = torch.argmax(output.logits, dim=1).item()

    return "AI-generated" if prediction == 1 else "Human-generated"

# ✅ User input for prediction
while True:
    user_input = input("\nEnter a text to check (or type 'exit' to quit): ")
    if user_input.lower() == "exit":
        break
    print("📝 Prediction:", predict_text(user_input))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
                                                text  \
0  We conducted more studies on the CIFAR-10 data...   
1  Similar to k-nearest neighbors (k-NN), prototy...   
2  This text-to-image diffusion model draws upon ...   
3  Data innovation process (data process). This i...   
4  On the other hand, some structural information...   

                           DOI  Label  label  
0         10.1109/CVPR.2016.90    0.0      0  
1    10.1109/TCBB.2022.3140873    1.0      1  
2  10.1109/ACCESS.2024.3502628    1.0      1  
3  10.1109/MITP.2018.011291352    0.0      0  
4      10.1109/TIP.2003.819861    0.0      0  
✅ Data preprocessing completed successfully!
✅ Loading pre-trained model...
✅ Model is ready!
✅ Model Testing Accuracy: 0.9803
📊 Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.97      0.98 

KeyboardInterrupt: Interrupted by user