<a href="https://colab.research.google.com/github/venkatesh010196/naive-bayes-classifier/blob/main/naive_bayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers accelerate

Authenticate with the Hugging Face Hub API so that the pretrained LLM can be downloaded.

In [2]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: never commit an access token to a notebook. A token that has been
# published in a notebook should be revoked and replaced.
# The token is read from the HF_TOKEN environment variable when it is set;
# otherwise it is requested interactively without being echoed or stored here.
login(os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: "))

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Download the Llama LLM into memory so it can be used to generate AI-written essays.

In [None]:
from transformers import AutoTokenizer
import transformers
import torch

# Hub identifier of the checkpoint loaded by both calls below.
model = "meta-llama/Llama-2-7b-chat-hf"

# The tokenizer is loaded on its own because later cells read
# `tokenizer.eos_token_id`.
tokenizer = AutoTokenizer.from_pretrained(model)

# Text-generation pipeline with float16 weights; device placement is left to
# `device_map="auto"`.
pipeline = transformers.pipeline(
    task="text-generation",
    device_map="auto",
    torch_dtype=torch.float16,
    model=model,
)



setting per process GPU memory fraction using PyTorch

In [4]:
# Let this process use up to the full memory of the GPU. The call is skipped on
# CPU-only runtimes, where touching the CUDA allocator would raise.
if torch.cuda.is_available():
    torch.cuda.set_per_process_memory_fraction(1.00)

Testing the LLM by providing a sample prompt

In [5]:
# Smoke test: request one sampled completion for a short question.
sequences = pipeline(
    'who is president of india?',
    max_length=50,
    num_return_sequences=1,
    do_sample=True,
    top_k=10,
    eos_token_id=tokenizer.eos_token_id,
)

# Raw pipeline output first (a list with one dict per returned sequence) ...
print(sequences)

# ... then only the generated text of each sequence.
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

[{'generated_text': 'who is president of india?\n\nThe current President of India is Ram Nath Kovind. He was elected as the President of India in 2017 and took office on July 25, 2017'}]
Result: who is president of india?

The current President of India is Ram Nath Kovind. He was elected as the President of India in 2017 and took office on July 25, 2017


In [4]:
import inspect
import math
import os
import time
from collections import defaultdict
from pathlib import Path

import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.model_selection import StratifiedKFold
# NOTE(review): this rebinds the notebook-level name `pipeline` to the
# transformers factory function, hiding the pipeline object created in the
# model-loading cell.
from transformers import pipeline

class NaiveBayes:
    """Word-presence Naive Bayes classifier for human vs. AI-written essays.

    The training frame must provide the columns ``text`` and ``generated``
    (0 = human, 1 = AI). Augmentation additionally reads ``prompt_name``,
    ``instructions`` and ``source_text``.

    Construction augments the data with LLM-generated essays, then runs a
    stratified 5-fold cross-validation and keeps the vocabulary and counts of
    the best-scoring fold in the ``final*`` attributes.

    Tokenisation relies on NLTK's English stop-word list and ``word_tokenize``;
    the corresponding NLTK data must already be downloaded.
    """

    # Words found in fewer essays than this are dropped from the overall and
    # the human vocabulary (the AI vocabulary is kept unfiltered).
    MIN_DOCUMENT_FREQUENCY = 5
    # Pause between two consecutive generation calls, in seconds.
    GENERATION_PAUSE_SECONDS = 3

    # Lazily created tokenisation helpers shared by all instances.
    _stop_words = None
    _stemmer = None

    def __init__(self, train_data, generator=None, n_augment=100):
        """Augment ``train_data`` and train the classifier.

        Args:
            train_data: DataFrame with the columns described on the class.
            generator: callable text-generation pipeline used for augmentation.
                When omitted, a notebook-level object named ``pipeline`` is
                used if it is an instantiated pipeline.
            n_augment: number of essays to generate; 0 disables augmentation.
        """
        self.generator = generator
        # Result slots are created before train() runs so that training
        # results are not overwritten afterwards.
        self.wordProbList = []
        self.finalAiReverseIndexedDict = {}
        self.finalAiWordOccuranceList = []
        self.finalHumanReverseIndexedList = {}
        self.finalHumanWordOccuranceList = []
        self.finalAiDocCount = 0
        self.finalHumanDocCount = 0
        self.accuracy = 0.0
        self.merged_df = self.dataAugmentation(train_data, n_augment)
        self.train()

    @staticmethod
    def tokenize(text):
        """Return the stemmed, lower-cased alphabetic non-stop-words of ``text``."""
        if NaiveBayes._stop_words is None:
            # Built once: the stop-word set and the stemmer do not change
            # between calls.
            NaiveBayes._stop_words = frozenset(stopwords.words('english'))
            NaiveBayes._stemmer = PorterStemmer()
        stop_words = NaiveBayes._stop_words
        stem = NaiveBayes._stemmer.stem
        lowered = (w.lower() for w in word_tokenize(text) if w.isalpha())
        return [stem(w) for w in lowered if w not in stop_words]

    def preprocess_text(self, train_data_fold):
        """Tokenise every entry of the ``text`` column; returns a list of word lists."""
        return [self.tokenize(text) for text in train_data_fold['text']]

    def _resolveGenerator(self):
        """Return the text generator to use, or raise if none is usable."""
        generator = getattr(self, 'generator', None)
        if generator is not None:
            return generator
        candidate = globals().get('pipeline')
        # `from transformers import pipeline` binds the factory *function* to
        # this name; only an instantiated pipeline can generate text.
        if candidate is None or inspect.isfunction(candidate):
            raise RuntimeError(
                "No text-generation pipeline available: pass generator=<pipeline object> "
                "to NaiveBayes(...) or use n_augment=0 to skip augmentation."
            )
        return candidate

    # Auxiliary method to augment the available data by adding essays generated by an LLM.
    def dataAugmentation(self, merged_df, n_samples=100):
        """Return a copy of ``merged_df`` extended with LLM-generated essays.

        ``n_samples`` rows are sampled (capped at the frame size); for each one
        an essay is generated from its source text and instructions and added
        as a new row labelled ``generated = 1``. The input frame is not
        modified.
        """
        n_samples = min(n_samples, len(merged_df))
        if n_samples <= 0:
            return merged_df.copy()

        generator = self._resolveGenerator()
        generation_kwargs = {
            'do_sample': True,
            'top_k': 5,
            # Only the first sequence is used, so only one is requested.
            'num_return_sequences': 1,
            # Limit the generated continuation, not prompt + continuation:
            # the source text alone can be longer than 200 tokens.
            'max_new_tokens': 200,
            # Keep the prompt out of the stored essay text.
            'return_full_text': False,
        }
        eos_token_id = getattr(getattr(generator, 'tokenizer', None), 'eos_token_id', None)
        if eos_token_id is not None:
            generation_kwargs['eos_token_id'] = eos_token_id

        selected_prompts = merged_df.sample(n_samples)[['prompt_name', 'instructions', 'source_text']]
        count = len(merged_df)
        records = []
        for row in selected_prompts.itertuples(index=False):
            message = f"Source: {row.source_text}\nInstructions: {row.instructions}"
            sequences = generator(message, **generation_kwargs)
            records.append({
                'id': count + 1,
                'prompt_id': f'gpt{count + 1}',
                'text': sequences[0]['generated_text'],
                'generated': 1,
                'prompt_name': row.prompt_name,
                'instructions': row.instructions,
                'source_text': row.source_text,
            })
            count += 1
            # Short pause between generation calls.
            time.sleep(self.GENERATION_PAUSE_SECONDS)

        # Rows are collected first and concatenated once; DataFrame.append no
        # longer exists in current pandas.
        return pd.concat([merged_df, pd.DataFrame(records)], ignore_index=True)

    def buildClassifier(self, train_data_fold):
        """Fit vocabularies and document counts on one training fold.

        ``train_data_fold`` is a DataFrame with ``text`` and ``generated``
        columns. Sets the overall, AI and human vocabularies with their
        per-word document counts, the per-class document counts, and
        ``wordProbList`` (fraction of fold essays containing each word).
        """
        self.ReverseIndexedList, self.allWordOccuranceList = self.makeReverseIndexedDict(train_data_fold, 2)
        fold_size = len(train_data_fold)
        # Uses the occurrence counts (not the vocabulary indices).
        self.wordProbList = [occurrences / fold_size for occurrences in self.allWordOccuranceList]
        self.aiReverseIndexedList, self.aiWordOccuranceList = self.makeReverseIndexedDict(train_data_fold, 1)
        self.humanReverseIndexedList, self.humanWordOccuranceList = self.makeReverseIndexedDict(train_data_fold, 0)
        labels = train_data_fold['generated']
        self.aiDocCount = int((labels == 1).sum())
        self.humanDocCount = int((labels == 0).sum())

    def makeReverseIndexedDict(self, data_fold, generated):
        """Build ``({word: index}, [document count by index])`` for one class.

        ``generated`` selects the rows: 0 = human, 1 = AI, any other value
        (2 by convention) = all rows. For 0 and 2, words occurring in fewer
        than ``MIN_DOCUMENT_FREQUENCY`` essays are dropped.
        """
        if generated in (0, 1):
            data_fold = data_fold[data_fold['generated'] == generated]

        document_frequency = defaultdict(int)
        for word_list in self.preprocess_text(data_fold):
            # A word counts once per essay, however often it is repeated.
            for word in set(word_list):
                document_frequency[word] += 1

        if generated == 2 or generated == 0:
            kept_words = [word for word, occurrences in document_frequency.items()
                          if occurrences >= self.MIN_DOCUMENT_FREQUENCY]
        else:
            kept_words = list(document_frequency)

        reverse_indexed_dict = {word: index for index, word in enumerate(kept_words)}
        word_occurrence_list = [document_frequency[word] for word in kept_words]
        return reverse_indexed_dict, word_occurrence_list

    def train(self):
        """Run stratified 5-fold cross-validation and keep the best fold's model.

        Returns the best fold accuracy, which is also stored in
        ``self.accuracy``. Afterwards both the ``final*`` attributes and the
        current model attributes describe the best fold.
        """
        X = self.merged_df['text']
        y = self.merged_df['generated']
        best_accuracy = -1.0
        stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        for train_index, test_index in stratified_kfold.split(X, y):
            # split() yields positions, hence iloc rather than label lookup.
            train_fold = self.merged_df.iloc[train_index]
            test_fold = self.merged_df.iloc[test_index]

            self.buildClassifier(train_fold)
            fold_accuracy = self.test(test_fold['text'], test_fold['generated'],
                                      self.humanReverseIndexedList, self.humanWordOccuranceList,
                                      self.aiReverseIndexedList, self.aiWordOccuranceList)
            if fold_accuracy > best_accuracy:
                best_accuracy = fold_accuracy
                self.finalAiReverseIndexedDict = self.aiReverseIndexedList
                self.finalAiWordOccuranceList = self.aiWordOccuranceList
                self.finalHumanReverseIndexedList = self.humanReverseIndexedList
                self.finalHumanWordOccuranceList = self.humanWordOccuranceList
                self.finalAiDocCount = self.aiDocCount
                self.finalHumanDocCount = self.humanDocCount

        # Leave the instance in the state of the selected fold so that later
        # predict()/test() calls use matching vocabularies and counts.
        self.aiReverseIndexedList = self.finalAiReverseIndexedDict
        self.aiWordOccuranceList = self.finalAiWordOccuranceList
        self.humanReverseIndexedList = self.finalHumanReverseIndexedList
        self.humanWordOccuranceList = self.finalHumanWordOccuranceList
        self.aiDocCount = self.finalAiDocCount
        self.humanDocCount = self.finalHumanDocCount
        self.accuracy = max(best_accuracy, 0.0)
        return self.accuracy

    @staticmethod
    def _wordLikelihood(word, ReverseIndexedList, WordOccuranceList, doc_count):
        """Laplace-smoothed probability that an essay of the class contains ``word``."""
        occurrences = WordOccuranceList[ReverseIndexedList[word]] if word in ReverseIndexedList else 0
        return (1 + occurrences) / (2 + doc_count)

    def calculateClassProbability(self, word, ReverseIndexedList, WordOccuranceList, dataframe):
        """Smoothed P(word present | class).

        Adds 1 to the word's document count and divides by the number of class
        documents (``len(dataframe)``) plus 2; unknown words count as 0.
        """
        return self._wordLikelihood(word, ReverseIndexedList, WordOccuranceList, len(dataframe))

    def predict(self, wordList, humanReverseIndexedList, humanWordOccuranceList, aiReverseIndexedList,
                aiWordOccuranceList, dataframe=None):
        """Classify one tokenised essay; returns 1 for AI, 0 for human.

        Class sizes come from ``dataframe['generated']`` when ``dataframe`` is
        given, otherwise from the counts of the most recently built model.
        Ties are resolved as human (0).
        """
        if dataframe is not None:
            labels = dataframe['generated']
            human_docs = int((labels == 0).sum())
            ai_docs = int((labels == 1).sum())
        else:
            human_docs = self.humanDocCount
            ai_docs = self.aiDocCount
        total_docs = human_docs + ai_docs

        # Scores are accumulated in log space: the product of hundreds of
        # small probabilities underflows to 0.0 for both classes.
        human_score = math.log(human_docs / total_docs) if human_docs else float('-inf')
        ai_score = math.log(ai_docs / total_docs) if ai_docs else float('-inf')

        for word in set(wordList):
            human_score += math.log(self._wordLikelihood(word, humanReverseIndexedList,
                                                         humanWordOccuranceList, human_docs))
            ai_score += math.log(self._wordLikelihood(word, aiReverseIndexedList,
                                                      aiWordOccuranceList, ai_docs))

        return 1 if ai_score > human_score else 0

    def test(self, X_test=None, y_test=None, humanReverseIndexedList=None, humanWordOccuranceList=None,
             AiReverseIndexedList=None, AiWordOccuranceList=None):
        """Return the fraction of correctly classified essays.

        ``X_test`` holds raw essay texts and ``y_test`` the matching labels.
        Omitted arguments default to the full (augmented) data set and to the
        final model respectively; note that such a default evaluation largely
        covers essays the model was trained on.
        """
        if X_test is None or y_test is None:
            X_test = self.merged_df['text']
            y_test = self.merged_df['generated']
        if humanReverseIndexedList is None:
            humanReverseIndexedList = self.finalHumanReverseIndexedList
        if humanWordOccuranceList is None:
            humanWordOccuranceList = self.finalHumanWordOccuranceList
        if AiReverseIndexedList is None:
            AiReverseIndexedList = self.finalAiReverseIndexedDict
        if AiWordOccuranceList is None:
            AiWordOccuranceList = self.finalAiWordOccuranceList

        if len(X_test) == 0:
            return 0.0

        correct = 0
        # Each prediction is compared with the label of the same essay.
        for sample, label in zip(X_test, y_test):
            predicted = self.predict(self.tokenize(sample), humanReverseIndexedList, humanWordOccuranceList,
                                     AiReverseIndexedList, AiWordOccuranceList)
            if predicted == label:
                correct += 1
        return correct / len(X_test)


In [2]:
# Directory holding train_essays.csv and train_prompts.csv. It can be set
# through the LLM_DETECT_DATA_DIR environment variable; the fallback is the
# location this notebook was originally run from.
DATA_DIR = Path(os.environ.get(
    'LLM_DETECT_DATA_DIR',
    '/Users/venkatesh_vinnakota/Downloads/llm-detect-ai-generated-text',
))

essays_df = pd.read_csv(DATA_DIR / 'train_essays.csv')
prompts_df = pd.read_csv(DATA_DIR / 'train_prompts.csv')
# Attach prompt name, instructions and source text to every essay.
merged_df = pd.merge(essays_df, prompts_df, on='prompt_id')

classifier = NaiveBayes(merged_df)

# Score the selected model on the original essays. These essays were also used
# for training, so this is not a held-out estimate.
accuracy = classifier.test(
    merged_df['text'],
    merged_df['generated'],
    classifier.finalHumanReverseIndexedList,
    classifier.finalHumanWordOccuranceList,
    classifier.finalAiReverseIndexedDict,
    classifier.finalAiWordOccuranceList,
)
# test() returns a fraction in [0, 1]; format it as a percentage.
print(f'accuracy=======>{accuracy:.2%}')

