<a href="https://colab.research.google.com/github/vsingh9076/Natural_Language_Processing/blob/master/Text%20Completion/text_completion_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Prefix-based autocomplete using a trie (character-by-character prefix matching)

class TrieNode:
    """One node of a character trie.

    Attributes are plain instance fields:
      * ``children`` maps a single character to the next node.
      * ``is_end_of_word`` starts out False and is switched on by whoever
        builds the trie when a stored word ends at this node.
    """

    def __init__(self):
        # Every node gets its own fresh mapping; nothing is shared between nodes.
        self.children = dict()
        self.is_end_of_word = False

class AutocompleteSystem:
    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.is_end_of_word = True

    def find_words_with_prefix(self, prefix):
        node = self.root
        for char in prefix:
            if char not in node.children:
                return []
            node = node.children[char]
        return self._find_words_from_node(node, prefix)

    def _find_words_from_node(self, node, prefix):
        words = []
        if node.is_end_of_word:
            words.append(prefix)
        for char, child_node in node.children.items():
            words.extend(self._find_words_from_node(child_node, prefix + char))
        return words

# Example usage: build a trie from a small vocabulary, then query one prefix.
autocomplete = AutocompleteSystem()
words = ["apple", "app", "application", "banana", "bat"]
# Insert each vocabulary word so it becomes a candidate completion.
for word in words:
    autocomplete.insert(word)

# "app" is itself a stored word, so it appears in its own completions.
prefix = "app"
completions = autocomplete.find_words_with_prefix(prefix)
print(completions)  # Output: ['app', 'apple', 'application']


['app', 'apple', 'application']


In [15]:
# Autocomplete using an n-gram model: suggest the n-grams whose leading words match the end of the input

import nltk
from nltk import ngrams
from collections import Counter

# Sample text data: the tiny corpus from which the n-gram counts are built.
text = "This is a sample text for building an n-gram model. This is just an example."

# Tokenize the text into a flat list of tokens.
# NOTE(review): nltk.word_tokenize presumably needs NLTK tokenizer data to be
# available locally -- confirm it has been downloaded before this cell runs.
tokens = nltk.word_tokenize(text)

# Create n-grams
def generate_ngrams(token_list, n):
    """Return the n-grams of ``token_list`` as a list.

    The n-grams are produced by ``nltk.ngrams`` and materialised into a list
    so the result can be iterated more than once.
    """
    grams = ngrams(token_list, n)
    return [*grams]

# Order of the model: each n-gram spans n consecutive tokens.
n = 3  # Example: Trigrams
ngram_list = generate_ngrams(tokens, n)

# Count the frequency of n-grams: maps each n-gram to how often it occurs
# in ngram_list.
ngram_counts = Counter(ngram_list)

# Autocompletion function
def predict_completions(input_sequence, n, ngram_counts):
    """Return the n-grams whose first ``n - 1`` items match the input's tail.

    Args:
        input_sequence: Text typed so far. It is split on whitespace and only
            the last ``n - 1`` words are used as context.
        n: Order of the n-grams stored in ``ngram_counts``; expected to be
            at least 1.
        ngram_counts: Mapping whose keys are n-gram tuples (for example a
            ``Counter``). Only the keys are read; the counts are not used
            for ranking.

    Returns:
        A list of matching n-gram tuples, in the mapping's iteration order.
        With ``n == 1`` there is no context, so every n-gram is returned.
        If the input has fewer than ``n - 1`` words, nothing matches and the
        list is empty.
    """
    context_len = n - 1
    words = input_sequence.split()
    # A plain ``words[-context_len:]`` would return the whole list when
    # context_len is 0 (``[-0:]`` is ``[0:]``), so handle that case explicitly.
    context = tuple(words[-context_len:]) if context_len > 0 else ()
    return [ngram for ngram in ngram_counts if ngram[:context_len] == context]

# Example usage: look up the trigrams that continue the words "This is".
# The input is split on whitespace by predict_completions, whereas the model
# tokens above come from nltk.word_tokenize.
input_sequence = "This is"
completions = predict_completions(input_sequence, n, ngram_counts)
print(completions)

[('This', 'is', 'a'), ('This', 'is', 'just')]
