# Unigram:

In [1]:
import nltk
from nltk import word_tokenize
from nltk.probability import FreqDist

# Fetch the 'punkt' resource from NLTK before tokenizing
nltk.download('punkt')

# Toy corpus used for the demonstration
corpus = "This is a simple example text. This text will be used to demonstrate unigrams."

# Lowercase first so that "This" and "this" are counted as the same token
tokens = word_tokenize(corpus.lower())

# A unigram is a single token: start from an empty distribution and feed it
# the token stream so every token (punctuation included) is tallied
unigrams = FreqDist()
unigrams.update(tokens)

# Report each distinct token together with its count
print("1. Unigrams:")
for word in unigrams:
    freq = unigrams[word]
    print(f"{word}: {freq}")


1. Unigrams:
this: 2
is: 1
a: 1
simple: 1
example: 1
text: 2
.: 2
will: 1
be: 1
used: 1
to: 1
demonstrate: 1
unigrams: 1


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Student\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Bigram:

In [2]:
import nltk
from nltk import word_tokenize, bigrams
from nltk.probability import FreqDist

# Toy corpus used for the demonstration
corpus = "This is a simple example text. This text will be used to demonstrate bigrams."

# Lowercase before tokenizing so capitalisation does not split counts
tokens = word_tokenize(corpus.lower())

# A bigram is a pair of adjacent tokens; materialise the pairs so the list
# stays available after the frequency table has been built from it
bi_grams = list(bigrams(tokens))
bigram_freq = FreqDist()
bigram_freq.update(bi_grams)

# Report each distinct pair together with its count
print("2. Bigrams:")
for bigram in bigram_freq:
    freq = bigram_freq[bigram]
    print(f"{bigram}: {freq}")


2. Bigrams:
('this', 'is'): 1
('is', 'a'): 1
('a', 'simple'): 1
('simple', 'example'): 1
('example', 'text'): 1
('text', '.'): 1
('.', 'this'): 1
('this', 'text'): 1
('text', 'will'): 1
('will', 'be'): 1
('be', 'used'): 1
('used', 'to'): 1
('to', 'demonstrate'): 1
('demonstrate', 'bigrams'): 1
('bigrams', '.'): 1


# Trigram:

In [3]:
import nltk
from nltk import word_tokenize, trigrams
from nltk.probability import FreqDist

# Toy corpus used for the demonstration
corpus = "This is a simple example text. This text will be used to demonstrate trigrams."

# Lowercase before tokenizing so capitalisation does not split counts
tokens = word_tokenize(corpus.lower())

# A trigram is a run of three adjacent tokens; materialise the triples so the
# list stays available after the frequency table has been built from it
tri_grams = list(trigrams(tokens))
trigram_freq = FreqDist()
trigram_freq.update(tri_grams)

# Report each distinct triple together with its count
print("3. Trigrams:")
for trigram in trigram_freq:
    freq = trigram_freq[trigram]
    print(f"{trigram}: {freq}")


3. Trigrams:
('this', 'is', 'a'): 1
('is', 'a', 'simple'): 1
('a', 'simple', 'example'): 1
('simple', 'example', 'text'): 1
('example', 'text', '.'): 1
('text', '.', 'this'): 1
('.', 'this', 'text'): 1
('this', 'text', 'will'): 1
('text', 'will', 'be'): 1
('will', 'be', 'used'): 1
('be', 'used', 'to'): 1
('used', 'to', 'demonstrate'): 1
('to', 'demonstrate', 'trigrams'): 1
('demonstrate', 'trigrams', '.'): 1


# Bigram Probabilities:

In [4]:
import nltk
from nltk import word_tokenize, bigrams
from nltk.probability import ConditionalFreqDist, FreqDist

# Toy corpus used for the demonstration
corpus = "This is a simple example text. This text will be used to demonstrate bigram probabilities."

# Lowercase before tokenizing so capitalisation does not split counts
tokens = word_tokenize(corpus.lower())

# cfd[w1][w2] -> how often w2 immediately follows w1
# unigrams[w1] -> how often w1 occurs anywhere in the token stream
cfd = ConditionalFreqDist(bigrams(tokens))
unigrams = FreqDist(tokens)

# Estimate P(w2 | w1) = count(w1, w2) / count(w1).
# NOTE: the denominator counts every occurrence of w1, including one at the
# very end of the corpus that has no successor, so the probabilities for such
# a word (here the final ".") add up to less than 1.
bigram_prob = {
    w1: {w2: pair_count / unigrams[w1] for w2, pair_count in followers.items()}
    for w1, followers in cfd.items()
}

# Print each context word followed by an indented list of its successors
print("4. Bigram Probabilities:")
for word1 in bigram_prob:
    print(f"{word1}:")
    for word2, prob in bigram_prob[word1].items():
        print(f"  {word2}: {prob:.4f}")


4. Bigram Probabilities:
this:
  is: 0.5000
  text: 0.5000
is:
  a: 1.0000
a:
  simple: 1.0000
simple:
  example: 1.0000
example:
  text: 1.0000
text:
  .: 0.5000
  will: 0.5000
.:
  this: 0.5000
will:
  be: 1.0000
be:
  used: 1.0000
used:
  to: 1.0000
to:
  demonstrate: 1.0000
demonstrate:
  bigram: 1.0000
bigram:
  probabilities: 1.0000
probabilities:
  .: 1.0000


# Next Word Prediction:

In [5]:
import nltk
from nltk import word_tokenize, bigrams
from nltk.probability import ConditionalFreqDist

# FreqDist is used below but this cell's import lines only bring in
# ConditionalFreqDist. Import it here so the cell also runs on a fresh kernel
# instead of depending on an earlier cell having been executed first.
from nltk.probability import FreqDist

# Sample text corpus
corpus = "This is a simple example text. This text will be used to demonstrate next word prediction."

# Tokenize the text (lowercased so capitalisation does not split counts)
tokens = word_tokenize(corpus.lower())

# Calculate bigram frequencies:
# cfd[w1][w2] -> how often w2 immediately follows w1
# unigrams[w1] -> how often w1 occurs anywhere in the token stream
cfd = ConditionalFreqDist(bigrams(tokens))
unigrams = FreqDist(tokens)

# Estimate P(w2 | w1) = count(w1, w2) / count(w1)
bigram_prob = {w1: {w2: cfd[w1][w2] / unigrams[w1] for w2 in cfd[w1]} for w1 in cfd}

# Next word prediction function
def predict_next_word(word, probabilities=None):
    """Return the words observed after *word*, most probable first.

    Args:
        word: The context token to look up.
        probabilities: Optional mapping ``{w1: {w2: P(w2 | w1)}}``. When
            omitted, the module-level ``bigram_prob`` table is used.

    Returns:
        A list of candidate next words ordered by descending probability.
        Candidates with equal probability keep the order in which they
        appear in the table. An unknown *word* yields an empty list.
    """
    if probabilities is None:
        probabilities = bigram_prob
    candidates = probabilities.get(word, {})
    # sorted() is stable even with reverse=True, so ties keep table order.
    return sorted(candidates, key=candidates.get, reverse=True)

# Try the predictor on a word that has more than one observed successor
test_word = "text"
header = f"5. Next word predictions for '{test_word}':"
print(header)
predictions = predict_next_word(test_word)
print(predictions)


5. Next word predictions for 'text':
['.', 'will']
