In [1]:
import pandas as pd
data=pd.read_csv('Context.csv')
data.head()

Unnamed: 0,Text,Context/Topic
0,The eternal mystique of Goldman Sachs,Politics
1,Either you don't care enough to actually tell ...,Love
2,I am such an IDIOT.,Heavy Emotion
3,While lifting weights on Friday and doing bent...,Health
4,Something's watching me,Animals


### <- We want only the Text as our input sentences. Also taking only a subset ->

In [2]:
data=data.iloc[:3000,:1]

In [3]:
data

Unnamed: 0,Text
0,The eternal mystique of Goldman Sachs
1,Either you don't care enough to actually tell ...
2,I am such an IDIOT.
3,While lifting weights on Friday and doing bent...
4,Something's watching me
...,...
2995,Internal Relationships and How they Hinder the...
2996,Hemp seed extract acted on U-87 cells by induc...
2997,What i wanna say is: Hitler is the same good a...
2998,"I built this app myself for fun, because I thi..."


# TEXT PREPROCESSING

### **Convert text to lower case**

In [4]:
data['clean_text'] = data['Text'].str.lower()
data.head()

Unnamed: 0,Text,clean_text
0,The eternal mystique of Goldman Sachs,the eternal mystique of goldman sachs
1,Either you don't care enough to actually tell ...,either you don't care enough to actually tell ...
2,I am such an IDIOT.,i am such an idiot.
3,While lifting weights on Friday and doing bent...,while lifting weights on friday and doing bent...
4,Something's watching me,something's watching me


### **Tokenization**

In [5]:
from nltk.tokenize import word_tokenize
data['clean_text'] = data['clean_text'].apply(word_tokenize)

In [6]:
data.head()

Unnamed: 0,Text,clean_text
0,The eternal mystique of Goldman Sachs,"[the, eternal, mystique, of, goldman, sachs]"
1,Either you don't care enough to actually tell ...,"[either, you, do, n't, care, enough, to, actua..."
2,I am such an IDIOT.,"[i, am, such, an, idiot, .]"
3,While lifting weights on Friday and doing bent...,"[while, lifting, weights, on, friday, and, doi..."
4,Something's watching me,"[something, 's, watching, me]"


### <- Performing stop word removal will be a hinderance to context for sentence generation ->

## Creating the ngram model (bigram)

In [7]:
from nltk import ngrams
from collections import defaultdict
def generate_ngram(words, ngrams, n):
    # Create n-grams
    for i in range(len(words) - n):
        context = tuple(words[i:i+n])
        next_word = words[i+n]
        ngrams[context].append(next_word)

In [8]:
master_ngram=defaultdict(list)
for i in data['clean_text']:
    generate_ngram(i, master_ngram, 2)

### <- master_ngram is going to store the ngram for all the sentences in the dataset ->

In [9]:
print(master_ngram)

defaultdict(<class 'list'>, {('the', 'eternal'): ['mystique'], ('eternal', 'mystique'): ['of'], ('mystique', 'of'): ['goldman'], ('of', 'goldman'): ['sachs'], ('either', 'you'): ['do'], ('you', 'do'): ["n't", 'things', "n't", 'anything', 'it', 'what', 'that', 'it', 'that', 'it', "n't", "n't", "n't", "n't"], ('do', "n't"): ['care', 'exist', 'think', 'know', 'know', 'complain', 'think', 'think', 'think', 'stop', 'know', 'make', 'understand', 'know', 'know', 'forget', 'complain', 'know', 'line', 'know', 'understand', 'understand', 'stop', 'want', 'know', 'imagine', 'control', 'let', 'like', 'like', 'have', ',', 'like', 'enjoy', 'have', 'want', 'have', 'feel', 'feel', 'have', 'know', 'want', 'help', 'go', 'take', 'usually', 'care', 'know', 'ever', 'believe', 'believe', 'have', 'want', 'like', 'spam', 'like', 'really', 'ask', 'always', 'notice', 'live', 'see', 'have', 'want', 'go', 'deny', 'consider', 'agree', 'feel', 'feel', 'know', 'know', 'waste', 'really', 'really', 'read', 'need', 'you

# Function for Generating sentences

In [14]:
import random
def generate_sentence(model, sentence, n, max_length=50):
    while len(sentence) < max_length:
        context = tuple(sentence[-n:])
        if context in model:
            next_word = random.choice(model[context])
            sentence.append(next_word)
        else:
            break
    
    return ' '.join(sentence)

# Giving input sentences

In [11]:
input_sentence1="I want to understand"
#preprocessing
input_tokenized1=word_tokenize(input_sentence1.lower())

In [12]:
# Creating ngram for the input sentence and appending it to the master_ngram
generate_ngram(input_tokenized1,master_ngram,2)

In [15]:
# Generate sentences with the input sentence
generated_sentence1=generate_sentence(master_ngram,input_tokenized1,2)

In [16]:
generated_sentence1

'i want to understand how people judge gay people for being ruthless , solitary predators , but when mommy fell he jumped out of the heavens and earth , botanical and zoological life , then just let ’ s health care bill - insurance ] [ discussion ] [ question'

### Another Input

In [22]:
input_sentence1="I want to know how someone"

#preprocessing
input_tokenized1=word_tokenize(input_sentence1.lower())

# Creating ngram for the input sentence and appending it to the master_ngram
generate_ngram(input_tokenized1,master_ngram,2)

# Generate sentences with the input sentence
generated_sentence1=generate_sentence(master_ngram,input_tokenized1,2)
generated_sentence1

"i want to know how someone can think of me for my obvious success . with no images ( i 'm sure that most of us had any interest in paganism , witchcraft , astrology etc ? ask away . i could finally breathe for a month ( whole other"

### Another Input

In [28]:
input_sentence1="I want to make a statement"

#preprocessing
input_tokenized1=word_tokenize(input_sentence1.lower())

# Creating ngram for the input sentence and appending it to the master_ngram
generate_ngram(input_tokenized1,master_ngram,2)

# Generate sentences with the input sentence
generated_sentence1=generate_sentence(master_ngram,input_tokenized1,2)
generated_sentence1

'i want to make a statement from that day i was groomed by maddona . [ removed ] an elderly couple is in denial about it all . if only jesus was here too , honestly i doubt it since your family is deathly scared of covid , but edit'