Q1) Task: Given a sequence of alphabets (with some missing values), use an RNN and a Bidirectional RNN model to predict the missing values in the sequence.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Bidirectional, Dense, Embedding
from sklearn.model_selection import train_test_split

# Step 1: Character to Integer Mapping
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
char_to_int = {c: i+1 for i, c in enumerate(alphabet)}  # A=1, ..., Z=26
char_to_int['_'] = 0  # Missing character
int_to_char = {i: c for c, i in char_to_int.items()}

# Step 2: Dataset - Original Word Sequences
base_sequences = [
    "MACHINE", "LEARNING", "NETWORK", "SCIENCE", "DATA",
    "COMPUTER", "ALGORITHM", "PROCESS", "SYSTEM", "MODEL",
]

# Step 3: Introduce missing characters (with forced corruption if none missing)
def introduce_missing(sequences, missing_prob=0.2):
    corrupted_sequences = []
    for seq in sequences:
        seq_list = list(seq)
        corrupted = False
        for i in range(len(seq_list)):
            if np.random.rand() < missing_prob:
                seq_list[i] = '_'
                corrupted = True
        if not corrupted:
            rand_index = np.random.randint(len(seq_list))
            seq_list[rand_index] = '_'
        corrupted_sequences.append(''.join(seq_list))
    return corrupted_sequences

corrupted_sequences = introduce_missing(base_sequences, missing_prob=0.1)
print("Corrupted Sequences:", corrupted_sequences)

# Step 4: Convert sequences to integer format
encoded_sequences = [[char_to_int[char] for char in seq] for seq in corrupted_sequences]

# Step 5: Create supervised input-output pairs (X, y)
X, y = [], []
for orig, corrupted in zip(base_sequences, encoded_sequences):
    for i, val in enumerate(corrupted):
        if val == 0:
            input_seq = corrupted.copy()
            X.append(input_seq)
            y.append(char_to_int[orig[i]])

# Step 6: Padding only (NO normalization)
max_len = max(len(seq) for seq in X)
X = tf.keras.preprocessing.sequence.pad_sequences(X, maxlen=max_len, padding='pre')
y = np.array(y)

# Step 7: Train-validation split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 8: Define RNN and BiRNN models using Embedding
def build_rnn_model():
    model = Sequential([
        Embedding(input_dim=28, output_dim=16, input_length=max_len),
        SimpleRNN(150),
        Dense(28, activation='softmax')  # 0–27 (0=_ missing, 1–26 = A–Z)
    ])
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def build_birnn_model():
    model = Sequential([
        Embedding(input_dim=28, output_dim=16, input_length=max_len),
        Bidirectional(SimpleRNN(150)),
        Dense(28, activation='softmax')
    ])
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Step 9: Train models
rnn = build_rnn_model()
birnn = build_birnn_model()

print("\nTraining RNN...")
rnn.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=150, batch_size=16, verbose=0)

print("\nTraining Bidirectional RNN...")
birnn.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=150, batch_size=16, verbose=0)

# Step 10: Prediction logic
def fill_missing(model, seq):
    chars = list(seq)
    while '_' in chars:
        int_seq = [char_to_int[c] if c != '_' else 0 for c in chars]
        padded = tf.keras.preprocessing.sequence.pad_sequences([int_seq], maxlen=max_len, padding='pre')
        pred = model.predict(padded, verbose=0)[0]
        predicted_char = int_to_char[np.argmax(pred)]
        missing_idx = chars.index('_')
        chars[missing_idx] = predicted_char
    return ''.join(chars)

# Step 11: Test examples
print("\n--- Testing ---")
test_samples = corrupted_sequences[:3]
new_test = "_ACHINE"
test_samples.append(new_test)

for seq in test_samples:
    print(f"Input : {seq}")
    print(f"RNN Prediction Output:     {fill_missing(rnn, seq)}")
    print(f"BiRNN Prediction Output:   {fill_missing(birnn, seq)}\n")



Corrupted Sequences: ['MACH_NE', 'LE_RNING', 'NETW_RK', 'S_IENCE', 'DA_A', '_OMP_TER', 'ALGO_ITHM', 'P_OCESS', 'SY_TEM', '_ODEL']

Training RNN...

Training Bidirectional RNN...

--- Testing ---
Input : MACH_NE
RNN Prediction Output:     MACHTNE
BiRNN Prediction Output:   MACHCNE

Input : LE_RNING
RNN Prediction Output:     LEARNING
BiRNN Prediction Output:   LEARNING

Input : NETW_RK
RNN Prediction Output:     NETWORK
BiRNN Prediction Output:   NETWORK

Input : _ACHINE
RNN Prediction Output:     TACHINE
BiRNN Prediction Output:   CACHINE



Q2) Predict the next word in a sentence using an RNN

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Step 1: Dataset
sentences = [
    "The cat sat on the mat.",
    "The dog sat on the rug.",
    "The bird flew in the sky.",
    "The cat jumped over the fence.",
    "The dog barked at the mailman.",
    "The bird landed on the branch.",
    "The cat chased a butterfly.",
    "The dog dug a hole in the yard.",
    "The bird sang from the rooftop."
]

# Step 2: Text Preprocessing
# Tokenize the sentences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index  # Dictionary mapping words to integers
vocab_size = len(word_index) + 1  # +1 for padding (index 0)

# Convert sentences to sequences of integers
sequences = tokenizer.texts_to_sequences(sentences)

# Create input-output pairs for training
X_train, y_train = [], []
for seq in sequences:
    for i in range(1, len(seq)):
        X_train.append(seq[:i])  # Previous words
        y_train.append(seq[i])   # Next word

# Pad sequences to ensure uniform length
max_len = max(len(seq) for seq in sequences)  # Max sentence length
X_train = pad_sequences(X_train, maxlen=max_len, padding='pre')
y_train = np.array(y_train)

# Step 3: Build the RNN Model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=50, input_length=max_len),  # Word embeddings
    SimpleRNN(150, return_sequences=False),  # Simple RNN layer
    Dense(vocab_size, activation='softmax')  # Output layer for word prediction
])

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 4: Train the Model
print("Training the RNN model...")
model.fit(X_train, y_train, epochs=100, verbose=0)

# Step 5: Prediction Function
def predict_next_word(sentence):
    # Tokenize and pad the input sentence
    seq = tokenizer.texts_to_sequences([sentence])[0]
    padded_seq = pad_sequences([seq], maxlen=max_len, padding='pre')

    # Predict the next word
    pred = model.predict(padded_seq, verbose=0)
    pred_word_idx = np.argmax(pred)
    return tokenizer.index_word.get(pred_word_idx, "Unknown")

# Step 6: Test Prediction
test_sentence_1 = "The cat sat on"
test_sentence_2 = "a dog barked"
predicted_word_1 = predict_next_word(test_sentence_1)
predicted_word_2 = predict_next_word(test_sentence_2)
print(f"\nInput Sentence: '{test_sentence_1}'")
print(f"Predicted Next Word: '{predicted_word_1}'")
print(f"\nInput Sentence: '{test_sentence_2}'")
print(f"Predicted Next Word: '{predicted_word_2}'")

# Optional: Show model summary
model.summary()

Training the RNN model...

Input Sentence: 'The cat sat on'
Predicted Next Word: 'the'

Input Sentence: 'a dog barked'
Predicted Next Word: 'at'


Q3) Build an RNN-based sequence generator to predict the next note in a series of Indian classical music notes. Begin with the basic scale (Sa, Re, Ga, Ma, Pa, Dha, Ni, Sha), generate training sequences, and convert the notes to numerical form for model input. Train the RNN to learn note transitions and extend the model to generate melodic sequences for ragas like Bhairav, Bhopali, and Bageshree

In [4]:
import numpy as np, random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

ragas = {
    'basic':    ['Sa', 'Re', 'Ga', 'Ma', 'Pa', 'Dha', 'Ni', 'Sha'],
    'bhopali':  ['Sa', 'Re', 'Ga', 'Pa', 'Dha', 'Sha'],
    'bageshree':['Sa', 'Re', 'Ga', 'Ma', 'Dha', 'Ni', 'Sha'],
    'bhairav':  ['Sa', 'Re♭', 'Ga', 'Ma', 'Pa', 'Dha♭', 'Ni', 'Sha']
}

def generate_sequences(notes, count=100, length=5):
    return [random.choices(notes, k=length) for _ in range(count)]

def prepare_data(sequences, notes):
    note_to_idx = {note: i for i, note in enumerate(notes)}
    idx_to_note = {i: note for note, i in note_to_idx.items()}
    X, y = [], []
    for seq in sequences:
        for i in range(1, len(seq)):
            X.append([note_to_idx[n] for n in seq[:i]])
            y.append(note_to_idx[seq[i]])
    max_len = max(len(s) for s in X)
    X = pad_sequences(X, maxlen=max_len)
    y = to_categorical(y, num_classes=len(notes))
    return X, y, note_to_idx, idx_to_note, max_len

def build_model(vocab_size):
    model = Sequential([
        Embedding(vocab_size, 10),
        SimpleRNN(64),
        Dense(vocab_size, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def train_and_generate(raga_name, notes):
    print(f"\nRaga: {raga_name.upper()}")
    sequences = generate_sequences(notes)
    X, y, n2i, i2n, maxlen = prepare_data(sequences, notes)
    model = build_model(len(notes))
    model.fit(X, y, epochs=30, verbose=0)

    # Fixed seed input
    seed = ['Sha', 'Ga', 'Sa']
    print("Seed:", seed)
    generated = seed.copy()

    for _ in range(5):  # generate next 5 notes only
        encoded = [n2i[n] for n in generated[-maxlen:]]
        padded = pad_sequences([encoded], maxlen=maxlen)
        pred_idx = np.argmax(model.predict(padded, verbose=0))
        generated.append(i2n[pred_idx])

    print("Next 5 Notes:", generated[len(seed):])

for raga in ragas:
    train_and_generate(raga, ragas[raga])



Raga: BASIC
Seed: ['Sha', 'Ga', 'Sa']
Next 5 Notes: ['Pa', 'Re', 'Pa', 'Sha', 'Ga']

Raga: BHOPALI
Seed: ['Sha', 'Ga', 'Sa']
Next 5 Notes: ['Pa', 'Re', 'Sha', 'Re', 'Pa']

Raga: BAGESHREE
Seed: ['Sha', 'Ga', 'Sa']
Next 5 Notes: ['Ma', 'Dha', 'Sa', 'Re', 'Dha']

Raga: BHAIRAV
Seed: ['Sha', 'Ga', 'Sa']
Next 5 Notes: ['Re♭', 'Dha♭', 'Re♭', 'Ga', 'Ga']
