In [2]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dense, TimeDistributed, Layer

# ----------- Custom CRF Layer -----------
class CRF(Layer):
    """Placeholder "CRF" head for per-token sequence tagging.

    NOTE: despite its name this layer does not implement a conditional
    random field yet. ``call`` returns its input unchanged, ``get_loss`` is a
    token-level sparse categorical cross-entropy, and ``viterbi_decode`` is a
    per-token argmax. The ``transitions`` weight is created in ``build`` but
    is not read by any method of this class.

    Args:
        num_tags: Number of distinct tags; sets the shape of the
            ``(num_tags, num_tags)`` transition matrix.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, num_tags, **kwargs):
        super().__init__(**kwargs)
        self.num_tags = num_tags
        # Declare mask support so a padding mask produced upstream (e.g. by
        # Embedding(mask_zero=True)) is passed through this layer rather than
        # being lost here; Keras can then exclude padded timesteps from the
        # loss it computes on this layer's output.
        self.supports_masking = True

    def build(self, input_shape):
        """Create the tag-to-tag transition matrix (currently unused)."""
        self.transitions = self.add_weight(
            shape=(self.num_tags, self.num_tags),
            initializer="glorot_uniform",
            trainable=True,
            name="transitions"
        )
        super().build(input_shape)

    def call(self, logits):
        """Return the incoming per-token scores unchanged (raw logits)."""
        return logits

    def get_loss(self, y_true, y_pred):
        """Token-level loss suitable for ``model.compile(loss=...)``.

        Args:
            y_true: Integer tag ids.
            y_pred: Unnormalised scores, as returned by ``call``.

        Returns:
            Sparse categorical cross-entropy computed with
            ``from_logits=True``.
        """
        return tf.keras.losses.sparse_categorical_crossentropy(
            y_true, y_pred, from_logits=True
        )

    def viterbi_decode(self, logits):
        """Greedy decode: index of the highest score along the last axis.

        This is a simplification; the transition matrix is not consulted.
        """
        return tf.argmax(logits, axis=-1)

    def get_config(self):
        """Return the layer config including ``num_tags``.

        ``num_tags`` is a required constructor argument, so it must be part
        of the config for the layer to be re-created when a saved model is
        loaded.
        """
        config = super().get_config()
        config.update({"num_tags": self.num_tags})
        return config

# ----------- Model Definition -----------
max_len = 10      # fixed sequence length; keep it small for demo
n_words = 5000    # vocab size (index 0 is the padding id, see mask_zero below)
n_tags = 7        # O, B-PER, I-PER, B-ORG, I-ORG, B-LOC, I-LOC

# Token ids -> embeddings -> BiLSTM -> per-token tag scores.
inp = Input(shape=(max_len,))
# The deprecated `input_length` argument is intentionally not passed: the
# sequence length is already fixed by the Input shape above.
# mask_zero=True marks id 0 as padding.
x = Embedding(input_dim=n_words, output_dim=50, mask_zero=True)(inp)
x = Bidirectional(LSTM(units=50, return_sequences=True))(x)
logits = TimeDistributed(Dense(n_tags))(x)   # one score per tag, per timestep

# The CRF layer returns the scores unchanged and supplies the training loss.
crf = CRF(n_tags)
out = crf(logits)

model = Model(inputs=inp, outputs=out)
model.compile(optimizer="adam", loss=crf.get_loss, metrics=["accuracy"])







In [3]:
import numpy as np

# Toy training set (2 examples only): four real tokens per sentence,
# right-padded with zeros to a width of 10.
X_train = np.array([
    [10, 11, 12, 13] + [0] * 6,   # Elon Musk founded SpaceX
    [14, 15, 16, 17] + [0] * 6,   # Google is in California
])

# One tag id per position, padded with 0 to the same width.
y_train = np.array([
    [1, 2, 0, 3] + [0] * 6,       # B-PER I-PER O B-ORG
    [3, 0, 0, 5] + [0] * 6,       # B-ORG O O B-LOC
])

# Brief, silent training run on the toy data (20 epochs).
model.fit(x=X_train, y=y_train, epochs=20, verbose=0)




<keras.src.callbacks.history.History at 0x1edc1ffd040>

In [4]:
# Index -> BIO tag lookup; a tag's position in the list is its id.
idx2tag = dict(enumerate(
    ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC"]
))

# Tokenised sentences to run through the model.
test_sentences = [
    ["Elon", "Musk", "founded", "SpaceX"],
    ["Google", "is", "in", "California"],
]

# Mock vocabulary matching the ids used in the training arrays (10..17).
word2idx = dict(
    zip(
        ["Elon", "Musk", "founded", "SpaceX",
         "Google", "is", "in", "California"],
        range(10, 18),
    )
)

# Convert to padded input.
# Words missing from word2idx map to id 1; id 0 is the padding value.
X_test = []
for sent in test_sentences:
    # Truncate before encoding so a sentence longer than max_len still yields
    # a row of exactly max_len ids (otherwise the rows would be ragged and
    # would not match the model's fixed input width).
    seq = [word2idx.get(w, 1) for w in sent[:max_len]]
    seq = seq + [0] * (max_len - len(seq))   # right-pad with zeros
    X_test.append(seq)
X_test = np.array(X_test)

# Score every position, then take the best-scoring tag id per token.
logits = model.predict(X_test)
pred_ids = logits.argmax(axis=-1)

# Two-column report: the sentence next to its predicted tags.
print(f"{'Input Sentence':30} {'Output Tags':30}")
print("-" * 60)
for sentence, pred in zip(test_sentences, pred_ids):
    # Only the first len(sentence) positions are real tokens; the rest is padding.
    n_tokens = len(sentence)
    tags = [idx2tag[tag_id] for tag_id in pred[:n_tokens]]
    sentence_text = ' '.join(sentence)
    tag_text = ' '.join(tags)
    print(f"{sentence_text:30} {tag_text:30}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step
Input Sentence                 Output Tags                   
------------------------------------------------------------
Elon Musk founded SpaceX       O O O O                       
Google is in California        O O O O                       
