# In [2]:
import numpy as np
class HMM_POS_Tagger:
    """First-order hidden Markov model for part-of-speech tagging.

    The model is estimated from tagged sentences by relative-frequency
    counting (no smoothing) and decoded with the Viterbi algorithm.

    Attributes:
        states: The POS tags (e.g. Noun, Verb, Adj).
        observations: The words in the vocabulary.
        start_prob: Maps each tag seen at the start of a sentence to its
            relative frequency as a sentence-initial tag.
        transition_prob: ``transition_prob[a][b]`` is the count of tag ``a``
            immediately followed by tag ``b`` divided by the total count of
            ``a``.
        emission_prob: ``emission_prob[tag][word]`` is the count of ``word``
            tagged as ``tag`` divided by the total count of ``tag``.
    """

    def __init__(self, states, observations):
        """Store the tag set and vocabulary; the model starts untrained."""
        self.states = states  # POS tags (e.g., Noun, Verb, Adj)
        self.observations = observations  # Words in the vocabulary
        self.start_prob = {}
        self.transition_prob = {}
        self.emission_prob = {}

    @staticmethod
    def _normalize(counts, total):
        """Return ``counts`` divided by ``total``; all zeros if ``total`` is 0."""
        if total == 0:
            # A state never seen in training has no evidence at all; give it
            # zero probability everywhere instead of dividing by zero.
            return {key: 0.0 for key in counts}
        return {key: count / total for key, count in counts.items()}

    def train(self, tagged_sentences):
        """Estimate start, transition and emission probabilities.

        Every call re-estimates the model from scratch, so calling ``train``
        again replaces the previous parameters instead of mixing with them.

        Args:
            tagged_sentences: Iterable of sentences, each an iterable of
                ``(word, tag)`` pairs. Empty sentences are ignored.

        Raises:
            KeyError: If a tag is not in ``states`` or a word is not in
                ``observations``.
        """
        # Counts live in locals so a failed or repeated call can never leave
        # half-updated or doubly-normalized values on the instance.
        start_count = {}
        state_count = {state: 0 for state in self.states}
        transition_count = {state: {next_state: 0 for next_state in self.states}
                            for state in self.states}
        emission_count = {state: {obs: 0 for obs in self.observations}
                          for state in self.states}
        sentence_count = 0  # sentences that actually contributed a start tag

        for sentence in tagged_sentences:
            prev_state = None
            for word, tag in sentence:
                state_count[tag] += 1
                emission_count[tag][word] += 1

                if prev_state is None:
                    start_count[tag] = start_count.get(tag, 0) + 1
                    sentence_count += 1
                else:
                    transition_count[prev_state][tag] += 1

                prev_state = tag

        # Normalize to get probabilities. Transitions are divided by the total
        # count of the source tag (sentence-final occurrences included), so a
        # row may sum to less than 1.
        self.start_prob = self._normalize(start_count, sentence_count)
        self.transition_prob = {state: self._normalize(transition_count[state], state_count[state])
                                for state in self.states}
        self.emission_prob = {state: self._normalize(emission_count[state], state_count[state])
                              for state in self.states}

    def viterbi(self, sentence):
        """Return the most probable tag sequence for ``sentence``.

        Args:
            sentence: Sequence of words. A word missing from the emission
                table gets emission probability 0 for every tag.

        Returns:
            A ``(probability, tags)`` tuple where ``tags`` has one tag per
            word. An empty sentence yields ``(1.0, [])`` (the empty product).
            Ties between equally probable candidates are broken by comparing
            the tags themselves, because candidates are ``(prob, tag)`` tuples.
        """
        n = len(sentence)
        if n == 0:
            # Nothing to decode; avoids indexing sentence[0] below.
            return (1.0, [])

        V = [{}]  # V[t][state]: probability of the best path ending in state at t
        path = {}  # best tag sequence ending in each state so far

        for state in self.states:
            V[0][state] = self.start_prob.get(state, 0) * self.emission_prob[state].get(sentence[0], 0)
            path[state] = [state]

        for t in range(1, n):
            V.append({})
            new_path = {}

            for state in self.states:
                (prob, prev_state) = max(
                    (V[t-1][y] * self.transition_prob[y][state] * self.emission_prob[state].get(sentence[t], 0), y)
                    for y in self.states)
                V[t][state] = prob
                new_path[state] = path[prev_state] + [state]

            path = new_path

        (prob, final_state) = max((V[n-1][state], state) for state in self.states)
        return (prob, path[final_state])
# Example usage: fit the tagger on a tiny hand-tagged corpus, then decode
# a sentence made of words that appear in the vocabulary.
states = ['Noun', 'Verb', 'Adj']
observations = ['dog', 'barks', 'loudly', 'cat', 'runs']
tagger = HMM_POS_Tagger(states, observations)

# Training data: one list of (word, POS tag) pairs per sentence.
tagged_sentences = [
    [('dog', 'Noun'), ('barks', 'Verb'), ('loudly', 'Adj')],
    [('cat', 'Noun'), ('runs', 'Verb')],
]
tagger.train(tagged_sentences)

# Prints a (probability, tag sequence) tuple for the most probable tagging.
sentence = ['dog', 'runs']
print(tagger.viterbi(sentence))

# Output: (0.25, ['Noun', 'Verb'])
