In [487]:
import os
import json
import numpy as np

In [488]:
def fetch_data():
    with open("./data/train.jsonl", 'r') as f:
        train_data = [json.loads(line) for line in f]

    tag_mapping = {1: "NN", 2: "DT", 3: "JJ", 4: "OT"}

    # Preprocess training data
    train_sentences = []
    train_labels = []
    for entry in train_data:
        tokens = entry['tokens']
        pos_tags = entry['pos_tags']
        chunk_tags = np.array(entry['chunk_tags'])
        
        train_sentences.append(pos_tags)
        train_labels.append(chunk_tags)

    with open("./data/test.jsonl", 'r') as f:
        test_data = [json.loads(line) for line in f]
    # Preprocess training data
    test_sentences = []
    test_labels = []
    for entry in test_data:
        tokens = entry['tokens']
        pos_tags = entry['pos_tags']
        chunk_tags = np.array(entry['chunk_tags'])
        
        test_sentences.append(pos_tags)
        test_labels.append(chunk_tags)
    
    return train_sentences, test_sentences,  train_labels, test_labels

X_train, X_test, y_train, y_test= fetch_data()


In [489]:
y_train[0]

array([1, 1, 1, 0, 1, 1, 1, 0, 1])

In [490]:
def one_hot_encode(input_list):
    encoded_list = []
    for item in input_list:
        one_hot_vector = np.zeros(4)
        one_hot_vector[item - 1] = 1  # Adjust index to start from 0
        encoded_list.append(one_hot_vector.tolist())
    return np.array(encoded_list)

In [491]:
one_hot_encode(X_train[0])

array([[1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.]])

In [492]:
np.concatenate([np.array([1,0,0]), np.array([1,0])])

array([1, 0, 0, 1, 0])

In [493]:
def into_ho(X_train):
    X_train_ho = []
    for i in range(len(X_train)):
        X = one_hot_encode(X_train[i])
        temp = []
        for j in range(len(X)):
            if j==0:
                temp.append(np.concatenate([np.array([1.0,0.0,0.0,0.0,0.0]), X[j]]))
            else:
                temp.append((np.concatenate([np.array([0]), X[j-1], X[j]] )))
        X_train_ho.append(np.array(temp))
    return X_train_ho

In [494]:
X_train_ho = into_ho(X_train)
X_test_ho = into_ho(X_test)


In [495]:
# print(X_train[0],"\n",X_train_ho[0])
# X_train_ho[0][0].shape

In [496]:
def sigmoid(x):
    return 1/(1.0+(np.exp(-x)))

In [497]:
np.concatenate([np.array([0]), X_train_ho[0][0]])

array([0., 1., 0., 0., 0., 0., 1., 0., 0., 0.])

In [498]:
8.9*np.ones(10)

array([8.9, 8.9, 8.9, 8.9, 8.9, 8.9, 8.9, 8.9, 8.9, 8.9])

In [499]:
def binary_crossentropy_loss(y_true, y_pred, epsilon=1e-15):
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    loss = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    total_loss = np.sum(loss)
    return total_loss

In [500]:
class SingleRecurrentPerceptron:
    def __init__(self, vec_len, lr):
          
        # Initialize weights and bias
        self.weights = np.zeros(vec_len)
        self.threshold = np.random.randn(1)
        self.lr = lr

   
    def forward(self, inputs):
        """inputs-- (B, Tx, 10)"""   
        prediction= []    #(B, Tx)
        X_i_b = []      #(B, Tx, 10)
        for j in range(len(inputs)):
            out=[]
            X_i_j = []
            Tx, _ = inputs[j].shape
            y_prev=0
            for i in range(Tx):
                x = np.concatenate([inputs[j][i], np.array([y_prev])])
                X_i_j.append(x)
                net = x.T @ self.weights - self.threshold[0]
                oi = sigmoid(net)
                y_prev = oi
                out.append(oi)
            prediction.append(np.array(out))
            X_i_b.append(np.array(X_i_j))
        return X_i_b, prediction
    
    def forward_per_input(self, inputs):
        """inputs-- (Tx, 10) """   
        out=[]    #(Tx, 1)
        X_i_j = []  #(Tx, 10)
        Tx = len(inputs)
        y_prev=0
        for i in range(Tx):
            x = np.concatenate([inputs[i], np.array([y_prev])])
            X_i_j.append(x)
            x = x.T @ self.weights - self.threshold[0]
            x = sigmoid(x)
            y_prev = x
            out.append(x)

        return np.array(X_i_j), out

    def backward(self, inputs, target):
        """inputs-- (B, Tx, 10)
           target-- (B, Tx)
            """   
        X, prediction = self.forward(inputs)

        for i in range(len(inputs)):      # iterate over each example
            delta_w = np.zeros(10)
            for j in range(len(inputs[i])):     # iterate over each time
                x = X[i][j]
                delta_w += -self.lr * (target[i][j]-prediction[i][j]) * (x)
            self.weights += delta_w
    

    def backward_per_input(self, inputs, target):
        """inputs-- (B, Tx, 10)
           target-- (B, Tx)
            """   
        # print(self.weights)
        for i in range(len(inputs)): 
            X, prediction = self.forward_per_input(inputs[i])
            delta_w = np.zeros(10)
            for j in range(len(inputs[i])):     # iterate over each time
                delta_w += self.lr * (target[i][j]-prediction[j]) * X[j]
            self.weights += delta_w
        # print(self.weights)




    def calculate_loss(self, inputs, targets):
        loss = 0
        for i in range(len(inputs)):
            _ , y_pred = self.forward_per_input(inputs[i])
            loss += binary_crossentropy_loss(targets[i], y_pred, epsilon=1e-15)
        return loss



        
    def train(self, inputs, targets, epochs):
        """inputs-- (B, Tx, 10)
           target-- (B, Tx)
            """           
        for iter in range(epochs):
            self.backward_per_input(inputs, targets)
            loss = self.calculate_loss(inputs, targets)
            print(f"epoch: {iter}, training loss : {loss}")
    

In [501]:
model = SingleRecurrentPerceptron(10, 0.05)

In [502]:
y_pred = model.forward_per_input(X_train_ho[0:8][6])

In [503]:
y_train[0]

array([1, 1, 1, 0, 1, 1, 1, 0, 1])

In [504]:
for iter in range(128):
    model.backward_per_input(X_train_ho, y_train)
    loss = model.calculate_loss(X_train_ho, y_train)
    print(f"epoch: {iter}, training loss : {loss}")

epoch: 0, training loss : 93463.06759993888
epoch: 1, training loss : 93459.3304778473
epoch: 2, training loss : 93458.12251875624
epoch: 3, training loss : 93457.52444166772
epoch: 4, training loss : 93457.16753589087
epoch: 5, training loss : 93456.93043352004
epoch: 6, training loss : 93456.76149540758
epoch: 7, training loss : 93456.63502768459
epoch: 8, training loss : 93456.53680672772
epoch: 9, training loss : 93456.45832167412
epoch: 10, training loss : 93456.3941683143
epoch: 11, training loss : 93456.34075023656
epoch: 12, training loss : 93456.29558132659
epoch: 13, training loss : 93456.25688803032
epoch: 14, training loss : 93456.22337114139
epoch: 15, training loss : 93456.19405718618
epoch: 16, training loss : 93456.16820236985
epoch: 17, training loss : 93456.14522859486
epoch: 18, training loss : 93456.12467978313
epoch: 19, training loss : 93456.1061912841
epoch: 20, training loss : 93456.08946806188
epoch: 21, training loss : 93456.07426884418


KeyboardInterrupt: 

In [505]:
model.calculate_loss(X_train_ho[:9], y_train[:9])

49.64954474060637

In [506]:
model.backward_per_input(X_train_ho[1:90], y_train[1:90])

In [507]:
model.train(X_train_ho, y_train, 5)

epoch: 0, training loss : 93456.04958774277
epoch: 1, training loss : 93456.036413005
epoch: 2, training loss : 93456.02558587158
epoch: 3, training loss : 93456.01556359754
epoch: 4, training loss : 93456.00625960386


In [None]:
model.backward_per_input(X_train_ho[1:8], y_train[1:8])

In [None]:
binary_crossentropy_loss(y_train[0], y_pred)

9.53673132608883

In [None]:
tag_mapping = {1: "NN", 2: "DT", 3: "JJ", 4: "OT"}

# Calculate the number of unique POS tags
num_unique_tags = len(tag_mapping)

# Preprocess training data
# Preprocess training data
train_sentences = []
train_labels = []
for entry in train_data:
    tokens = entry['tokens']
    pos_tags = entry['pos_tags']
    chunk_tags = entry['chunk_tags']
    
    # Convert POS tags to one-hot encoded representation
    pos_tags_one_hot = np.zeros((len(pos_tags), num_unique_tags))
    for i, tag in enumerate(pos_tags):
        pos_tags_one_hot[i, tag - 1] = 1  # Subtract 1 to account for 0-based indexing
    
    # Flatten one-hot encoded representation
    flattened_tags = pos_tags_one_hot.flatten()
    
    train_sentences.append(flattened_tags)
    train_labels.append(chunk_tags)


# Initialize and train the single recurrent perceptron
# Initialize and train the single recurrent perceptron
input_size = len(train_sentences[0])  # Get input size from the first sample
output_size = 2  # Binary classification (1 for chunk, 0 for not chunk)
perceptron = SingleRecurrentPerceptron(input_size, output_size)
perceptron.train(train_sentences, train_labels, epochs=10)



# Load test data
with open('test.jsonl', 'r') as f:
    test_data = [json.loads(line) for line in f]

# Preprocess test data
test_sentences = []
test_labels = []
for entry in test_data:
    tokens = entry['tokens']
    pos_tags = entry['pos_tags']
    chunk_tags = entry['chunk_tags']
    
    # Convert POS tags to one-hot encoded representation
    pos_tags_one_hot = np.zeros((len(pos_tags), len(tag_mapping)))
    for i, tag in enumerate(pos_tags):
        pos_tags_one_hot[i, tag - 1] = 1  # Subtract 1 to account for 0-based indexing
    
    test_sentences.append(pos_tags_one_hot)
    test_labels.append(chunk_tags)

# Evaluate the trained perceptron
predictions = perceptron.predict(test_sentences)

# Assuming we have some evaluation function to compute accuracy
# Let's assume a simple accuracy calculation for demonstration
def accuracy(predictions, targets):
    correct = 0
    total = len(predictions)
    for pred, target in zip(predictions, targets):
        pred_labels = [1 if p > 0 else 0 for p in pred]
        if pred_labels == target:
            correct += 1
    return correct / total

acc = accuracy(predictions, test_labels)
print("Accuracy:", acc)