## Import Libs

In [78]:
import os
import pickle
import json
import numpy as np
from sklearn.model_selection import KFold

from activation import *
from utils import *

## Load Data

In [79]:
train_path = os.path.join("data", "train.jsonl")
test_path = os.path.join("data","test.jsonl")

In [80]:
def fetch_data(train_path, test_path):
    with open(train_path, 'r') as f:
        train_data = [json.loads(line) for line in f]
    # Preprocess training data
    train_sentences = []
    train_labels = []
    for entry in train_data:
        tokens = entry['tokens']
        pos_tags = entry['pos_tags']
        chunk_tags = np.array(entry['chunk_tags'])
        
        train_sentences.append(pos_tags)
        train_labels.append(chunk_tags)

    with open(test_path, 'r') as f:
        test_data = [json.loads(line) for line in f]
    # Preprocess test data
    test_sentences = []
    test_labels = []
    for entry in test_data:
        tokens = entry['tokens']
        pos_tags = entry['pos_tags']
        chunk_tags = np.array(entry['chunk_tags'])
        
        test_sentences.append(pos_tags)
        test_labels.append(chunk_tags)
    
    return train_sentences, test_sentences,  train_labels, test_labels

X_train, X_test, y_train, y_test= fetch_data(train_path, test_path)


## Data Preprocessing

In [81]:
"""
Function to process the POS data
into One hot vectors,
that is :
Input: [DT/TT/NN/OT]    Shape: (1)
Output : [DT TT NN OT]  Shape: (4,1)
"""
def one_hot_encode(input_list):
    encoded_list = []
    for item in input_list:
        one_hot_vector = np.zeros(4)
        one_hot_vector[item - 1] = 1  # Adjust index to start from 0
        encoded_list.append(one_hot_vector.tolist())
    return np.array(encoded_list)

In [82]:
"""
Function to process the POS data
into Recurrent Perceptron input format,
that is :
Input : [DT TT NN OT]
"""
def into_ho(X_train):
    X_train_ho = []
    for i in range(len(X_train)):
        X = one_hot_encode(X_train[i])
        temp = []
        for j in range(len(X)):
            if j==0:
                temp.append(np.concatenate([np.array([1.0,0.0,0.0,0.0,0.0]), X[j]]))
            else:
                temp.append((np.concatenate([np.array([0]), X[j-1], X[j]] )))
        X_train_ho.append(np.array(temp))
    return X_train_ho

In [83]:
y_train[0]

array([1, 1, 1, 0, 1, 1, 1, 0, 1])

In [84]:
print(X_train[0])
one_hot_encode(X_train[0])

[1, 4, 3, 1, 4, 4, 3, 1, 4]


array([[1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.]])

In [85]:
X_train_ho = into_ho(X_train)
X_test_ho = into_ho(X_test)


## Recurrent Perceptron Model

In [86]:
"""
Some representation which will help understand this code base.
B       - Batch Size
Tx      - Length of Input string
X_i_b   -   
X_i_j   -   

"""

class SingleRecurrentPerceptron:
    def __init__(self, vec_len=10, lr=0.05):
          
        # Initialize weights and bias
        self.weights = np.random.randn(vec_len)
        self.threshold = np.random.randn(1)
        self.lr = lr

   
    def forward(self, inputs):
        """inputs-- (B, Tx, 10)"""   
        prediction= []    #(B, Tx)
        X_i_b = []      #(B, Tx, 10)
        for j in range(len(inputs)):
            out=[]
            X_i_j = []
            Tx, _ = inputs[j].shape
            y_prev=0
            for i in range(Tx):
                x = np.concatenate([inputs[j][i], np.array([y_prev])])
                X_i_j.append(x)
                net = x.T @ self.weights - self.threshold[0]
                oi = sigmoid(net)
                y_prev = oi
                out.append(oi)
            prediction.append(np.array(out))
            X_i_b.append(np.array(X_i_j))
        return X_i_b, prediction

    # def backward(self, inputs, target):
    #     """inputs-- (B, Tx, 10)
    #        target-- (B, Tx)
    #         """   
    #     X, prediction = self.forward(inputs)

    #     for i in range(len(inputs)):      # iterate over each example
    #         delta_w = np.zeros(10)
    #         for j in range(len(inputs[i])):     # iterate over each time
    #             x = X[i][j]
    #             delta_w += -self.lr * (target[i][j]-prediction[i][j]) * (x)
    #         self.weights += delta_w
    

            
    def backward_gpt(self, inputs, targets):

        X_i_b, prediction = self.forward(inputs)
         
        B = len(inputs)  # Get batch size, sequence length, and feature dim

        # Initialize gradients for weights and bias
        self.weights_grad = np.zeros_like(self.weights)
        self.threshold_grad = np.zeros_like(self.threshold)
        sequence_lengths = [len(t) for t in targets]

        # Calculate gradients for output layer (using element-wise multiplication)
        for b in range(B):
            Tx = sequence_lengths[b]
            for t in range(Tx):
                delta_t = (prediction[b][t] - targets[b][t]) * sigmoid_derivative(prediction[b][t])
                self.weights_grad += X_i_b[b][t]*delta_t
                self.threshold_grad += delta_t

            # BPTT (using chain rule)
            delta_prev = 0
            for t in reversed(range(Tx)):
                if t + 1 < Tx:
                    delta_t = delta_prev * sigmoid_derivative(prediction[b][t]) + np.dot(delta_t, self.weights)
                else:
                    delta_t = delta_prev * sigmoid_derivative(prediction[b][t])
                self.weights_grad -= X_i_b[b][t] * delta_t*0.3  # Exclude previous output
                delta_prev = delta_t

        # Normalize gradients by batch size
        self.weights_grad /= B
        self.threshold_grad /= B

        # Update weights and bias
        self.weights -= self.lr * self.weights_grad
        self.threshold -= self.lr * self.threshold_grad

    def calculate_loss(self, inputs, targets):
        
        """
        This function calculates the total loss for a minibatch of sequences.

        Args:
        inputs: Batch of input sequences (B, Tx_max, vec_len).
        targets: Batch of ground truth sequences (B, Tx_max).

        Returns:
        The average loss over the minibatch.
        """
        B = len(inputs)  # Get batch size, max sequence length, and feature dim

        # Initialize loss to zero
        loss = 0
        accuracy = 0
        _, predictions = self.forward(inputs)
        # Forward pass for each example in the minibatch
        for b in range(B):
            # Calculate loss per example using cross-entropy
          
            loss += cross_entropy_loss(predictions[b], targets[b])
            accuracy += np.mean(targets[b]==(predictions[b]>0.5).astype(int))
            
        # Average loss over the minibatch
        return loss / B, accuracy/B


        
    def train(self, inputs, targets, epochs):

        """inputs-- (B, Tx, 10)
           target-- (B, Tx)
            """           
        
        for iter in range(epochs):
            kf = KFold(n_splits=5, shuffle=True, random_state=42)
            train_loss = 0
            val_loss = 0
            train_accuracy = 0
            val_accuracy = 0
            for train_index, val_index in kf.split(inputs):
                train_inputs, val_inputs = [inputs[i] for i in train_index], [inputs[i] for i in val_index]
                train_targets, val_targets = [targets[i] for i in train_index], [targets[i] for i in val_index]
                self.backward_gpt(inputs, targets)
                delta_loss, delta_accuracy = self.calculate_loss(train_inputs,train_targets)
                train_loss += delta_loss
                train_accuracy += delta_accuracy
                delta_loss, delta_accuracy = self.calculate_loss(val_inputs,val_targets )

                val_loss += delta_loss
                val_accuracy += delta_accuracy
            print(f"epoch: {iter:.2f}, training loss : {train_loss/5:.2f}, training accuracy: {train_accuracy*100/5:.2f}%, validation loss: {val_loss/5:.2f}, validation accuracy: {val_accuracy*100/5:.2f}%")
    

    def save(self,path="model.pkl"):
        with open(path, 'wb') as f:
            pickle.dump(self, f)

    def load(self, path):
        with open(path, 'rb') as f:
            self = pickle.load(f) 
        return self

In [103]:
model = SingleRecurrentPerceptron(vec_len=10, lr=1)

In [105]:
model.train(X_train_ho, y_train, epochs=1)

In [94]:
model.save(path="models/recurrent_perceptron.pkl")

In [95]:
model.weights

array([-0.84747142,  0.02764599, -0.05904373,  0.60626922,  0.74619764,
        0.45700575,  0.38727425,  1.97348169,  0.6750649 ,  1.00674874])

## Inference

In [96]:
model = SingleRecurrentPerceptron()
model = model.load("models/recurrent_perceptron.pkl")

In [97]:
tag_mapping = {1: "NN", 2: "DT", 3: "JJ", 4: "OT"}

# Calculate the number of unique POS tags
num_unique_tags = len(tag_mapping)

# Preprocess training data
# Preprocess training data
train_sentences = []
train_labels = []
for entry in train_data:
    tokens = entry['tokens']
    pos_tags = entry['pos_tags']
    chunk_tags = entry['chunk_tags']
    
    # Convert POS tags to one-hot encoded representation
    pos_tags_one_hot = np.zeros((len(pos_tags), num_unique_tags))
    for i, tag in enumerate(pos_tags):
        pos_tags_one_hot[i, tag - 1] = 1  # Subtract 1 to account for 0-based indexing
    
    # Flatten one-hot encoded representation
    flattened_tags = pos_tags_one_hot.flatten()
    
    train_sentences.append(flattened_tags)
    train_labels.append(chunk_tags)


# Initialize and train the single recurrent perceptron
# Initialize and train the single recurrent perceptron
input_size = len(train_sentences[0])  # Get input size from the first sample
output_size = 2  # Binary classification (1 for chunk, 0 for not chunk)
perceptron = SingleRecurrentPerceptron(input_size, output_size)
perceptron.train(train_sentences, train_labels, epochs=10)



# Load test data
with open('test.jsonl', 'r') as f:
    test_data = [json.loads(line) for line in f]

# Preprocess test data
test_sentences = []
test_labels = []
for entry in test_data:
    tokens = entry['tokens']
    pos_tags = entry['pos_tags']
    chunk_tags = entry['chunk_tags']
    
    # Convert POS tags to one-hot encoded representation
    pos_tags_one_hot = np.zeros((len(pos_tags), len(tag_mapping)))
    for i, tag in enumerate(pos_tags):
        pos_tags_one_hot[i, tag - 1] = 1  # Subtract 1 to account for 0-based indexing
    
    test_sentences.append(pos_tags_one_hot)
    test_labels.append(chunk_tags)

# Evaluate the trained perceptron
predictions = perceptron.predict(test_sentences)

# Assuming we have some evaluation function to compute accuracy
# Let's assume a simple accuracy calculation for demonstration
def accuracy(predictions, targets):
    correct = 0
    total = len(predictions)
    for pred, target in zip(predictions, targets):
        pred_labels = [1 if p > 0 else 0 for p in pred]
        if pred_labels == target:
            correct += 1
    return correct / total

acc = accuracy(predictions, test_labels)
print("Accuracy:", acc)

NameError: name 'train_data' is not defined

# Webui

In [98]:
def forward_per_input(inputs):
    # inputs = one_hot_encode(inputs)
    inputs = into_ho([inputs])[0]
    """inputs-- (Tx, 10) """   
    out=[]    #(Tx, 1)
    X_i_j = []  #(Tx, 10)
    Tx = len(inputs)
    y_prev=0
    for i in range(Tx):
        x = np.concatenate([inputs[i], np.array([y_prev])])
        X_i_j.append(x)
        x = x.T @ model.weights - model.threshold[0]
        x = sigmoid(x)
        out.append(x)
    out = (np.array(out) > 0.5) * 1
    return out

def predict(input_string):
    inp = input_string.split(',')
    input_string = [int(i) for i in inp]
    print(input_string)
    result = forward_per_input(input_string)
    out = [str(i) for i in result]
    out = ' '.join(out)
    return out

from flask import Flask, render_template, request, jsonify
import numpy as np
import gradio as gr

app = Flask(__name__)

@app.route("/")
def home():
    return render_template("index.html")

@app.route("/classify", methods=["POST"])
def classify():
    data = request.get_json()
    input_string = data["inputString"]
    out = predict(input_string)
    return jsonify({"result": out})

if __name__ == "__main__":
    iface = gr.Interface(fn=predict, inputs="text", outputs="text")
    iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7869

Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


2024/03/29 18:10:42 [W] [service.go:132] login to server failed: dial tcp 44.237.78.176:7000: i/o timeout


[1, 4, 3, 1, 4, 4, 3, 1, 4]
