## Import Libs

In [53]:
import os
import pickle
import json
import numpy as np

from model import *
from activation import *
from utils import *

## Load Data

In [54]:
train_path = os.path.join("data", "train.jsonl")
test_path = os.path.join("data","test.jsonl")

In [55]:
X_train, X_test, y_train, y_test= fetch_data(train_path, test_path)

## Data Preprocessing

In [56]:
y_train[0]

array([1, 1, 1, 0, 1, 1, 1, 0, 1])

In [57]:
print(X_train[0])
one_hot_encode(X_train[0])

[1, 4, 3, 1, 4, 4, 3, 1, 4]


array([[1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.]])

In [58]:
X_train_ho = into_ho(X_train)
X_test_ho = into_ho(X_test)


## Recurrent Perceptron Model

In [59]:
model = SingleRecurrentPerceptron(vec_len=10, lr=1)

In [60]:
model.train(X_train_ho, y_train, epochs=1)

epoch: 0.00, training loss : 239.72, training accuracy: 47.03%, validation loss: 239.76, validation accuracy: 47.11%


In [94]:
model.save(path="models/recurrent_perceptron.pkl")

In [95]:
model.weights

array([-0.84747142,  0.02764599, -0.05904373,  0.60626922,  0.74619764,
        0.45700575,  0.38727425,  1.97348169,  0.6750649 ,  1.00674874])

## Inference

In [96]:
model = SingleRecurrentPerceptron()
model = model.load("models/recurrent_perceptron.pkl")

In [97]:
tag_mapping = {1: "NN", 2: "DT", 3: "JJ", 4: "OT"}

# Calculate the number of unique POS tags
num_unique_tags = len(tag_mapping)

# Preprocess training data
# Preprocess training data
train_sentences = []
train_labels = []
for entry in train_data:
    tokens = entry['tokens']
    pos_tags = entry['pos_tags']
    chunk_tags = entry['chunk_tags']
    
    # Convert POS tags to one-hot encoded representation
    pos_tags_one_hot = np.zeros((len(pos_tags), num_unique_tags))
    for i, tag in enumerate(pos_tags):
        pos_tags_one_hot[i, tag - 1] = 1  # Subtract 1 to account for 0-based indexing
    
    # Flatten one-hot encoded representation
    flattened_tags = pos_tags_one_hot.flatten()
    
    train_sentences.append(flattened_tags)
    train_labels.append(chunk_tags)


# Initialize and train the single recurrent perceptron
# Initialize and train the single recurrent perceptron
input_size = len(train_sentences[0])  # Get input size from the first sample
output_size = 2  # Binary classification (1 for chunk, 0 for not chunk)
perceptron = SingleRecurrentPerceptron(input_size, output_size)
perceptron.train(train_sentences, train_labels, epochs=10)



# Load test data
with open('test.jsonl', 'r') as f:
    test_data = [json.loads(line) for line in f]

# Preprocess test data
test_sentences = []
test_labels = []
for entry in test_data:
    tokens = entry['tokens']
    pos_tags = entry['pos_tags']
    chunk_tags = entry['chunk_tags']
    
    # Convert POS tags to one-hot encoded representation
    pos_tags_one_hot = np.zeros((len(pos_tags), len(tag_mapping)))
    for i, tag in enumerate(pos_tags):
        pos_tags_one_hot[i, tag - 1] = 1  # Subtract 1 to account for 0-based indexing
    
    test_sentences.append(pos_tags_one_hot)
    test_labels.append(chunk_tags)

# Evaluate the trained perceptron
predictions = perceptron.predict(test_sentences)

# Assuming we have some evaluation function to compute accuracy
# Let's assume a simple accuracy calculation for demonstration
def accuracy(predictions, targets):
    correct = 0
    total = len(predictions)
    for pred, target in zip(predictions, targets):
        pred_labels = [1 if p > 0 else 0 for p in pred]
        if pred_labels == target:
            correct += 1
    return correct / total

acc = accuracy(predictions, test_labels)
print("Accuracy:", acc)

NameError: name 'train_data' is not defined

# Webui

In [51]:
from flask import Flask, render_template, request, jsonify
import numpy as np
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [52]:
def forward_per_input(inputs):
    # inputs = one_hot_encode(inputs)
    inputs = into_ho([inputs])[0]
    """inputs-- (Tx, 10) """   
    out=[]    #(Tx, 1)
    X_i_j = []  #(Tx, 10)
    Tx = len(inputs)
    y_prev=0
    for i in range(Tx):
        x = np.concatenate([inputs[i], np.array([y_prev])])
        X_i_j.append(x)
        x = x.T @ model.weights - model.threshold[0]
        x = sigmoid(x)
        out.append(x)
    out = (np.array(out) > 0.5) * 1
    return out

def predict(input_string):
    inp = input_string.split(',')
    input_string = [int(i) for i in inp]
    print(input_string)
    result = forward_per_input(input_string)
    out = [str(i) for i in result]
    out = ' '.join(out)
    return out

app = Flask(__name__)

@app.route("/")
def home():
    return render_template("index.html")

@app.route("/classify", methods=["POST"])
def classify():
    data = request.get_json()
    input_string = data["inputString"]
    out = predict(input_string)
    return jsonify({"result": out})

if __name__ == "__main__":
    iface = gr.Interface(fn=predict, inputs="text", outputs="text")
    iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://a718fbb1b89aa54851.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Traceback (most recent call last):
  File "/home/screa/Desktop/Sem8/sem8/lib/python3.10/site-packages/gradio/queueing.py", line 522, in process_events
    response = await route_utils.call_process_api(
  File "/home/screa/Desktop/Sem8/sem8/lib/python3.10/site-packages/gradio/route_utils.py", line 260, in call_process_api
    output = await app.get_blocks().process_api(
  File "/home/screa/Desktop/Sem8/sem8/lib/python3.10/site-packages/gradio/blocks.py", line 1689, in process_api
    result = await self.call_function(
  File "/home/screa/Desktop/Sem8/sem8/lib/python3.10/site-packages/gradio/blocks.py", line 1255, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/home/screa/Desktop/Sem8/sem8/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/home/screa/Desktop/Sem8/sem8/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2134, in run_sync_in_worker_thread
    

[1, 2, 3, 4]
[1, 4, 3, 1, 4, 4, 3, 1, 4]
