### Load data

In [None]:
import pandas as pd
import numpy as np


# CSV file holding the annotated feature table used by the rest of the notebook.
data_path = 'features_annotated.csv'

# Parse the whole file into a DataFrame; the 'tag', 'level' and 'label'
# columns are read from `df` further down.
df = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Column names: the two input features and the prediction target.
X_columns = ['tag', 'level']
y_column = 'label'

# Pull the selected columns out of the DataFrame as plain NumPy arrays
# (X holds one column per feature, y holds the target column).
X, y = df[X_columns].values, df[y_column].values

### Create a mapping for the labels

In [None]:
# Distinct labels present in y.
labelSet = set(y)

# Give every distinct label a dense integer id (0 .. n_labels - 1).
# Ids are handed out in order of first appearance in y instead of in
# set-iteration order: iterating a set of strings may produce a different
# order in every interpreter session (string hash randomization), which
# would change the label <-> id mapping from one run to the next.
# dict.fromkeys() de-duplicates while preserving first-seen order.
label2Idx = {label: idx for idx, label in enumerate(dict.fromkeys(y))}

### Create a mapping for the tags

In [None]:
tag_values = df['tag'].values

# Distinct tags present in the 'tag' column.
tagSet = set(tag_values)

# Give every distinct tag a dense integer id (0 .. n_tags - 1).
# Ids follow the order of first appearance in the column rather than
# set-iteration order, which for strings can differ between interpreter
# sessions (hash randomization) and would reshuffle which one-hot row
# belongs to which tag on every run.
tag2Idx = {tag: idx for idx, tag in enumerate(dict.fromkeys(tag_values))}

# One-hot "embedding" table: row i is the unit vector for the tag with id i.
tagEmbeddings = np.identity(len(tag2Idx), dtype='float32')

In [None]:
# One-hot "embedding" table for the integer 'level' feature: row i is the
# unit vector for level i.
# An embedding lookup needs a row for every index 0 .. max_level, i.e.
# max_level + 1 rows. Sizing the table with max_level alone leaves the
# largest level without a row (out-of-range index at lookup time).
# NOTE(review): assumes 'level' holds non-negative integers that are fed to
# the model unshifted -- confirm against the input pipeline.
num_levels = int(df['level'].max()) + 1
levelEmbeddings = np.identity(num_levels, dtype='float32')

### LSTM model

In [None]:
from keras.models import Model
from keras.layers import TimeDistributed,Conv1D,Dense,Embedding,Input,Dropout,LSTM,Bidirectional,MaxPooling1D,Flatten,concatenate


# --- Inputs -----------------------------------------------------------------
# Two parallel integer sequences per sample: tag ids and level values.
# shape=(None,) leaves the sequence length unspecified.
tag_input = Input(shape=(None,), dtype='int32', name='tags_input')
# Frozen lookup initialised from the precomputed tagEmbeddings matrix.
tag = Embedding(
    input_dim=tagEmbeddings.shape[0],
    output_dim=tagEmbeddings.shape[1],
    weights=[tagEmbeddings],
    trainable=False,
)(tag_input)

level_input = Input(shape=(None,), dtype='int32', name='levels_input')
# Frozen lookup initialised from the precomputed levelEmbeddings matrix.
level = Embedding(
    input_dim=levelEmbeddings.shape[0],
    output_dim=levelEmbeddings.shape[1],
    weights=[levelEmbeddings],
    trainable=False,
)(level_input)

# --- Sequence tagger --------------------------------------------------------
# Join both embedded features per timestep, run a bidirectional LSTM over the
# sequence, and emit a softmax over the label set at every timestep.
output = concatenate([tag, level])
output = Bidirectional(
    LSTM(200, return_sequences=True, dropout=0.50, recurrent_dropout=0.25)
)(output)
output = TimeDistributed(Dense(len(label2Idx), activation='softmax'))(output)

# The model's inputs must be the Input tensors themselves, and every Input the
# graph depends on has to be listed. Passing the embedded `tag` tensor (and
# omitting `level_input`) does not describe a valid graph from inputs to
# `output`.
model = Model(inputs=[tag_input, level_input], outputs=[output])
# Sparse loss: targets are integer label ids (see label2Idx), not one-hot rows.
model.compile(loss='sparse_categorical_crossentropy', optimizer='nadam')
model.summary()