### Load data

In [1]:
import pandas as pd
import numpy as np


data_path = 'features_annotated.csv'
df = pd.read_csv(data_path)

  return f(*args, **kwds)


In [2]:
X_columns = ['tag', 'level']
y_column = 'label'

X = df[X_columns].values
y = df[y_column].values

### Create a mapping for the labels

In [45]:
labelSet = set()
for l in y:
    labelSet.add(l)

label2Idx = {}
for label in labelSet:
    label2Idx[label] = len(label2Idx)
labelEmbeddings = np.identity(len(label2Idx), dtype='float32')

In [36]:
label2Idx

{'Claim': 0,
 'Body': 1,
 'None': 2,
 'Title': 3,
 'Date': 4,
 'Credibility': 5,
 'Sources': 6}

### Create a mapping for the tags

In [46]:
tagSet = set()
for t in df['tag'].values:
    tagSet.add(t)

tag2Idx = {}
for tag in tagSet:
    tag2Idx[tag] = len(tag2Idx)
tagEmbeddings = np.identity(len(tag2Idx), dtype='float32')

In [47]:
levelEmbeddings = np.identity(max(df['level'].values), dtype='float32')

In [78]:
tag_features = []
level_features = []
labels = []
for X_, y_ in zip(X, y):
    tag_features.append(tagEmbeddings[tag2Idx[X_[0]]])
    level_features.append(levelEmbeddings[X_[1]-1])
    labels.append(labelEmbeddings[label2Idx[y_]])
tag_features = np.array(tag_features)
level_features = np.array(level_features)
labels = np.array(labels)

In [80]:
level_features

array([[0., 0., 0., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

### Dense Model

In [95]:
from keras.models import Model
from keras.layers import TimeDistributed,Conv1D,Dense,Embedding,Input,Dropout,LSTM,Bidirectional,MaxPooling1D,Flatten,concatenate


tag_input = Input(shape=(tagEmbeddings.shape[0],))
tag_ = Dense(32, activation='relu')(tag_input)
tag_ = Dense(16, activation='relu')(tag_)

level_input = Input(shape=(levelEmbeddings.shape[0],))
level_ = Dense(32, activation='relu')(level_input)
level_ = Dense(16, activation='relu')(level_)

output = concatenate([tag_, level_])
output = Dense(32, activation='relu')(output)
output = Dense(16, activation='relu')(output)
output = Dense(len(label2Idx), activation='softmax')(output)

model = Model(inputs=[tag_input, level_input], outputs=[output])
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit([tag_features, level_features], labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2824cd19e8>

### LSTM model

In [114]:
from keras.models import Model
from keras.layers import TimeDistributed,Conv1D,Dense,Embedding,Input,Dropout,LSTM,Bidirectional,MaxPooling1D,Flatten,concatenate


tag_input = Input(shape=(None,), dtype='int32', name='tags_input')
tag = Embedding(output_dim=tagEmbeddings.shape[1], input_dim=tagEmbeddings.shape[0], weights=[tagEmbeddings], trainable=False)(tag_input)

level_input = Input(shape=(None,), dtype='int32', name='levels_input')
level = Embedding(output_dim=levelEmbeddings.shape[1], input_dim=levelEmbeddings.shape[0], weights=[levelEmbeddings], trainable=False)(level_input)

output = concatenate([tag, level])
output = Bidirectional(LSTM(200, return_sequences=True, dropout=0.50, recurrent_dropout=0.25))(output)
output = TimeDistributed(Dense(len(label2Idx), activation='softmax'))(output)

model = Model(inputs=[tag_input, level_input], outputs=[output])
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')
model.summary()
#model.fit([tag_features, level_features], labels, epochs=10)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
tags_input (InputLayer)         (None, None)         0                                            
__________________________________________________________________________________________________
levels_input (InputLayer)       (None, None)         0                                            
__________________________________________________________________________________________________
embedding_53 (Embedding)        (None, None, 72)     5184        tags_input[0][0]                 
__________________________________________________________________________________________________
embedding_54 (Embedding)        (None, None, 20)     400         levels_input[0][0]               
__________________________________________________________________________________________________
concatenat