In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import re
import spacy
import nltk
from nltk import tokenize
from collections import Counter
from spacy import displacy

nlp = spacy.load('en_core_web_sm')

# Creating Dataset for Entity Recognition

In [2]:
df = pd.read_csv('C:/Users/wongzn/Desktop/df_dec.csv')

new_df = df
new_df.reset_index(inplace=True)
new_df.drop(columns=['index'],inplace=True)

In [3]:
def preprocess_content(column):
    column = column.apply(lambda x: re.sub(r'SINGAPORE -', '', x))
    #column = column.apply(lambda x: re.sub(r'the\b', '', x,flags=re.IGNORECASE))
    column = column.apply(lambda x: re.sub(r'SINGAPORE –', ' ', x, flags=re.IGNORECASE))
    column = column.apply(lambda x: re.sub(r'SINGAPORE-', ' ', x, flags=re.IGNORECASE))
    column = column.apply(lambda x: re.sub(r'\([^)]*\)', '',x)) #remove (anything in brackets)
    #column = column.apply(lambda x: re.sub(r'\d{2}[/-]\w{4}[/-]\w{3,4}', '',x)) #remove(xx-year-old)
    #column = column.apply(lambda x: re.sub(r'straits times','',x, flags=re.IGNORECASE))
    #column = column.apply(lambda x: re.sub(r'per cent','',x, flags=re.IGNORECASE))
    #column = column.apply(lambda x: re.sub(r'cpl kok yuen chin','',x, flags=re.IGNORECASE))
    #column = column.apply(lambda x: re.sub(r'cpl kok','',x, flags=re.IGNORECASE))
    #column = column.apply(lambda x: re.sub(r'st photo','',x, flags=re.IGNORECASE))
    column = column.apply(lambda x: re.sub(r'facebook post','',x, flags=re.IGNORECASE))
    #column = column.apply(lambda x: re.sub(r'years ago','',x, flags=re.IGNORECASE))
    #column = column.apply(lambda x: re.sub(r'[^a-zA-z\s]', ' ', x)) #removing digits and special characters (punctuations)
    #column = column.apply(lambda x: x.lower()) #convert to lowercase
    column = column.apply(lambda x: re.sub('\n','',x))
    return column

new_df['Article Content'] = preprocess_content(new_df['Article Content'])

In [4]:
sentences = []
count_sent = []
for i in range(len(new_df)):
    #Split into sentences
    sentences.append([x for x in tokenize.sent_tokenize(new_df['Article Content'][i])])
    #Counting the number of sentences in each article
    count_sent.append(len(tokenize.sent_tokenize(new_df['Article Content'][i])))

#Flatten list of sentences
new =  [item for sublist in sentences for item in sublist]

In [5]:
new_df = pd.concat([pd.Series(new)],axis=1)
new_df.rename(columns={0:'Sent'},inplace=True)

In [6]:
rm = 'ST PHOTO'
new2 = [item for item in new if rm not in item] #Removing sentences with ST Photo

df_sent = pd.DataFrame(new2, columns=['Sent'])
df_sent.drop_duplicates(subset=['Sent'], inplace=True)

In [7]:
df_sent.to_csv('C:/Users/wongzn/Desktop/df_sent.csv')

## Identifying Entities Using spaCy

In [8]:
ner = df_sent.Sent.apply(nlp)

In [9]:
word = []
pos = []
iob = []
entity = []
sent_num = []
count = 0

for ent in range(len(ner)):
    try:
        count +=1
        for i in ner[ent]:
            word.append(i.text)
            iob.append(i.ent_iob_)
            entity.append(i.ent_type_)
            pos.append(i.pos_)
            sent_num.append("Sentence " + str(count))
    except:
        continue

In [10]:
ent_df = pd.DataFrame(list(zip(sent_num,word,pos,iob,entity)), columns=['Sentence Number','Word', 'POS', 'IOB','Entity'])

In [11]:
ent_df.replace('', np.nan, inplace=True)
ent_df.isnull().sum()

Sentence Number          0
Word                     0
POS                      0
IOB                      0
Entity             1438105
dtype: int64

In [12]:
ent_df.fillna('O',inplace=True)

In [13]:
ent_df.Entity.value_counts()

O              1438105
ORG              70821
DATE             58748
PERSON           39809
CARDINAL         22288
GPE              15682
MONEY            12406
FAC               8251
TIME              4598
NORP              4284
LOC               4258
ORDINAL           3031
QUANTITY          2085
EVENT             1939
LAW               1925
WORK_OF_ART       1567
PRODUCT            943
LANGUAGE           195
PERCENT             38
Name: Entity, dtype: int64

In [14]:
ent_df['Final'] = ent_df['IOB']+'-'+ ent_df['Entity']
ent_df['Final'].replace('O-O','O',inplace=True)

In [15]:
focus = ['ORG','LOC','PERSON','GPE','FAC'] #Only working on these entities
pol_df = ent_df[ent_df['Entity'].isin(focus)]
pol_df.Entity.value_counts()

ORG       70821
PERSON    39809
GPE       15682
FAC        8251
LOC        4258
Name: Entity, dtype: int64

In [16]:
#pol_df.to_csv('C:/Users/wongzn/Desktop/Entity_Data_subset.csv')

# Exploration of Entities 

### Getting Frequency Of Each Entity Type

In [17]:
ner = [nlp(s) for s in df_sent.Sent]

In [18]:
net = [[w.ent_type_ for w in s if w.ent_type_] for s in ner]

In [19]:
net = [s if s else ['None'] for s in net]

In [20]:
ners = [ne for n in net for ne in n]

In [21]:
from collections import Counter

ner_cnt = Counter(ners)
ner_cnt.most_common(10)

[('ORG', 71741),
 ('DATE', 59585),
 ('PERSON', 40534),
 ('CARDINAL', 22577),
 ('GPE', 15873),
 ('None', 14981),
 ('MONEY', 12499),
 ('FAC', 8327),
 ('TIME', 4666),
 ('NORP', 4365)]

In [22]:
NE = pd.DataFrame(ners, columns=['NE'])

In [23]:
NE_count = pd.DataFrame(NE.NE.value_counts())
NE_count = NE_count.rename_axis('Entity').reset_index()
NE_count.rename(columns={'NE':'Freq'},inplace=True)
NE_count['Perc'] = round((NE_count.Freq/sum(NE_count.Freq) * 100),2).astype(str) +'%'

In [24]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(22,5))
plt.bar(NE_count['Entity'],NE_count['Freq'])
plt.xticks(rotation=75)
for i in range(len(NE_count)):
    plt.text(NE_count.Entity[i],NE_count.Freq[i], NE_count.Perc[i], color='black', ha="center",size=12)
plt.show()

<Figure size 2200x500 with 1 Axes>

### Getting Top Frequency in Each Entity Type

In [25]:
k=[]
filt=['PERSON']

for ent in ner:
    for i in ent.ents:
        if i.label_ in filt:
            k.append(i.text)
            
k = pd.DataFrame(k,columns=['PERSON'])
k = pd.DataFrame(k.PERSON.value_counts()).rename(columns={'PERSON':'Freq'})
k = k.rename_axis('Entity').reset_index()
k.head(10)

Unnamed: 0,Entity,Freq
0,Tan,325
1,Lee,261
2,PM Lee,198
3,Ong,179
4,Lee Hsien Loong,147
5,Halimah Yacob,141
6,Lim,136
7,Mr Heng,132
8,Ang Mo Kio,116
9,Chan,114


# Named Entity Recognition

In [26]:
df1 = pd.read_csv('C:/Users/wongzn/Desktop/Entity_Data.csv')
df2 = pd.read_csv('C:/Users/wongzn/Desktop/Entity_Data_Edit.csv') #Edited the entities manually
#df2.drop(columns=['Final'],inplace=True)

In [27]:
df = df1.merge(df2, on=['Unnamed: 0'], how='left')[:80000]
df.IOB_y.fillna(df.IOB_x, inplace=True)
df.Entity_y.fillna(df.Entity_x, inplace=True)
df.drop(columns=['Unnamed: 0','IOB_x','Entity_x','Sentence Number_y','Word_y','POS_y'], inplace=True)
df.rename(columns={'Sentence Number_x':"Sentence Number",'Word_x': 'Word','POS_x':'POS','IOB_y':'IOB','Entity_y':'Entity'}, inplace=True)

In [28]:
#df.to_csv("C:/Users/wongzn/Desktop/Entity_Data_Edit.csv")

In [29]:
df.Entity.value_counts()

O              68073
DATE            2481
ORG             2352
PERSON          1911
CARDINAL        1303
LOC              679
MONEY            605
FAC              478
GPE              382
TITLE            373
TIME             287
QUANTITY         205
NORP             161
EVENT            160
PERCENT          158
LAW              135
ORDINAL          125
PRODUCT           80
WORK_OF_ART       26
LANGUAGE          26
Name: Entity, dtype: int64

In [30]:
df['Tag'] = np.where(df.IOB.isin(['O'])|df.Entity.isin(['O']), 'O', df.IOB.str.strip() + '-' + df.Entity )
#df.drop(columns=['IOB','Entity'],inplace=True)

In [31]:
df.Word.fillna('null',inplace=True)
df.isna().sum()

Sentence Number    0
Word               0
POS                0
IOB                0
Entity             0
Tag                0
dtype: int64

In [32]:
df.groupby('Tag').size().reset_index(name='counts')
X = df.drop('Tag', axis=1)

In [33]:
v = DictVectorizer(sparse=False)
X = v.fit_transform(X.to_dict('records'))
X.shape

(80000, 12401)

In [34]:
y = df.Tag.values

In [35]:
classes = np.unique(y)
classes = classes.tolist()

In [36]:
X.shape, y.shape

((80000, 12401), (80000,))

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state=0)

In [39]:
new_classes = classes.copy()
new_classes.pop() #Remove O

'O'

In [40]:
new_classes = ['B-TITLE','I-TITLE','B-PERSON','I-PERSON','B-ORG','I-ORG','B-FAC','I-FAC','B-LOC','I-LOC']

# Modelling

In [None]:
sgd = SGDClassifier()
sgd.partial_fit(X_train, y_train,classes)
print(classification_report(y_pred=sgd.predict(X_test), y_true=y_test, labels=new_classes))

In [24]:
sgd = SGDClassifier()
sgd.fit(X_train, y_train)
print(classification_report(y_pred=sgd.predict(X_test), y_true=y_test, labels=new_classes))

              precision    recall  f1-score   support

     B-TITLE       1.00      1.00      1.00        61
     I-TITLE       1.00      1.00      1.00        68
    B-PERSON       1.00      1.00      1.00       333
    I-PERSON       1.00      1.00      1.00       317
       B-ORG       1.00      1.00      1.00       346
       I-ORG       1.00      1.00      1.00       447
       B-FAC       1.00      1.00      1.00        57
       I-FAC       1.00      1.00      1.00        98
       B-LOC       1.00      1.00      1.00       100
       I-LOC       1.00      1.00      1.00       109

   micro avg       1.00      1.00      1.00      1936
   macro avg       1.00      1.00      1.00      1936
weighted avg       1.00      1.00      1.00      1936



In [21]:
from sklearn import model_selection, naive_bayes, svm
SVM = svm.SVC(C=1.6, kernel='linear',random_state=1)
SVM.fit(X_train,y_train)
# predict the labels on validation dataset
predictions_SVM = SVM.predict(X_test)
# Use accuracy_score function to get the accuracy
print(classification_report(y_pred=predictions_SVM, y_true=y_test, labels=new_classes))

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


               precision    recall  f1-score   support

   B-CARDINAL       0.52      0.68      0.59       414
       B-DATE       0.66      0.56      0.60       454
      B-EVENT       0.17      0.07      0.10        15
        B-FAC       0.63      0.38      0.48        50
        B-GPE       0.60      0.74      0.67       117
   B-LANGUAGE       0.75      0.86      0.80         7
        B-LAW       0.25      0.06      0.10        17
        B-LOC       0.64      0.62      0.63        92
      B-MONEY       0.29      0.10      0.15        99
       B-NORP       0.95      0.65      0.77        55
    B-ORDINAL       0.87      0.91      0.89        45
        B-ORG       0.56      0.35      0.43       317
    B-PERCENT       0.00      0.00      0.00         0
     B-PERSON       0.50      0.38      0.43       267
    B-PRODUCT       0.68      0.62      0.65        24
   B-QUANTITY       0.40      0.08      0.13        26
       B-TIME       0.43      0.25      0.32        24
      B-T

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
knn=KNeighborsClassifier(n_neighbors=37)
knn.fit(X_train,y_train)
knn_predictions = knn.predict(X_test)
print(classification_report(y_pred=knn_predictions, y_true=y_test, labels=new_classes))

In [None]:
from sklearn import tree
tree = tree.DecisionTreeClassifier(random_state = 21)
tree.fit(X_train,y_train)
tree_prediction = tree.predict(X_test)
print(classification_report(y_pred=tree_prediction, y_true=y_test, labels=new_classes))

In [52]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression(multi_class='multinomial',solver='newton-cg')
LR.fit(X_train,y_train)
pred_LR = LR.predict(X_test)
print(classification_report(y_pred=pred_LR, y_true=y_test, labels=new_classes))

              precision    recall  f1-score   support

       B-art       1.00      0.11      0.20         9
       B-eve       0.00      0.00      0.00         3
       B-geo       0.42      0.72      0.53        69
       B-gpe       0.77      0.60      0.67       102
       B-nat       0.00      0.00      0.00         0
       B-org       0.48      0.51      0.50        63
       B-per       0.71      0.49      0.58        41
       B-tim       1.00      0.79      0.88        52
       I-art       0.00      0.00      0.00        10
       I-eve       0.00      0.00      0.00         3
       I-geo       0.00      0.00      0.00        11
       I-gpe       1.00      0.17      0.29         6
       I-nat       0.00      0.00      0.00         1
       I-org       0.50      0.34      0.41        47
       I-per       0.54      0.58      0.56        66
       I-tim       1.00      0.25      0.40         4

   micro avg       0.59      0.54      0.56       487
   macro avg       0.46   

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [53]:
from sklearn.neural_network import MLPClassifier
nn = MLPClassifier(random_state=1)
nn.fit(X_train,y_train)
nn_prediction = nn.predict(X_test)
print(classification_report(y_pred=nn_prediction, y_true=y_test, labels=new_classes))

              precision    recall  f1-score   support

       B-art       0.60      0.33      0.43         9
       B-eve       0.00      0.00      0.00         3
       B-geo       0.52      0.57      0.54        69
       B-gpe       0.72      0.75      0.73       102
       B-nat       0.00      0.00      0.00         0
       B-org       0.49      0.52      0.51        63
       B-per       0.42      0.49      0.45        41
       B-tim       0.92      0.90      0.91        52
       I-art       0.43      0.30      0.35        10
       I-eve       1.00      0.33      0.50         3
       I-geo       0.57      0.36      0.44        11
       I-gpe       0.75      0.50      0.60         6
       I-nat       0.00      0.00      0.00         1
       I-org       0.49      0.43      0.45        47
       I-per       0.62      0.48      0.54        66
       I-tim       0.25      0.25      0.25         4

   micro avg       0.60      0.58      0.59       487
   macro avg       0.49   

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


# CRF Model For NER

In [41]:
import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

In [42]:
df1 = pd.read_csv('C:/Users/wongzn/Desktop/Entity_Data.csv')
df2 = pd.read_csv('C:/Users/wongzn/Desktop/Entity_Data_Edit.csv')
#df2.drop(columns=['Final'],inplace=True)
df = df1.merge(df2, on=['Unnamed: 0'], how='left')[:80000]
df.IOB_y.fillna(df.IOB_x, inplace=True)
df.Entity_y.fillna(df.Entity_x, inplace=True)
df.drop(columns=['Unnamed: 0','IOB_x','Entity_x','Sentence Number_y','Word_y','POS_y'], inplace=True)
df.rename(columns={'Sentence Number_x':"Sentence Number",'Word_x': 'Word','POS_x':'POS','IOB_y':'IOB','Entity_y':'Entity'}, inplace=True)
df['Tag'] = np.where(df.IOB.isin(['O'])|df.Entity.isin(['O']), 'O', df.IOB.str.strip() + '-' + df.Entity )
df.drop(columns=['IOB','Entity'],inplace=True)

In [43]:
class SentenceGetter(object):
    
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [(w, p, t) for w, p, t in zip(s['Word'].values.tolist(), 
                                                           s['POS'].values.tolist(), 
                                                           s['Tag'].values.tolist())]
        self.grouped = self.data.groupby('Sentence Number').apply(agg_func)
        self.sentences = [s for s in self.grouped]
        
    def get_next(self):
        try: 
            s = self.grouped['Sentence {}'.format(self.n_sent)]
            self.n_sent += 1
            return s 
        except:
            return None

In [44]:
getter = SentenceGetter(df)
sent = getter.get_next()
#print(sent)

In [45]:
sentences = getter.sentences

In [46]:
def word2features(sent, i):
    word = str(sent[i][0])
    postag = sent[i][1]
    
    features = {
        'bias': 1.0, 
        'word.lower()': word.lower(), 
        'word[-3:]': word[-3:],
        'word[-2:]': word[-2:],
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'postag': postag,
        'postag[:2]': postag[:2],
    }
    if i > 0:
        word1 = str(sent[i-1][0])
        postag1 = sent[i-1][1]
        features.update({
            '-1:word.lower()': word1.lower(),
            '-1:word.istitle()': word1.istitle(),
            '-1:word.isupper()': word1.isupper(),
            '-1:postag': postag1,
            '-1:postag[:2]': postag1[:2],
        })
    else:
        features['BOS'] = True
    if i < len(sent)-1:
        word1 = str(sent[i+1][0])
        postag1 = sent[i+1][1]
        features.update({
            '+1:word.lower()': word1.lower(),
            '+1:word.istitle()': word1.istitle(),
            '+1:word.isupper()': word1.isupper(),
            '+1:postag': postag1,
            '+1:postag[:2]': postag1[:2],
        })
    else:
        features['EOS'] = True

    return features

def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def sent2labels(sent):
    return [label for token, postag, label in sent]

def sent2tokens(sent):
    return [token for token, postag, label in sent]

In [47]:
X = [sent2features(s) for s in sentences]
y = [sent2labels(s) for s in sentences]

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

In [49]:
import time

In [50]:
st = time.time()
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(X_train, y_train)

CRF(algorithm='lbfgs', all_possible_states=None, all_possible_transitions=True,
    averaging=None, c=None, c1=0.1, c2=0.1, calibration_candidates=None,
    calibration_eta=None, calibration_max_trials=None, calibration_rate=None,
    calibration_samples=None, delta=None, epsilon=None, error_sensitive=None,
    gamma=None, keep_tempfiles=None, linesearch=None, max_iterations=100,
    max_linesearch=None, min_freq=None, model_filename=None, num_memories=None,
    pa_type=None, period=None, trainer_cls=None, variance=None, verbose=False)

In [51]:
time.time()-st

28.678688526153564

In [52]:
new_classes = ['B-TITLE','I-TITLE','B-PERSON','I-PERSON','B-ORG','I-ORG','B-FAC','I-FAC','B-LOC','I-LOC']

In [53]:
y_pred = crf.predict(X_test)
metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=new_classes)

0.7262327040614024

In [54]:
print(metrics.flat_classification_report(y_test, y_pred, labels = new_classes))

              precision    recall  f1-score   support

     B-TITLE       0.73      0.47      0.57        64
     I-TITLE       0.49      0.29      0.36        73
    B-PERSON       0.77      0.83      0.80       350
    I-PERSON       0.80      0.87      0.83       350
       B-ORG       0.68      0.65      0.67       330
       I-ORG       0.68      0.73      0.71       393
       B-FAC       0.81      0.53      0.64        55
       I-FAC       0.88      0.50      0.64       100
       B-LOC       0.81      0.81      0.81       118
       I-LOC       0.77      0.80      0.79       108

   micro avg       0.74      0.72      0.73      1941
   macro avg       0.74      0.65      0.68      1941
weighted avg       0.74      0.72      0.73      1941



# CRF Gridsearch

In [55]:
import scipy.stats
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV

crf = sklearn_crfsuite.CRF(algorithm='lbfgs', max_iterations=100,all_possible_transitions=True)
params_space = {'c1': scipy.stats.expon(scale=0.5),'c2': scipy.stats.expon(scale=0.05),}

# use the same metric for evaluation
f1_scorer = make_scorer(metrics.flat_f1_score,
                        average='weighted', labels=new_classes)

# search
st = time.time()
rs = RandomizedSearchCV(crf, params_space, cv=3, verbose=1,n_jobs=-1, n_iter=50, scoring=f1_scorer)
rs.fit(X_train, y_train)
print(time.time()-st)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 11.4min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 42.7min finished


2649.630345582962


In [56]:
print('best params:', rs.best_params_)
print('best CV score:', rs.best_score_)
print('model size: {:0.2f}M'.format(rs.best_estimator_.size_ / 1000000))

best params: {'c1': 0.006270641912923742, 'c2': 0.09073265475048321}
best CV score: 0.7489902505576554
model size: 1.11M


In [57]:
crf = rs.best_estimator_
y_pred = crf.predict(X_test)
print(metrics.flat_classification_report(y_test, y_pred, labels=new_classes))

              precision    recall  f1-score   support

     B-TITLE       0.77      0.47      0.58        64
     I-TITLE       0.58      0.29      0.39        73
    B-PERSON       0.78      0.81      0.79       350
    I-PERSON       0.81      0.85      0.83       350
       B-ORG       0.68      0.65      0.67       330
       I-ORG       0.68      0.72      0.70       393
       B-FAC       0.79      0.56      0.66        55
       I-FAC       0.92      0.54      0.68       100
       B-LOC       0.81      0.81      0.81       118
       I-LOC       0.80      0.80      0.80       108

   micro avg       0.75      0.72      0.73      1941
   macro avg       0.76      0.65      0.69      1941
weighted avg       0.75      0.72      0.73      1941



# Feeding in New Data (Test Dataset)

In [58]:
df_test = pd.read_csv('C:/Users/wongzn/Desktop/CRF_test_data.csv')[13:18701]
df_test.drop(columns=['Unnamed: 0'],inplace=True)

In [59]:
df_test.tail()

Unnamed: 0,Sentence Number,Word,POS,IOB,Entity_Original
18696,Sentence 22093,years,NOUN,I,DATE
18697,Sentence 22093,and,CCONJ,O,O
18698,Sentence 22093,fined,VERB,O,O
18699,Sentence 22093,.,PUNCT,O,O
18700,Sentence 22094,,SPACE,O,O


In [None]:
#df_test = df1_test.merge(df2_test, on=['Unnamed: 0'], how='left')[557804:611740]
#df_test.IOB_y.fillna(df_test.IOB_x, inplace=True)
#df_test.Entity_y.fillna(df_test.Entity_x, inplace=True)
#df_test.drop(columns=['Unnamed: 0','IOB_x','Entity_x','Sentence Number_y','Word_y','POS_y'], inplace=True)
#df_test.rename(columns={'Sentence Number_x':"Sentence Number",'Word_x': 'Word','POS_x':'POS','IOB_y':'IOB','Entity_y':'Entity_Original'}, inplace=True)

In [60]:
df_test['Tag'] = np.where(df_test.IOB.isin(['O'])|df_test.Entity_Original.isin(['O']), 'O', df_test.IOB.str.strip() + '-' + df_test.Entity_Original )
df_test.drop(columns=['IOB'],inplace=True)

In [61]:
getter_test = SentenceGetter(df_test)

In [62]:
sentences_test = getter_test.sentences

In [63]:
X_train = [sent2features(s) for s in sentences]
X_test = [sent2features(s) for s in sentences_test]
Y_train = [sent2labels(s) for s in sentences]
Y_test = [sent2labels(s) for s in sentences_test]

In [64]:
st = time.time()
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(X_train, Y_train)

CRF(algorithm='lbfgs', all_possible_states=None, all_possible_transitions=True,
    averaging=None, c=None, c1=0.1, c2=0.1, calibration_candidates=None,
    calibration_eta=None, calibration_max_trials=None, calibration_rate=None,
    calibration_samples=None, delta=None, epsilon=None, error_sensitive=None,
    gamma=None, keep_tempfiles=None, linesearch=None, max_iterations=100,
    max_linesearch=None, min_freq=None, model_filename=None, num_memories=None,
    pa_type=None, period=None, trainer_cls=None, variance=None, verbose=False)

In [65]:
time.time()-st

132.8361439704895

In [66]:
y_pred = crf.predict(X_test)
#metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=new_classes)

In [67]:
flat_list_true = [item for sublist in Y_test for item in sublist]

In [68]:
flat_list_value = df_test.Word

In [69]:
flat_list_pred = [item for sublist in y_pred for item in sublist]

In [70]:
test_df = pd.DataFrame()

In [71]:
test_df['Word'] = [x for x in flat_list_value]
test_df['Original'] = flat_list_true
test_df['Pred'] = flat_list_pred

In [72]:
test_df.reset_index(inplace=True)

In [73]:
test_df.drop(columns=['index'],inplace=True)

In [75]:
test_df.head()

Unnamed: 0,Word,Original,Pred
0,Singaporeans,B-NORP,B-NORP
1,should,O,O
2,defer,O,O
3,non,O,O
4,-,O,O


In [None]:
#test_df.to_csv("C:/Users/wongzn/Desktop/CRF_res.csv")

# Getting Entities from CRF Model Prediction

In [76]:
location = []
for i in range(len(test_df)):
    if test_df.Pred.iloc[i] == 'B-LOC':
        word = test_df.Word.iloc[i]
        count = i
        while test_df.Pred.iloc[count+1] == 'I-LOC':
            word = word + ' ' + test_df.Word.iloc[count+1]
            count +=1
        else: 
            word = word
        location.append(word)

In [77]:
loc_df = pd.DataFrame(location,columns=['Loc'])
loc_df = pd.DataFrame(loc_df.Loc.value_counts()).rename(columns={'Loc':'Freq'})
loc_df = loc_df.rename_axis('Entity').reset_index()

In [78]:
loc_df.head(10)

Unnamed: 0,Entity,Freq
0,Asia Chapter,4
1,Seletar Airport,2
2,Subang,2
3,Pedra Branca,2
4,Causeway Bay,2
5,Serangoon Road,1
6,Rakhine,1
7,Telok Kurau,1
8,Punggol,1
9,Choa Chu Kang,1


# CRF Results Exploration

In [79]:
from collections import Counter

label_from = ['O','B-TITLE','I-TITLE','B-PERSON','I-PERSON','B-ORG','I-ORG','B-FAC','I-FAC','B-LOC','I-LOC']
label_to = ['O','B-TITLE','I-TITLE','B-PERSON','I-PERSON','B-ORG','I-ORG','B-FAC','I-FAC','B-LOC','I-LOC']

def print_transitions(trans_features):
    for (label_from, label_to), weight in trans_features:
        a =['O','B-TITLE','I-TITLE','B-PERSON','I-PERSON','B-ORG','I-ORG','B-FAC','I-FAC','B-LOC','I-LOC']
        if label_from in a and label_to in a:
            print("%-6s -> %-7s %0.6f" % (label_from, label_to, weight))

print("Top likely transitions:")
print_transitions(Counter(crf.transition_features_).most_common(20))

print("\nTop unlikely transitions:")
print_transitions(Counter(crf.transition_features_).most_common()[-20:])

Top likely transitions:
B-FAC  -> I-FAC   7.052096
B-ORG  -> I-ORG   6.022573
I-ORG  -> I-ORG   5.918308
I-FAC  -> I-FAC   5.634691
B-PERSON -> I-PERSON 5.481820
B-TITLE -> I-TITLE 5.377037
I-PERSON -> I-PERSON 5.291522

Top unlikely transitions:
O      -> I-FAC   -1.878616
I-TITLE -> I-ORG   -1.972712
O      -> I-LOC   -2.061899
O      -> I-PERSON -2.489468
O      -> I-ORG   -2.588420
O      -> I-TITLE -3.209935


In [80]:
def print_state_features(state_features):
    for (attr, label), weight in state_features:
        a =['B-TITLE','I-TITLE','B-PERSON','I-PERSON','B-ORG','I-ORG','B-FAC','I-FAC','B-LOC','I-LOC']
        if label in a:
            print("%0.6f %-8s %s" % (weight, label, attr))

print("Top positive:")
print_state_features(Counter(crf.state_features_).most_common(30))

print("\nTop negative:")
print_state_features(Counter(crf.state_features_).most_common()[-100:])

Top positive:
4.227195 B-ORG    word.lower():parliament
4.071964 B-ORG    word.lower():wang
3.899756 B-TITLE  word.lower():president
3.612791 B-PERSON -1:word.lower():judge
3.485618 B-ORG    word[-3:]:ram
3.391730 B-ORG    word.lower():twitter

Top negative:
-1.372623 I-LOC    -1:word.lower():west
-1.442770 B-PERSON -1:word.lower():-
-1.833212 I-LOC    -1:word.lower():kio


In [81]:
import eli5

eli5.show_weights(crf, top=10)

Using TensorFlow backend.


From \ To,O,B-CARDINAL,I-CARDINAL,B-DATE,I-DATE,B-EVENT,I-EVENT,B-FAC,I-FAC,B-GPE,I-GPE,B-LANGUAGE,I-LANGUAGE,B-LAW,I-LAW,B-LOC,I-LOC,B-MONEY,I-MONEY,B-NORP,I-NORP,B-ORDINAL,B-ORG,I-ORG,B-PERCENT,I-PERCENT,B-PERSON,I-PERSON,B-PRODUCT,I-PRODUCT,B-QUANTITY,I-QUANTITY,B-TIME,I-TIME,B-TITLE,I-TITLE,B-WORK_OF_ART,I-WORK_OF_ART
O,3.774,2.28,-3.191,1.979,-4.369,0.347,-2.114,0.867,-1.879,0.698,-1.216,0.017,-1.186,1.202,-3.101,1.04,-2.062,2.416,-4.514,0.796,-1.5,0.944,1.478,-2.588,1.179,-2.073,1.257,-2.489,0.109,-1.972,1.294,-2.324,0.981,-3.193,0.405,-3.21,0.407,-1.997
B-CARDINAL,1.174,0.0,5.097,0.205,-2.732,0.043,0.0,-0.089,-0.53,-0.029,0.0,0.0,0.0,0.0,0.0,-0.359,-1.009,-1.002,-1.616,1.176,0.0,-0.119,1.257,-0.761,0.0,-1.069,0.0,-0.688,0.925,-0.205,0.0,-1.654,-0.386,-1.944,-0.37,-0.227,0.0,-0.065
I-CARDINAL,0.802,-0.993,5.356,-1.224,-1.673,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.592,-0.794,0.072,0.0,0.0,-0.244,0.0,-0.413,-1.114,0.0,0.0,0.657,0.0,-0.049,-0.349,-0.197,-0.795,0.0,0.0,0.0,0.0
B-DATE,0.338,-1.073,-0.925,-0.542,5.228,0.159,0.0,-0.061,0.0,-0.378,0.0,0.0,0.0,0.0,-0.274,-1.172,-0.624,0.507,-1.181,-0.375,0.0,-0.956,0.004,-0.504,0.0,-0.15,-0.67,-0.221,-0.652,-0.315,-0.153,-0.36,1.473,-1.457,-0.035,-0.09,0.0,0.0
I-DATE,-0.641,-1.259,-1.496,-0.129,5.332,-1.177,-0.0,0.0,-0.004,0.0,0.0,0.0,0.0,0.0,-0.306,0.0,-0.109,-0.237,-0.682,0.0,0.0,-0.038,-0.621,-0.726,0.0,-0.681,-0.743,0.0,0.61,0.0,-0.075,-0.043,-0.155,-0.35,-0.293,-0.352,0.0,0.0
B-EVENT,-0.571,0.0,0.0,0.0,-0.091,0.0,6.057,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.061,-0.175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-EVENT,-0.494,0.0,0.0,0.0,-0.457,0.0,6.814,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016,0.0,0.0,0.0,0.0,0.0,0.0,-0.223,0.0,0.0,-0.214,-0.116,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-FAC,0.053,0.0,0.0,0.0,-0.139,0.0,0.0,0.0,7.052,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.396,0.0,0.0,0.0,0.0,0.0,-0.279,-0.044,0.0,0.0,-0.078,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-FAC,-0.312,-0.335,0.0,0.557,-0.16,0.0,0.0,0.936,5.635,-0.061,-0.0,0.0,0.0,0.0,0.0,0.0,-0.695,0.0,0.0,0.0,0.0,0.0,-0.182,-0.736,0.0,0.0,-0.709,-0.444,0.0,0.0,0.0,0.0,0.143,0.0,-0.132,0.0,0.0,0.0
B-GPE,0.451,-0.328,0.0,0.379,-0.468,0.0,-0.089,0.0,-0.789,0.0,5.195,0.0,0.0,0.0,-0.189,0.0,-0.416,0.0,0.0,0.0,0.0,0.0,0.618,-1.218,0.0,0.0,-0.5,-0.255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0,Unnamed: 22_level_0,Unnamed: 23_level_0,Unnamed: 24_level_0,Unnamed: 25_level_0,Unnamed: 26_level_0,Unnamed: 27_level_0,Unnamed: 28_level_0,Unnamed: 29_level_0,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,Unnamed: 33_level_0,Unnamed: 34_level_0,Unnamed: 35_level_0,Unnamed: 36_level_0,Unnamed: 37_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4,Unnamed: 24_level_4,Unnamed: 25_level_4,Unnamed: 26_level_4,Unnamed: 27_level_4,Unnamed: 28_level_4,Unnamed: 29_level_4,Unnamed: 30_level_4,Unnamed: 31_level_4,Unnamed: 32_level_4,Unnamed: 33_level_4,Unnamed: 34_level_4,Unnamed: 35_level_4,Unnamed: 36_level_4,Unnamed: 37_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5,Unnamed: 18_level_5,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5,Unnamed: 22_level_5,Unnamed: 23_level_5,Unnamed: 24_level_5,Unnamed: 25_level_5,Unnamed: 26_level_5,Unnamed: 27_level_5,Unnamed: 28_level_5,Unnamed: 29_level_5,Unnamed: 30_level_5,Unnamed: 31_level_5,Unnamed: 32_level_5,Unnamed: 33_level_5,Unnamed: 34_level_5,Unnamed: 35_level_5,Unnamed: 36_level_5,Unnamed: 37_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6,Unnamed: 22_level_6,Unnamed: 23_level_6,Unnamed: 24_level_6,Unnamed: 25_level_6,Unnamed: 26_level_6,Unnamed: 27_level_6,Unnamed: 28_level_6,Unnamed: 29_level_6,Unnamed: 30_level_6,Unnamed: 31_level_6,Unnamed: 32_level_6,Unnamed: 33_level_6,Unnamed: 34_level_6,Unnamed: 35_level_6,Unnamed: 36_level_6,Unnamed: 37_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7,Unnamed: 17_level_7,Unnamed: 18_level_7,Unnamed: 19_level_7,Unnamed: 20_level_7,Unnamed: 21_level_7,Unnamed: 22_level_7,Unnamed: 23_level_7,Unnamed: 24_level_7,Unnamed: 25_level_7,Unnamed: 26_level_7,Unnamed: 27_level_7,Unnamed: 28_level_7,Unnamed: 29_level_7,Unnamed: 30_level_7,Unnamed: 31_level_7,Unnamed: 32_level_7,Unnamed: 33_level_7,Unnamed: 34_level_7,Unnamed: 35_level_7,Unnamed: 36_level_7,Unnamed: 37_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8,Unnamed: 17_level_8,Unnamed: 18_level_8,Unnamed: 19_level_8,Unnamed: 20_level_8,Unnamed: 21_level_8,Unnamed: 22_level_8,Unnamed: 23_level_8,Unnamed: 24_level_8,Unnamed: 25_level_8,Unnamed: 26_level_8,Unnamed: 27_level_8,Unnamed: 28_level_8,Unnamed: 29_level_8,Unnamed: 30_level_8,Unnamed: 31_level_8,Unnamed: 32_level_8,Unnamed: 33_level_8,Unnamed: 34_level_8,Unnamed: 35_level_8,Unnamed: 36_level_8,Unnamed: 37_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9,Unnamed: 12_level_9,Unnamed: 13_level_9,Unnamed: 14_level_9,Unnamed: 15_level_9,Unnamed: 16_level_9,Unnamed: 17_level_9,Unnamed: 18_level_9,Unnamed: 19_level_9,Unnamed: 20_level_9,Unnamed: 21_level_9,Unnamed: 22_level_9,Unnamed: 23_level_9,Unnamed: 24_level_9,Unnamed: 25_level_9,Unnamed: 26_level_9,Unnamed: 27_level_9,Unnamed: 28_level_9,Unnamed: 29_level_9,Unnamed: 30_level_9,Unnamed: 31_level_9,Unnamed: 32_level_9,Unnamed: 33_level_9,Unnamed: 34_level_9,Unnamed: 35_level_9,Unnamed: 36_level_9,Unnamed: 37_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10,Unnamed: 12_level_10,Unnamed: 13_level_10,Unnamed: 14_level_10,Unnamed: 15_level_10,Unnamed: 16_level_10,Unnamed: 17_level_10,Unnamed: 18_level_10,Unnamed: 19_level_10,Unnamed: 20_level_10,Unnamed: 21_level_10,Unnamed: 22_level_10,Unnamed: 23_level_10,Unnamed: 24_level_10,Unnamed: 25_level_10,Unnamed: 26_level_10,Unnamed: 27_level_10,Unnamed: 28_level_10,Unnamed: 29_level_10,Unnamed: 30_level_10,Unnamed: 31_level_10,Unnamed: 32_level_10,Unnamed: 33_level_10,Unnamed: 34_level_10,Unnamed: 35_level_10,Unnamed: 36_level_10,Unnamed: 37_level_10
Weight?,Feature,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11,Unnamed: 12_level_11,Unnamed: 13_level_11,Unnamed: 14_level_11,Unnamed: 15_level_11,Unnamed: 16_level_11,Unnamed: 17_level_11,Unnamed: 18_level_11,Unnamed: 19_level_11,Unnamed: 20_level_11,Unnamed: 21_level_11,Unnamed: 22_level_11,Unnamed: 23_level_11,Unnamed: 24_level_11,Unnamed: 25_level_11,Unnamed: 26_level_11,Unnamed: 27_level_11,Unnamed: 28_level_11,Unnamed: 29_level_11,Unnamed: 30_level_11,Unnamed: 31_level_11,Unnamed: 32_level_11,Unnamed: 33_level_11,Unnamed: 34_level_11,Unnamed: 35_level_11,Unnamed: 36_level_11,Unnamed: 37_level_11
Weight?,Feature,Unnamed: 2_level_12,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12,Unnamed: 6_level_12,Unnamed: 7_level_12,Unnamed: 8_level_12,Unnamed: 9_level_12,Unnamed: 10_level_12,Unnamed: 11_level_12,Unnamed: 12_level_12,Unnamed: 13_level_12,Unnamed: 14_level_12,Unnamed: 15_level_12,Unnamed: 16_level_12,Unnamed: 17_level_12,Unnamed: 18_level_12,Unnamed: 19_level_12,Unnamed: 20_level_12,Unnamed: 21_level_12,Unnamed: 22_level_12,Unnamed: 23_level_12,Unnamed: 24_level_12,Unnamed: 25_level_12,Unnamed: 26_level_12,Unnamed: 27_level_12,Unnamed: 28_level_12,Unnamed: 29_level_12,Unnamed: 30_level_12,Unnamed: 31_level_12,Unnamed: 32_level_12,Unnamed: 33_level_12,Unnamed: 34_level_12,Unnamed: 35_level_12,Unnamed: 36_level_12,Unnamed: 37_level_12
Weight?,Feature,Unnamed: 2_level_13,Unnamed: 3_level_13,Unnamed: 4_level_13,Unnamed: 5_level_13,Unnamed: 6_level_13,Unnamed: 7_level_13,Unnamed: 8_level_13,Unnamed: 9_level_13,Unnamed: 10_level_13,Unnamed: 11_level_13,Unnamed: 12_level_13,Unnamed: 13_level_13,Unnamed: 14_level_13,Unnamed: 15_level_13,Unnamed: 16_level_13,Unnamed: 17_level_13,Unnamed: 18_level_13,Unnamed: 19_level_13,Unnamed: 20_level_13,Unnamed: 21_level_13,Unnamed: 22_level_13,Unnamed: 23_level_13,Unnamed: 24_level_13,Unnamed: 25_level_13,Unnamed: 26_level_13,Unnamed: 27_level_13,Unnamed: 28_level_13,Unnamed: 29_level_13,Unnamed: 30_level_13,Unnamed: 31_level_13,Unnamed: 32_level_13,Unnamed: 33_level_13,Unnamed: 34_level_13,Unnamed: 35_level_13,Unnamed: 36_level_13,Unnamed: 37_level_13
Weight?,Feature,Unnamed: 2_level_14,Unnamed: 3_level_14,Unnamed: 4_level_14,Unnamed: 5_level_14,Unnamed: 6_level_14,Unnamed: 7_level_14,Unnamed: 8_level_14,Unnamed: 9_level_14,Unnamed: 10_level_14,Unnamed: 11_level_14,Unnamed: 12_level_14,Unnamed: 13_level_14,Unnamed: 14_level_14,Unnamed: 15_level_14,Unnamed: 16_level_14,Unnamed: 17_level_14,Unnamed: 18_level_14,Unnamed: 19_level_14,Unnamed: 20_level_14,Unnamed: 21_level_14,Unnamed: 22_level_14,Unnamed: 23_level_14,Unnamed: 24_level_14,Unnamed: 25_level_14,Unnamed: 26_level_14,Unnamed: 27_level_14,Unnamed: 28_level_14,Unnamed: 29_level_14,Unnamed: 30_level_14,Unnamed: 31_level_14,Unnamed: 32_level_14,Unnamed: 33_level_14,Unnamed: 34_level_14,Unnamed: 35_level_14,Unnamed: 36_level_14,Unnamed: 37_level_14
Weight?,Feature,Unnamed: 2_level_15,Unnamed: 3_level_15,Unnamed: 4_level_15,Unnamed: 5_level_15,Unnamed: 6_level_15,Unnamed: 7_level_15,Unnamed: 8_level_15,Unnamed: 9_level_15,Unnamed: 10_level_15,Unnamed: 11_level_15,Unnamed: 12_level_15,Unnamed: 13_level_15,Unnamed: 14_level_15,Unnamed: 15_level_15,Unnamed: 16_level_15,Unnamed: 17_level_15,Unnamed: 18_level_15,Unnamed: 19_level_15,Unnamed: 20_level_15,Unnamed: 21_level_15,Unnamed: 22_level_15,Unnamed: 23_level_15,Unnamed: 24_level_15,Unnamed: 25_level_15,Unnamed: 26_level_15,Unnamed: 27_level_15,Unnamed: 28_level_15,Unnamed: 29_level_15,Unnamed: 30_level_15,Unnamed: 31_level_15,Unnamed: 32_level_15,Unnamed: 33_level_15,Unnamed: 34_level_15,Unnamed: 35_level_15,Unnamed: 36_level_15,Unnamed: 37_level_15
Weight?,Feature,Unnamed: 2_level_16,Unnamed: 3_level_16,Unnamed: 4_level_16,Unnamed: 5_level_16,Unnamed: 6_level_16,Unnamed: 7_level_16,Unnamed: 8_level_16,Unnamed: 9_level_16,Unnamed: 10_level_16,Unnamed: 11_level_16,Unnamed: 12_level_16,Unnamed: 13_level_16,Unnamed: 14_level_16,Unnamed: 15_level_16,Unnamed: 16_level_16,Unnamed: 17_level_16,Unnamed: 18_level_16,Unnamed: 19_level_16,Unnamed: 20_level_16,Unnamed: 21_level_16,Unnamed: 22_level_16,Unnamed: 23_level_16,Unnamed: 24_level_16,Unnamed: 25_level_16,Unnamed: 26_level_16,Unnamed: 27_level_16,Unnamed: 28_level_16,Unnamed: 29_level_16,Unnamed: 30_level_16,Unnamed: 31_level_16,Unnamed: 32_level_16,Unnamed: 33_level_16,Unnamed: 34_level_16,Unnamed: 35_level_16,Unnamed: 36_level_16,Unnamed: 37_level_16
Weight?,Feature,Unnamed: 2_level_17,Unnamed: 3_level_17,Unnamed: 4_level_17,Unnamed: 5_level_17,Unnamed: 6_level_17,Unnamed: 7_level_17,Unnamed: 8_level_17,Unnamed: 9_level_17,Unnamed: 10_level_17,Unnamed: 11_level_17,Unnamed: 12_level_17,Unnamed: 13_level_17,Unnamed: 14_level_17,Unnamed: 15_level_17,Unnamed: 16_level_17,Unnamed: 17_level_17,Unnamed: 18_level_17,Unnamed: 19_level_17,Unnamed: 20_level_17,Unnamed: 21_level_17,Unnamed: 22_level_17,Unnamed: 23_level_17,Unnamed: 24_level_17,Unnamed: 25_level_17,Unnamed: 26_level_17,Unnamed: 27_level_17,Unnamed: 28_level_17,Unnamed: 29_level_17,Unnamed: 30_level_17,Unnamed: 31_level_17,Unnamed: 32_level_17,Unnamed: 33_level_17,Unnamed: 34_level_17,Unnamed: 35_level_17,Unnamed: 36_level_17,Unnamed: 37_level_17
Weight?,Feature,Unnamed: 2_level_18,Unnamed: 3_level_18,Unnamed: 4_level_18,Unnamed: 5_level_18,Unnamed: 6_level_18,Unnamed: 7_level_18,Unnamed: 8_level_18,Unnamed: 9_level_18,Unnamed: 10_level_18,Unnamed: 11_level_18,Unnamed: 12_level_18,Unnamed: 13_level_18,Unnamed: 14_level_18,Unnamed: 15_level_18,Unnamed: 16_level_18,Unnamed: 17_level_18,Unnamed: 18_level_18,Unnamed: 19_level_18,Unnamed: 20_level_18,Unnamed: 21_level_18,Unnamed: 22_level_18,Unnamed: 23_level_18,Unnamed: 24_level_18,Unnamed: 25_level_18,Unnamed: 26_level_18,Unnamed: 27_level_18,Unnamed: 28_level_18,Unnamed: 29_level_18,Unnamed: 30_level_18,Unnamed: 31_level_18,Unnamed: 32_level_18,Unnamed: 33_level_18,Unnamed: 34_level_18,Unnamed: 35_level_18,Unnamed: 36_level_18,Unnamed: 37_level_18
Weight?,Feature,Unnamed: 2_level_19,Unnamed: 3_level_19,Unnamed: 4_level_19,Unnamed: 5_level_19,Unnamed: 6_level_19,Unnamed: 7_level_19,Unnamed: 8_level_19,Unnamed: 9_level_19,Unnamed: 10_level_19,Unnamed: 11_level_19,Unnamed: 12_level_19,Unnamed: 13_level_19,Unnamed: 14_level_19,Unnamed: 15_level_19,Unnamed: 16_level_19,Unnamed: 17_level_19,Unnamed: 18_level_19,Unnamed: 19_level_19,Unnamed: 20_level_19,Unnamed: 21_level_19,Unnamed: 22_level_19,Unnamed: 23_level_19,Unnamed: 24_level_19,Unnamed: 25_level_19,Unnamed: 26_level_19,Unnamed: 27_level_19,Unnamed: 28_level_19,Unnamed: 29_level_19,Unnamed: 30_level_19,Unnamed: 31_level_19,Unnamed: 32_level_19,Unnamed: 33_level_19,Unnamed: 34_level_19,Unnamed: 35_level_19,Unnamed: 36_level_19,Unnamed: 37_level_19
Weight?,Feature,Unnamed: 2_level_20,Unnamed: 3_level_20,Unnamed: 4_level_20,Unnamed: 5_level_20,Unnamed: 6_level_20,Unnamed: 7_level_20,Unnamed: 8_level_20,Unnamed: 9_level_20,Unnamed: 10_level_20,Unnamed: 11_level_20,Unnamed: 12_level_20,Unnamed: 13_level_20,Unnamed: 14_level_20,Unnamed: 15_level_20,Unnamed: 16_level_20,Unnamed: 17_level_20,Unnamed: 18_level_20,Unnamed: 19_level_20,Unnamed: 20_level_20,Unnamed: 21_level_20,Unnamed: 22_level_20,Unnamed: 23_level_20,Unnamed: 24_level_20,Unnamed: 25_level_20,Unnamed: 26_level_20,Unnamed: 27_level_20,Unnamed: 28_level_20,Unnamed: 29_level_20,Unnamed: 30_level_20,Unnamed: 31_level_20,Unnamed: 32_level_20,Unnamed: 33_level_20,Unnamed: 34_level_20,Unnamed: 35_level_20,Unnamed: 36_level_20,Unnamed: 37_level_20
Weight?,Feature,Unnamed: 2_level_21,Unnamed: 3_level_21,Unnamed: 4_level_21,Unnamed: 5_level_21,Unnamed: 6_level_21,Unnamed: 7_level_21,Unnamed: 8_level_21,Unnamed: 9_level_21,Unnamed: 10_level_21,Unnamed: 11_level_21,Unnamed: 12_level_21,Unnamed: 13_level_21,Unnamed: 14_level_21,Unnamed: 15_level_21,Unnamed: 16_level_21,Unnamed: 17_level_21,Unnamed: 18_level_21,Unnamed: 19_level_21,Unnamed: 20_level_21,Unnamed: 21_level_21,Unnamed: 22_level_21,Unnamed: 23_level_21,Unnamed: 24_level_21,Unnamed: 25_level_21,Unnamed: 26_level_21,Unnamed: 27_level_21,Unnamed: 28_level_21,Unnamed: 29_level_21,Unnamed: 30_level_21,Unnamed: 31_level_21,Unnamed: 32_level_21,Unnamed: 33_level_21,Unnamed: 34_level_21,Unnamed: 35_level_21,Unnamed: 36_level_21,Unnamed: 37_level_21
Weight?,Feature,Unnamed: 2_level_22,Unnamed: 3_level_22,Unnamed: 4_level_22,Unnamed: 5_level_22,Unnamed: 6_level_22,Unnamed: 7_level_22,Unnamed: 8_level_22,Unnamed: 9_level_22,Unnamed: 10_level_22,Unnamed: 11_level_22,Unnamed: 12_level_22,Unnamed: 13_level_22,Unnamed: 14_level_22,Unnamed: 15_level_22,Unnamed: 16_level_22,Unnamed: 17_level_22,Unnamed: 18_level_22,Unnamed: 19_level_22,Unnamed: 20_level_22,Unnamed: 21_level_22,Unnamed: 22_level_22,Unnamed: 23_level_22,Unnamed: 24_level_22,Unnamed: 25_level_22,Unnamed: 26_level_22,Unnamed: 27_level_22,Unnamed: 28_level_22,Unnamed: 29_level_22,Unnamed: 30_level_22,Unnamed: 31_level_22,Unnamed: 32_level_22,Unnamed: 33_level_22,Unnamed: 34_level_22,Unnamed: 35_level_22,Unnamed: 36_level_22,Unnamed: 37_level_22
Weight?,Feature,Unnamed: 2_level_23,Unnamed: 3_level_23,Unnamed: 4_level_23,Unnamed: 5_level_23,Unnamed: 6_level_23,Unnamed: 7_level_23,Unnamed: 8_level_23,Unnamed: 9_level_23,Unnamed: 10_level_23,Unnamed: 11_level_23,Unnamed: 12_level_23,Unnamed: 13_level_23,Unnamed: 14_level_23,Unnamed: 15_level_23,Unnamed: 16_level_23,Unnamed: 17_level_23,Unnamed: 18_level_23,Unnamed: 19_level_23,Unnamed: 20_level_23,Unnamed: 21_level_23,Unnamed: 22_level_23,Unnamed: 23_level_23,Unnamed: 24_level_23,Unnamed: 25_level_23,Unnamed: 26_level_23,Unnamed: 27_level_23,Unnamed: 28_level_23,Unnamed: 29_level_23,Unnamed: 30_level_23,Unnamed: 31_level_23,Unnamed: 32_level_23,Unnamed: 33_level_23,Unnamed: 34_level_23,Unnamed: 35_level_23,Unnamed: 36_level_23,Unnamed: 37_level_23
Weight?,Feature,Unnamed: 2_level_24,Unnamed: 3_level_24,Unnamed: 4_level_24,Unnamed: 5_level_24,Unnamed: 6_level_24,Unnamed: 7_level_24,Unnamed: 8_level_24,Unnamed: 9_level_24,Unnamed: 10_level_24,Unnamed: 11_level_24,Unnamed: 12_level_24,Unnamed: 13_level_24,Unnamed: 14_level_24,Unnamed: 15_level_24,Unnamed: 16_level_24,Unnamed: 17_level_24,Unnamed: 18_level_24,Unnamed: 19_level_24,Unnamed: 20_level_24,Unnamed: 21_level_24,Unnamed: 22_level_24,Unnamed: 23_level_24,Unnamed: 24_level_24,Unnamed: 25_level_24,Unnamed: 26_level_24,Unnamed: 27_level_24,Unnamed: 28_level_24,Unnamed: 29_level_24,Unnamed: 30_level_24,Unnamed: 31_level_24,Unnamed: 32_level_24,Unnamed: 33_level_24,Unnamed: 34_level_24,Unnamed: 35_level_24,Unnamed: 36_level_24,Unnamed: 37_level_24
Weight?,Feature,Unnamed: 2_level_25,Unnamed: 3_level_25,Unnamed: 4_level_25,Unnamed: 5_level_25,Unnamed: 6_level_25,Unnamed: 7_level_25,Unnamed: 8_level_25,Unnamed: 9_level_25,Unnamed: 10_level_25,Unnamed: 11_level_25,Unnamed: 12_level_25,Unnamed: 13_level_25,Unnamed: 14_level_25,Unnamed: 15_level_25,Unnamed: 16_level_25,Unnamed: 17_level_25,Unnamed: 18_level_25,Unnamed: 19_level_25,Unnamed: 20_level_25,Unnamed: 21_level_25,Unnamed: 22_level_25,Unnamed: 23_level_25,Unnamed: 24_level_25,Unnamed: 25_level_25,Unnamed: 26_level_25,Unnamed: 27_level_25,Unnamed: 28_level_25,Unnamed: 29_level_25,Unnamed: 30_level_25,Unnamed: 31_level_25,Unnamed: 32_level_25,Unnamed: 33_level_25,Unnamed: 34_level_25,Unnamed: 35_level_25,Unnamed: 36_level_25,Unnamed: 37_level_25
Weight?,Feature,Unnamed: 2_level_26,Unnamed: 3_level_26,Unnamed: 4_level_26,Unnamed: 5_level_26,Unnamed: 6_level_26,Unnamed: 7_level_26,Unnamed: 8_level_26,Unnamed: 9_level_26,Unnamed: 10_level_26,Unnamed: 11_level_26,Unnamed: 12_level_26,Unnamed: 13_level_26,Unnamed: 14_level_26,Unnamed: 15_level_26,Unnamed: 16_level_26,Unnamed: 17_level_26,Unnamed: 18_level_26,Unnamed: 19_level_26,Unnamed: 20_level_26,Unnamed: 21_level_26,Unnamed: 22_level_26,Unnamed: 23_level_26,Unnamed: 24_level_26,Unnamed: 25_level_26,Unnamed: 26_level_26,Unnamed: 27_level_26,Unnamed: 28_level_26,Unnamed: 29_level_26,Unnamed: 30_level_26,Unnamed: 31_level_26,Unnamed: 32_level_26,Unnamed: 33_level_26,Unnamed: 34_level_26,Unnamed: 35_level_26,Unnamed: 36_level_26,Unnamed: 37_level_26
Weight?,Feature,Unnamed: 2_level_27,Unnamed: 3_level_27,Unnamed: 4_level_27,Unnamed: 5_level_27,Unnamed: 6_level_27,Unnamed: 7_level_27,Unnamed: 8_level_27,Unnamed: 9_level_27,Unnamed: 10_level_27,Unnamed: 11_level_27,Unnamed: 12_level_27,Unnamed: 13_level_27,Unnamed: 14_level_27,Unnamed: 15_level_27,Unnamed: 16_level_27,Unnamed: 17_level_27,Unnamed: 18_level_27,Unnamed: 19_level_27,Unnamed: 20_level_27,Unnamed: 21_level_27,Unnamed: 22_level_27,Unnamed: 23_level_27,Unnamed: 24_level_27,Unnamed: 25_level_27,Unnamed: 26_level_27,Unnamed: 27_level_27,Unnamed: 28_level_27,Unnamed: 29_level_27,Unnamed: 30_level_27,Unnamed: 31_level_27,Unnamed: 32_level_27,Unnamed: 33_level_27,Unnamed: 34_level_27,Unnamed: 35_level_27,Unnamed: 36_level_27,Unnamed: 37_level_27
Weight?,Feature,Unnamed: 2_level_28,Unnamed: 3_level_28,Unnamed: 4_level_28,Unnamed: 5_level_28,Unnamed: 6_level_28,Unnamed: 7_level_28,Unnamed: 8_level_28,Unnamed: 9_level_28,Unnamed: 10_level_28,Unnamed: 11_level_28,Unnamed: 12_level_28,Unnamed: 13_level_28,Unnamed: 14_level_28,Unnamed: 15_level_28,Unnamed: 16_level_28,Unnamed: 17_level_28,Unnamed: 18_level_28,Unnamed: 19_level_28,Unnamed: 20_level_28,Unnamed: 21_level_28,Unnamed: 22_level_28,Unnamed: 23_level_28,Unnamed: 24_level_28,Unnamed: 25_level_28,Unnamed: 26_level_28,Unnamed: 27_level_28,Unnamed: 28_level_28,Unnamed: 29_level_28,Unnamed: 30_level_28,Unnamed: 31_level_28,Unnamed: 32_level_28,Unnamed: 33_level_28,Unnamed: 34_level_28,Unnamed: 35_level_28,Unnamed: 36_level_28,Unnamed: 37_level_28
Weight?,Feature,Unnamed: 2_level_29,Unnamed: 3_level_29,Unnamed: 4_level_29,Unnamed: 5_level_29,Unnamed: 6_level_29,Unnamed: 7_level_29,Unnamed: 8_level_29,Unnamed: 9_level_29,Unnamed: 10_level_29,Unnamed: 11_level_29,Unnamed: 12_level_29,Unnamed: 13_level_29,Unnamed: 14_level_29,Unnamed: 15_level_29,Unnamed: 16_level_29,Unnamed: 17_level_29,Unnamed: 18_level_29,Unnamed: 19_level_29,Unnamed: 20_level_29,Unnamed: 21_level_29,Unnamed: 22_level_29,Unnamed: 23_level_29,Unnamed: 24_level_29,Unnamed: 25_level_29,Unnamed: 26_level_29,Unnamed: 27_level_29,Unnamed: 28_level_29,Unnamed: 29_level_29,Unnamed: 30_level_29,Unnamed: 31_level_29,Unnamed: 32_level_29,Unnamed: 33_level_29,Unnamed: 34_level_29,Unnamed: 35_level_29,Unnamed: 36_level_29,Unnamed: 37_level_29
Weight?,Feature,Unnamed: 2_level_30,Unnamed: 3_level_30,Unnamed: 4_level_30,Unnamed: 5_level_30,Unnamed: 6_level_30,Unnamed: 7_level_30,Unnamed: 8_level_30,Unnamed: 9_level_30,Unnamed: 10_level_30,Unnamed: 11_level_30,Unnamed: 12_level_30,Unnamed: 13_level_30,Unnamed: 14_level_30,Unnamed: 15_level_30,Unnamed: 16_level_30,Unnamed: 17_level_30,Unnamed: 18_level_30,Unnamed: 19_level_30,Unnamed: 20_level_30,Unnamed: 21_level_30,Unnamed: 22_level_30,Unnamed: 23_level_30,Unnamed: 24_level_30,Unnamed: 25_level_30,Unnamed: 26_level_30,Unnamed: 27_level_30,Unnamed: 28_level_30,Unnamed: 29_level_30,Unnamed: 30_level_30,Unnamed: 31_level_30,Unnamed: 32_level_30,Unnamed: 33_level_30,Unnamed: 34_level_30,Unnamed: 35_level_30,Unnamed: 36_level_30,Unnamed: 37_level_30
Weight?,Feature,Unnamed: 2_level_31,Unnamed: 3_level_31,Unnamed: 4_level_31,Unnamed: 5_level_31,Unnamed: 6_level_31,Unnamed: 7_level_31,Unnamed: 8_level_31,Unnamed: 9_level_31,Unnamed: 10_level_31,Unnamed: 11_level_31,Unnamed: 12_level_31,Unnamed: 13_level_31,Unnamed: 14_level_31,Unnamed: 15_level_31,Unnamed: 16_level_31,Unnamed: 17_level_31,Unnamed: 18_level_31,Unnamed: 19_level_31,Unnamed: 20_level_31,Unnamed: 21_level_31,Unnamed: 22_level_31,Unnamed: 23_level_31,Unnamed: 24_level_31,Unnamed: 25_level_31,Unnamed: 26_level_31,Unnamed: 27_level_31,Unnamed: 28_level_31,Unnamed: 29_level_31,Unnamed: 30_level_31,Unnamed: 31_level_31,Unnamed: 32_level_31,Unnamed: 33_level_31,Unnamed: 34_level_31,Unnamed: 35_level_31,Unnamed: 36_level_31,Unnamed: 37_level_31
Weight?,Feature,Unnamed: 2_level_32,Unnamed: 3_level_32,Unnamed: 4_level_32,Unnamed: 5_level_32,Unnamed: 6_level_32,Unnamed: 7_level_32,Unnamed: 8_level_32,Unnamed: 9_level_32,Unnamed: 10_level_32,Unnamed: 11_level_32,Unnamed: 12_level_32,Unnamed: 13_level_32,Unnamed: 14_level_32,Unnamed: 15_level_32,Unnamed: 16_level_32,Unnamed: 17_level_32,Unnamed: 18_level_32,Unnamed: 19_level_32,Unnamed: 20_level_32,Unnamed: 21_level_32,Unnamed: 22_level_32,Unnamed: 23_level_32,Unnamed: 24_level_32,Unnamed: 25_level_32,Unnamed: 26_level_32,Unnamed: 27_level_32,Unnamed: 28_level_32,Unnamed: 29_level_32,Unnamed: 30_level_32,Unnamed: 31_level_32,Unnamed: 32_level_32,Unnamed: 33_level_32,Unnamed: 34_level_32,Unnamed: 35_level_32,Unnamed: 36_level_32,Unnamed: 37_level_32
Weight?,Feature,Unnamed: 2_level_33,Unnamed: 3_level_33,Unnamed: 4_level_33,Unnamed: 5_level_33,Unnamed: 6_level_33,Unnamed: 7_level_33,Unnamed: 8_level_33,Unnamed: 9_level_33,Unnamed: 10_level_33,Unnamed: 11_level_33,Unnamed: 12_level_33,Unnamed: 13_level_33,Unnamed: 14_level_33,Unnamed: 15_level_33,Unnamed: 16_level_33,Unnamed: 17_level_33,Unnamed: 18_level_33,Unnamed: 19_level_33,Unnamed: 20_level_33,Unnamed: 21_level_33,Unnamed: 22_level_33,Unnamed: 23_level_33,Unnamed: 24_level_33,Unnamed: 25_level_33,Unnamed: 26_level_33,Unnamed: 27_level_33,Unnamed: 28_level_33,Unnamed: 29_level_33,Unnamed: 30_level_33,Unnamed: 31_level_33,Unnamed: 32_level_33,Unnamed: 33_level_33,Unnamed: 34_level_33,Unnamed: 35_level_33,Unnamed: 36_level_33,Unnamed: 37_level_33
Weight?,Feature,Unnamed: 2_level_34,Unnamed: 3_level_34,Unnamed: 4_level_34,Unnamed: 5_level_34,Unnamed: 6_level_34,Unnamed: 7_level_34,Unnamed: 8_level_34,Unnamed: 9_level_34,Unnamed: 10_level_34,Unnamed: 11_level_34,Unnamed: 12_level_34,Unnamed: 13_level_34,Unnamed: 14_level_34,Unnamed: 15_level_34,Unnamed: 16_level_34,Unnamed: 17_level_34,Unnamed: 18_level_34,Unnamed: 19_level_34,Unnamed: 20_level_34,Unnamed: 21_level_34,Unnamed: 22_level_34,Unnamed: 23_level_34,Unnamed: 24_level_34,Unnamed: 25_level_34,Unnamed: 26_level_34,Unnamed: 27_level_34,Unnamed: 28_level_34,Unnamed: 29_level_34,Unnamed: 30_level_34,Unnamed: 31_level_34,Unnamed: 32_level_34,Unnamed: 33_level_34,Unnamed: 34_level_34,Unnamed: 35_level_34,Unnamed: 36_level_34,Unnamed: 37_level_34
Weight?,Feature,Unnamed: 2_level_35,Unnamed: 3_level_35,Unnamed: 4_level_35,Unnamed: 5_level_35,Unnamed: 6_level_35,Unnamed: 7_level_35,Unnamed: 8_level_35,Unnamed: 9_level_35,Unnamed: 10_level_35,Unnamed: 11_level_35,Unnamed: 12_level_35,Unnamed: 13_level_35,Unnamed: 14_level_35,Unnamed: 15_level_35,Unnamed: 16_level_35,Unnamed: 17_level_35,Unnamed: 18_level_35,Unnamed: 19_level_35,Unnamed: 20_level_35,Unnamed: 21_level_35,Unnamed: 22_level_35,Unnamed: 23_level_35,Unnamed: 24_level_35,Unnamed: 25_level_35,Unnamed: 26_level_35,Unnamed: 27_level_35,Unnamed: 28_level_35,Unnamed: 29_level_35,Unnamed: 30_level_35,Unnamed: 31_level_35,Unnamed: 32_level_35,Unnamed: 33_level_35,Unnamed: 34_level_35,Unnamed: 35_level_35,Unnamed: 36_level_35,Unnamed: 37_level_35
Weight?,Feature,Unnamed: 2_level_36,Unnamed: 3_level_36,Unnamed: 4_level_36,Unnamed: 5_level_36,Unnamed: 6_level_36,Unnamed: 7_level_36,Unnamed: 8_level_36,Unnamed: 9_level_36,Unnamed: 10_level_36,Unnamed: 11_level_36,Unnamed: 12_level_36,Unnamed: 13_level_36,Unnamed: 14_level_36,Unnamed: 15_level_36,Unnamed: 16_level_36,Unnamed: 17_level_36,Unnamed: 18_level_36,Unnamed: 19_level_36,Unnamed: 20_level_36,Unnamed: 21_level_36,Unnamed: 22_level_36,Unnamed: 23_level_36,Unnamed: 24_level_36,Unnamed: 25_level_36,Unnamed: 26_level_36,Unnamed: 27_level_36,Unnamed: 28_level_36,Unnamed: 29_level_36,Unnamed: 30_level_36,Unnamed: 31_level_36,Unnamed: 32_level_36,Unnamed: 33_level_36,Unnamed: 34_level_36,Unnamed: 35_level_36,Unnamed: 36_level_36,Unnamed: 37_level_36
Weight?,Feature,Unnamed: 2_level_37,Unnamed: 3_level_37,Unnamed: 4_level_37,Unnamed: 5_level_37,Unnamed: 6_level_37,Unnamed: 7_level_37,Unnamed: 8_level_37,Unnamed: 9_level_37,Unnamed: 10_level_37,Unnamed: 11_level_37,Unnamed: 12_level_37,Unnamed: 13_level_37,Unnamed: 14_level_37,Unnamed: 15_level_37,Unnamed: 16_level_37,Unnamed: 17_level_37,Unnamed: 18_level_37,Unnamed: 19_level_37,Unnamed: 20_level_37,Unnamed: 21_level_37,Unnamed: 22_level_37,Unnamed: 23_level_37,Unnamed: 24_level_37,Unnamed: 25_level_37,Unnamed: 26_level_37,Unnamed: 27_level_37,Unnamed: 28_level_37,Unnamed: 29_level_37,Unnamed: 30_level_37,Unnamed: 31_level_37,Unnamed: 32_level_37,Unnamed: 33_level_37,Unnamed: 34_level_37,Unnamed: 35_level_37,Unnamed: 36_level_37,Unnamed: 37_level_37
+4.100,BOS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+4.041,bias,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+3.716,+1:word.lower():pm,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+3.372,postag:PRON,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+3.132,-1:word.lower():haryana,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
… 2559 more positive …,… 2559 more positive …,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
… 1394 more negative …,… 1394 more negative …,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-3.340,word.lower():evening,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-3.532,word.lower():morning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-3.636,word.lower():annual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

Weight?,Feature
+4.100,BOS
+4.041,bias
+3.716,+1:word.lower():pm
+3.372,postag:PRON
+3.132,-1:word.lower():haryana
… 2559 more positive …,… 2559 more positive …
… 1394 more negative …,… 1394 more negative …
-3.340,word.lower():evening
-3.532,word.lower():morning
-3.636,word.lower():annual

Weight?,Feature
+4.430,BOS
+3.687,-1:word.lower()::
+3.622,word.lower():11.20am
+3.255,-1:word.lower():figure
+3.101,word.lower():hundreds
+3.020,word.lower():thousands
+2.889,word.lower():asr995
+2.695,-1:word.lower():avenue
+2.621,-1:word.lower():save
… 672 more positive …,… 672 more positive …

Weight?,Feature
+3.304,-1:word.lower():2.2
+2.846,-1:word.lower():only
+2.162,-1:word.lower():40
+2.056,+1:word.lower():thirds
+2.003,word.lower():thirds
+1.870,+1:word.lower():notes
+1.829,word[-3:]:rds
+1.811,-1:word.lower():years
+1.715,-1:word.lower():87.55
+1.689,word[-2:]:ld

Weight?,Feature
+4.135,word[-3:]:day
+4.069,word.lower():annual
+4.068,-1:word.lower():then
+3.685,word.lower():year
+3.612,word.lower():monthly
+3.539,word.lower():today
+2.962,-1:word.lower():evident
+2.952,word.lower():12m
+2.952,word[-3:]:12m
+2.952,word[-2:]:2m

Weight?,Feature
+3.978,word.lower():year
+3.257,word.lower():day
+3.068,word.lower():years
+3.028,+1:word.lower():covered
+3.013,word[-3:]:ber
+2.619,-1:word.lower():last
+2.384,-1:word.lower():period
+2.151,-1:word.lower():oct
+2.148,word.lower():weeks
… 566 more positive …,… 566 more positive …

Weight?,Feature
+2.235,word.lower():ndp
+2.235,word[-3:]:NDP
+2.235,word[-2:]:DP
+2.021,word.lower():admm
+2.021,word[-3:]:DMM
+2.021,word[-2:]:MM
+1.664,word.lower():budget
+1.653,word[-3:]:get
+1.595,-1:word.lower():difficult
+1.554,word.isupper()

Weight?,Feature
+1.963,word.lower():japan
+1.960,word[-3:]:pan
+1.803,word.lower():movement
+1.757,+1:word.lower():with
+1.625,+1:word.lower():are
+1.393,+1:word.lower():movement
+1.329,+1:word.lower():chest
+1.190,+1:word.lower():awards
+1.082,-1:word.lower():budget
+1.049,word.lower():parade

Weight?,Feature
+2.977,-1:word.lower():studios
+2.715,word[-2:]:na
+2.570,word.lower():istana
+2.272,word.lower():vivocity
+2.053,word.lower():mosques
+2.014,word.lower():cte
+2.014,word[-3:]:CTE
+2.011,+1:word.lower():line
+1.914,word[-2:]:TE
+1.834,word[-2:]:SH

Weight?,Feature
+2.384,word[-3:]:tal
+2.347,+1:word.lower():during
+2.261,-1:word.lower():-
+2.096,word[-3:]:way
+2.051,word.lower():expressway
+2.047,-1:word.lower():orchard
+1.958,-1:word.lower():police
+1.948,+1:word.lower():station
+1.926,word.lower():mosque
+1.917,word.lower():hospital

Weight?,Feature
+3.567,word.lower():singapore
+3.170,word.lower():pahang
+3.065,+1:word.lower():united
+2.943,word[-2:]:ia
+2.933,word[-3:]:US
+2.839,word.lower():us
+2.556,word[-2:]:US
+2.415,word.lower():malaysia
+2.183,word.lower():china
+2.157,word[-3:]:ORE

Weight?,Feature
+2.855,-1:word.lower():jamiyah
+2.086,word[-2:]:ia
+1.967,word.lower():california
+1.878,-1:word.lower():united
+1.770,word.lower():kong
+1.699,word[-3:]:nia
+1.693,word[-3:]:tes
+1.669,-1:word.lower():hong
+1.606,-1:word.lower():south
+1.463,-1:word.lower():santa

Weight?,Feature
+2.739,word.lower():english
+2.474,word[-3:]:ish
+2.184,word.lower():mandarin
+2.161,word[-3:]:ese
+2.161,word[-3:]:rin
+2.133,word.lower():malay
+2.132,+1:word.lower():up
+2.073,word[-3:]:lay
+2.014,word.istitle()
+1.940,word[-2:]:sh

Weight?,Feature
+1.354,+1:word.lower():language
+1.351,word.lower():language
+1.315,word[-3:]:age
+1.249,-1:word.lower():-
+1.152,word[-2:]:ge
+1.152,-1:word.lower():chinese
+0.831,+1:word.lower():daily
+0.746,-1:postag:PUNCT
+0.746,-1:postag[:2]:PU
+0.745,postag:PUNCT

Weight?,Feature
+2.444,word.lower():7½-year
+2.404,-1:word.lower():under
+2.317,+1:word.lower():term
+1.904,+1:word.lower():penal
+1.745,word.lower():section
+1.351,+1:word.lower():immigration
+1.261,-1:word.lower():of
+1.237,word[-3:]:77A
+1.237,word[-2:]:7A
+1.237,word.lower():377a

Weight?,Feature
+2.833,+1:word.lower():act
+1.748,-1:word.lower():section
+1.208,+1:word.lower():for
+1.146,word[-3:]:rds
+1.128,-1:word.lower():paris
+1.127,word.lower():agreement
+1.089,+1:word.lower():arms
+1.085,-1:word.lower():13
+1.052,word[-3:]:Act
+1.052,word.lower():act

Weight?,Feature
+2.906,-1:word.lower():penjuru
+2.748,word.lower():tampines
+2.696,word.lower():woodlands
+2.597,+1:word.lower():around
+2.469,word.lower():asia
+2.122,word.lower():bendemeer
+2.044,word.lower():jurong
+2.015,-1:word.lower():in
+1.965,word.istitle()
+1.877,+1:word.lower():south

Weight?,Feature
+2.342,-1:word.lower():517
+1.816,+1:word.lower():china
+1.774,word.lower():geylang
+1.760,-1:word.lower():mrt
+1.654,+1:word.lower():raged
+1.582,word[-3:]:ang
+1.522,+1:word.lower():road
+1.510,word.lower():river
+1.486,word.lower():west
… 407 more positive …,… 407 more positive …

Weight?,Feature
+4.039,-1:word.lower():$
+2.914,+1:word.lower():per
+2.739,+1:word.lower():50
+2.112,word.isupper()
+1.727,+1:word.lower():incentive
+1.661,+1:word.lower():20
+1.645,-1:postag[:2]:SY
+1.645,-1:postag:SYM
+1.528,-1:word.lower():government
+1.518,"word.lower():s$426,000"

Weight?,Feature
+2.803,-1:word.lower():$
+2.507,+1:word.lower():4.574
+2.176,"+1:word.lower():25,000"
+2.165,+1:word.lower():cent
+2.020,"+1:word.lower():3,100"
+1.988,"+1:word.lower():65,000"
+1.962,+1:word.lower():1.74
+1.706,-1:postag[:2]:SY
+1.706,-1:postag:SYM
+1.628,-1:word.lower():between

Weight?,Feature
+4.611,word.istitle()
+3.114,word.lower():singaporeans
+2.644,word[-3:]:ean
+2.406,+1:word.lower():can
+2.399,word[-3:]:ans
+2.364,word.lower():british
+2.355,+1:word.lower():rubel
+2.193,word[-3:]:ese
+2.130,word.lower():padang
+2.058,word.lower():singaporean

Weight?,Feature
+1.417,word[-2:]:an
+1.394,word.lower():muslim
+1.394,word[-3:]:lim
+1.352,word[-2:]:im
+1.222,word.istitle()
+1.157,-1:word.istitle()
+1.028,word[-3:]:kan
+1.028,word.lower():lankan
+1.027,-1:word.lower():sri
+0.898,+1:word.lower():workers

Weight?,Feature
+4.068,word[-2:]:th
+3.744,word.lower():first
+2.775,word.lower():second
+2.702,word[-3:]:ond
+2.353,word[-3:]:0th
+2.122,word.lower():third
+2.121,word[-3:]:ird
+2.022,word.lower():1cm
+2.022,word[-3:]:1cm
… 88 more positive …,… 88 more positive …

Weight?,Feature
+4.227,word.lower():parliament
+4.072,word.lower():wang
+3.486,word[-3:]:ram
+3.392,word.lower():twitter
+3.016,word.lower():congress
+2.696,word.isupper()
+2.485,word.lower():apple
+2.433,+1:word.lower():ritual
+2.390,word[-2:]:CA
+2.331,word.lower():capitaland

Weight?,Feature
+2.737,+1:word.lower():merdeka
+2.354,+1:word.lower():research
+2.245,+1:word.lower():times
+2.086,-1:word.lower():generation
+2.081,word.lower():digital
+2.039,-1:word.lower():
+1.809,-1:word.lower():singapore
+1.803,-1:word.lower():cherish
+1.779,word[-3:]:Pei
+1.779,word.lower():pei

Weight?,Feature
+1.366,+1:word.lower():per
+1.200,word.isdigit()
+0.894,postag[:2]:NU
+0.894,postag:NUM
+0.795,-1:word.lower():while
+0.732,+1:postag:ADP
+0.718,word.lower():st
+0.645,+1:word.lower():%
+0.638,+1:word.lower():andrew
… 79 more positive …,… 79 more positive …

Weight?,Feature
+1.436,+1:word.lower():said
+1.042,word.lower():cent
+1.038,-1:word.lower():per
+1.011,word[-3:]:ent
+1.005,word[-2:]:nt
+0.946,-1:word.lower():20
+0.912,-1:word.lower():st
+0.892,word.lower():andrew
+0.840,+1:postag:PROPN
+0.745,postag[:2]:NO

Weight?,Feature
+3.613,-1:word.lower():judge
+2.607,word.istitle()
+2.516,word.lower():chan
+2.485,word.lower():daryati
+2.388,word[-3:]:the
+2.360,-1:word.lower():justice
+2.294,-1:word.lower():minister
+2.205,-1:word.lower():prosecutor
+2.073,+1:word.lower():loy
+2.055,word.lower():theophileous

Weight?,Feature
+2.298,-1:word.lower():-
+2.153,+1:word.lower():associate
+2.098,+1:word.lower():supra
+2.081,-1:word.lower():pioneer
+1.866,word.lower():an
+1.858,word.lower():generation
+1.842,-1:word.lower():dpp
+1.825,-1:word.lower():esm
+1.742,-1:word.lower():ms
+1.703,word[-3:]:an

Weight?,Feature
+2.162,word.lower():pmd
+2.162,word[-3:]:PMD
+2.162,word[-2:]:MD
+1.887,word.lower():ecstasy
+1.835,word[-3:]:asy
+1.825,word.lower():stars
+1.795,word[-2:]:sy
+1.729,word.lower():mytransport
+1.718,word[-3:]:GST
+1.718,word.lower():gst

Weight?,Feature
+2.043,word.lower():package
+1.302,-1:word.lower():payment
+1.270,"+1:word.lower():,"
+1.266,-1:word.lower():mytax
+1.263,word.lower():portal
+1.246,word[-3:]:age
+1.190,word.isupper()
+1.183,+1:word.lower():voucher
+1.181,word[-2:]:ge
+1.155,word.lower():gst

Weight?,Feature
+2.571,+1:word.lower():g
+2.306,+1:word.lower():m
+2.290,-1:word.lower():after
+2.181,+1:word.lower():kg
+2.170,+1:word.lower():maths
+1.675,+1:word.lower():cm
+1.559,-1:word.lower():main
+1.396,-1:word.lower():barely
+1.213,+1:word.lower():nautical
+1.200,-1:word.lower():by

Weight?,Feature
+2.197,word.lower():kg
+2.197,word[-3:]:kg
+2.197,word[-2:]:kg
+1.990,word[-2:]:g
+1.990,word.lower():g
+1.990,word[-3:]:g
+1.730,word[-3:]:les
+1.676,word.lower():cm
+1.676,word[-3:]:cm
+1.676,word[-2:]:cm

Weight?,Feature
+3.359,word[-2:]:pm
+3.260,word.lower():evening
+3.011,word[-2:]:am
+2.800,word[-3:]:oon
+2.331,word.lower():afternoon
+2.238,-1:word.lower():on
+2.176,+1:word.lower():or
+2.139,+1:word.lower():hours
+2.103,-1:word.lower():office
+2.096,-1:word.lower():pm

Weight?,Feature
+2.776,word.lower():minutes
+2.614,word.lower():hours
+2.326,word[-3:]:urs
+2.062,word.lower():morning
+1.966,word.lower():afternoon
+1.909,word[-3:]:tes
+1.856,+1:word.lower():hour
+1.849,+1:word.lower():rush
+1.827,word[-3:]:oon
+1.689,word.lower():hour

Weight?,Feature
+3.900,word.lower():president
+2.911,word[-3:]:DPP
+2.911,word.lower():dpp
+2.911,word[-2:]:PP
+2.865,word.lower():minister
+2.642,+1:word.lower():choo
+2.409,word.lower():comptroller
+2.377,word[-3:]:ant
+2.114,word.lower():sergeant
+2.102,+1:word.lower():mesenas

Weight?,Feature
+2.832,+1:word.lower():commended
+2.473,-1:word.lower():-
+2.155,-1:word.lower():judicial
+2.078,word[-2:]:or
+1.772,word[-3:]:tor
+1.649,-1:word.lower():superintendent
+1.570,word[-3:]:ent
+1.565,-1:word.lower():dr
+1.548,-1:word.lower():senior
+1.532,+1:word.lower():at

Weight?,Feature
+2.117,word[-2:]:hD
+2.117,word[-3:]:PhD
+2.117,word.lower():phd
+1.552,+1:word.lower():at
+1.295,"-1:word.lower():"""
+1.273,-1:word.lower():a
+1.167,word.lower():performance
+1.044,word[-3:]:nce
+0.976,+1:word.lower():award
+0.955,+1:postag:ADP

Weight?,Feature
+1.172,-1:word.lower():performance
+1.150,-1:word.istitle()
+1.108,"+1:word.lower():"""
+1.016,+1:postag:ADP
+0.934,word.lower():award
+0.903,-1:word.lower():engineering
+0.890,word.lower():course
+0.862,word[-3:]:ers
+0.852,word[-3:]:rse
+0.814,word.lower():of


In [82]:
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True,
)
crf.fit(X_train, Y_train);
eli5.show_weights(crf, top=5, show=['transition_features'])

From \ To,O,B-CARDINAL,I-CARDINAL,B-DATE,I-DATE,B-EVENT,I-EVENT,B-FAC,I-FAC,B-GPE,I-GPE,B-LANGUAGE,I-LANGUAGE,B-LAW,I-LAW,B-LOC,I-LOC,B-MONEY,I-MONEY,B-NORP,I-NORP,B-ORDINAL,B-ORG,I-ORG,B-PERCENT,I-PERCENT,B-PERSON,I-PERSON,B-PRODUCT,I-PRODUCT,B-QUANTITY,I-QUANTITY,B-TIME,I-TIME,B-TITLE,I-TITLE,B-WORK_OF_ART,I-WORK_OF_ART
O,3.774,2.28,-3.191,1.979,-4.369,0.347,-2.114,0.867,-1.879,0.698,-1.216,0.017,-1.186,1.202,-3.101,1.04,-2.062,2.416,-4.514,0.796,-1.5,0.944,1.478,-2.588,1.179,-2.073,1.257,-2.489,0.109,-1.972,1.294,-2.324,0.981,-3.193,0.405,-3.21,0.407,-1.997
B-CARDINAL,1.174,0.0,5.097,0.205,-2.732,0.043,0.0,-0.089,-0.53,-0.029,0.0,0.0,0.0,0.0,0.0,-0.359,-1.009,-1.002,-1.616,1.176,0.0,-0.119,1.257,-0.761,0.0,-1.069,0.0,-0.688,0.925,-0.205,0.0,-1.654,-0.386,-1.944,-0.37,-0.227,0.0,-0.065
I-CARDINAL,0.802,-0.993,5.356,-1.224,-1.673,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.592,-0.794,0.072,0.0,0.0,-0.244,0.0,-0.413,-1.114,0.0,0.0,0.657,0.0,-0.049,-0.349,-0.197,-0.795,0.0,0.0,0.0,0.0
B-DATE,0.338,-1.073,-0.925,-0.542,5.228,0.159,0.0,-0.061,0.0,-0.378,0.0,0.0,0.0,0.0,-0.274,-1.172,-0.624,0.507,-1.181,-0.375,0.0,-0.956,0.004,-0.504,0.0,-0.15,-0.67,-0.221,-0.652,-0.315,-0.153,-0.36,1.473,-1.457,-0.035,-0.09,0.0,0.0
I-DATE,-0.641,-1.259,-1.496,-0.129,5.332,-1.177,-0.0,0.0,-0.004,0.0,0.0,0.0,0.0,0.0,-0.306,0.0,-0.109,-0.237,-0.682,0.0,0.0,-0.038,-0.621,-0.726,0.0,-0.681,-0.743,0.0,0.61,0.0,-0.075,-0.043,-0.155,-0.35,-0.293,-0.352,0.0,0.0
B-EVENT,-0.571,0.0,0.0,0.0,-0.091,0.0,6.057,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.061,-0.175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-EVENT,-0.494,0.0,0.0,0.0,-0.457,0.0,6.814,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016,0.0,0.0,0.0,0.0,0.0,0.0,-0.223,0.0,0.0,-0.214,-0.116,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-FAC,0.053,0.0,0.0,0.0,-0.139,0.0,0.0,0.0,7.052,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.396,0.0,0.0,0.0,0.0,0.0,-0.279,-0.044,0.0,0.0,-0.078,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-FAC,-0.312,-0.335,0.0,0.557,-0.16,0.0,0.0,0.936,5.635,-0.061,-0.0,0.0,0.0,0.0,0.0,0.0,-0.695,0.0,0.0,0.0,0.0,0.0,-0.182,-0.736,0.0,0.0,-0.709,-0.444,0.0,0.0,0.0,0.0,0.143,0.0,-0.132,0.0,0.0,0.0
B-GPE,0.451,-0.328,0.0,0.379,-0.468,0.0,-0.089,0.0,-0.789,0.0,5.195,0.0,0.0,0.0,-0.189,0.0,-0.416,0.0,0.0,0.0,0.0,0.0,0.618,-1.218,0.0,0.0,-0.5,-0.255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [83]:
eli5.show_weights(crf, top=10, targets=['O','B-TITLE','I-TITLE','B-PERSON','I-PERSON','B-ORG','I-ORG','B-FAC','I-FAC','B-LOC','I-LOC'])

From \ To,O,B-TITLE,I-TITLE,B-PERSON,I-PERSON,B-ORG,I-ORG,B-FAC,I-FAC,B-LOC,I-LOC
O,3.774,0.405,-3.21,1.257,-2.489,1.478,-2.588,0.867,-1.879,1.04,-2.062
B-TITLE,-0.768,0.218,5.377,1.78,0.358,-0.359,-1.228,0.0,-0.026,0.0,-0.08
I-TITLE,-1.036,0.0,4.752,2.156,-1.252,0.014,-1.973,0.0,-0.098,-0.012,-0.18
B-PERSON,0.203,-0.118,-0.222,-1.707,5.482,-0.603,-0.789,0.0,-0.846,0.0,-0.786
I-PERSON,0.272,-0.048,-0.12,-1.404,5.292,-0.219,0.184,0.0,-0.595,0.0,-0.679
B-ORG,0.423,0.0,0.002,-0.287,-1.373,-0.925,6.023,-0.145,0.564,-0.088,0.0
I-ORG,0.027,1.401,-1.064,0.188,-0.26,-0.534,5.918,-0.2,-0.852,-0.079,-0.969
B-FAC,0.053,0.0,0.0,-0.078,0.0,-0.279,-0.044,0.0,7.052,0.0,-0.396
I-FAC,-0.312,-0.132,0.0,-0.709,-0.444,-0.182,-0.736,0.936,5.635,0.0,-0.695
B-LOC,-0.109,0.0,-0.014,-1.191,-1.062,-1.063,-0.984,0.0,-0.912,1.495,5.275

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10
+4.100,BOS,,,,,,,,,
+4.041,bias,,,,,,,,,
+3.716,+1:word.lower():pm,,,,,,,,,
+3.372,postag:PRON,,,,,,,,,
+3.132,-1:word.lower():haryana,,,,,,,,,
… 2559 more positive …,… 2559 more positive …,,,,,,,,,
… 1394 more negative …,… 1394 more negative …,,,,,,,,,
-3.340,word.lower():evening,,,,,,,,,
-3.532,word.lower():morning,,,,,,,,,
-3.636,word.lower():annual,,,,,,,,,

Weight?,Feature
+4.100,BOS
+4.041,bias
+3.716,+1:word.lower():pm
+3.372,postag:PRON
+3.132,-1:word.lower():haryana
… 2559 more positive …,… 2559 more positive …
… 1394 more negative …,… 1394 more negative …
-3.340,word.lower():evening
-3.532,word.lower():morning
-3.636,word.lower():annual

Weight?,Feature
+3.900,word.lower():president
+2.911,word[-3:]:DPP
+2.911,word.lower():dpp
+2.911,word[-2:]:PP
+2.865,word.lower():minister
+2.642,+1:word.lower():choo
+2.409,word.lower():comptroller
+2.377,word[-3:]:ant
+2.114,word.lower():sergeant
+2.102,+1:word.lower():mesenas

Weight?,Feature
+2.832,+1:word.lower():commended
+2.473,-1:word.lower():-
+2.155,-1:word.lower():judicial
+2.078,word[-2:]:or
+1.772,word[-3:]:tor
+1.649,-1:word.lower():superintendent
+1.570,word[-3:]:ent
+1.565,-1:word.lower():dr
+1.548,-1:word.lower():senior
+1.532,+1:word.lower():at

Weight?,Feature
+3.613,-1:word.lower():judge
+2.607,word.istitle()
+2.516,word.lower():chan
+2.485,word.lower():daryati
+2.388,word[-3:]:the
+2.360,-1:word.lower():justice
+2.294,-1:word.lower():minister
+2.205,-1:word.lower():prosecutor
+2.073,+1:word.lower():loy
+2.055,word.lower():theophileous

Weight?,Feature
+2.298,-1:word.lower():-
+2.153,+1:word.lower():associate
+2.098,+1:word.lower():supra
+2.081,-1:word.lower():pioneer
+1.866,word.lower():an
+1.858,word.lower():generation
+1.842,-1:word.lower():dpp
+1.825,-1:word.lower():esm
+1.742,-1:word.lower():ms
+1.703,word[-3:]:an

Weight?,Feature
+4.227,word.lower():parliament
+4.072,word.lower():wang
+3.486,word[-3:]:ram
+3.392,word.lower():twitter
+3.016,word.lower():congress
+2.696,word.isupper()
+2.485,word.lower():apple
+2.433,+1:word.lower():ritual
+2.390,word[-2:]:CA
+2.331,word.lower():capitaland

Weight?,Feature
+2.737,+1:word.lower():merdeka
+2.354,+1:word.lower():research
+2.245,+1:word.lower():times
+2.086,-1:word.lower():generation
+2.081,word.lower():digital
+2.039,-1:word.lower():
+1.809,-1:word.lower():singapore
+1.803,-1:word.lower():cherish
+1.779,word[-3:]:Pei
+1.779,word.lower():pei

Weight?,Feature
+2.977,-1:word.lower():studios
+2.715,word[-2:]:na
+2.570,word.lower():istana
+2.272,word.lower():vivocity
+2.053,word.lower():mosques
+2.014,word.lower():cte
+2.014,word[-3:]:CTE
+2.011,+1:word.lower():line
+1.914,word[-2:]:TE
+1.834,word[-2:]:SH

Weight?,Feature
+2.384,word[-3:]:tal
+2.347,+1:word.lower():during
+2.261,-1:word.lower():-
+2.096,word[-3:]:way
+2.051,word.lower():expressway
+2.047,-1:word.lower():orchard
+1.958,-1:word.lower():police
+1.948,+1:word.lower():station
+1.926,word.lower():mosque
+1.917,word.lower():hospital

Weight?,Feature
+2.906,-1:word.lower():penjuru
+2.748,word.lower():tampines
+2.696,word.lower():woodlands
+2.597,+1:word.lower():around
+2.469,word.lower():asia
+2.122,word.lower():bendemeer
+2.044,word.lower():jurong
+2.015,-1:word.lower():in
+1.965,word.istitle()
+1.877,+1:word.lower():south

Weight?,Feature
+2.342,-1:word.lower():517
+1.816,+1:word.lower():china
+1.774,word.lower():geylang
+1.760,-1:word.lower():mrt
+1.654,+1:word.lower():raged
+1.582,word[-3:]:ang
+1.522,+1:word.lower():road
+1.510,word.lower():river
+1.486,word.lower():west
… 407 more positive …,… 407 more positive …


In [84]:
eli5.show_weights(crf, top=10, feature_re='^word\.is',
                  horizontal_layout=False, show=['targets'],targets=['B-TITLE','I-TITLE','B-PERSON','I-PERSON','B-ORG','I-ORG','B-FAC','I-FAC','B-LOC','I-LOC','B-EVENT','I-EVENT'])

Weight?,Feature
1.202,word.isupper()
-0.081,word.istitle()
-0.396,word.isdigit()

Weight?,Feature
0.152,word.istitle()
-0.235,word.isupper()

Weight?,Feature
2.607,word.istitle()
0.251,word.isupper()
-0.137,word.isdigit()

Weight?,Feature
1.154,word.istitle()
-0.373,word.isupper()
-0.687,word.isdigit()

Weight?,Feature
2.696,word.isupper()
0.172,word.istitle()
-0.717,word.isdigit()

Weight?,Feature
1.14,word.istitle()
0.87,word.isupper()

Weight?,Feature
1.64,word.isupper()
0.049,word.istitle()

Weight?,Feature
0.635,word.istitle()
-0.144,word.isupper()

Weight?,Feature
1.965,word.istitle()
-0.692,word.isupper()
-1.021,word.isdigit()

Weight?,Feature
0.545,word.istitle()
-0.339,word.isdigit()
-0.814,word.isupper()

Weight?,Feature
1.554,word.isupper()
0.112,word.istitle()

Weight?,Feature
0.579,word.istitle()
