Extract the linguistic features

In [1]:
"""
- Avg_words_per_sentence 
- Avg_syllables_per_word 
- Complex_word_percent   
- Difficult_word_percent 
- Long_sent_percent      
- Long_word_percent      
- Avg_letters_per_word   
- Comma_percent          
- Proper_noun_percent    
- Noun_percent           
- Pronoun_percent        
- Conj_percent           

- Tokens            
- Words             
- Sentences         
- N_words           
- N_sentences       
- N_syllables       
- N_polysyllables   
"""
import pandas as pd
import spacy
import pyphen
import benepar
SPACY_MODEL = "en_core_web_sm"
import en_core_web_sm

def _get_words(x):
    words = [token.text for token in x if token.is_punct != True]
    return words

def words_and_sentences(df):
    nlp = spacy.load('en_core_web_sm', exclude=['parser', 'ner'])
    nlp.add_pipe('sentencizer')    
    df['Tokens'] = df['Text'].apply(lambda x: nlp(x))    
    df['Words'] = df['Tokens'].apply(_get_words)    
    df['Sentences'] = df['Tokens'].apply(lambda x: list(x.sents))    
    df['N_words'] = df['Words'].apply(lambda x: len(x))    
    df['N_sentences'] = df['Sentences'].apply(lambda x: len(x))    
    df["Avg_words_per_sentence"] = df["N_words"] / df["N_sentences"]    
    return df

def _count_hyphens(text, dic):
    return dic.inserted(text).count("-")

def syllables(df):
    dic = pyphen.Pyphen(lang='en_EN')
    df["N_hyphens"] = df["Text"].apply(lambda x: _count_hyphens(x, dic))
    df["N_syllables"] = df["N_words"] + df["N_hyphens"]
    df["Avg_syllables_per_word"] = df["N_syllables"] / df["N_words"]
    df.drop(columns=["N_hyphens"], inplace=True)
    return df

def _get_dale_chall_easy_words():
    easy_words = set()
    with open("dale_chall_easy_word_list.txt") as file:
        lines = [line.rstrip('\n') for line in file]
        for line in lines:
            easy_words.add(line.lower())
    return easy_words


def _get_num_difficult_words(text, easy_words):
    n = 0
    for word in text:
        if word.lower() not in easy_words:
            n += 1
    return n


def difficult_words_pct(df):    
    easy_words = _get_dale_chall_easy_words()    
    df["Difficult_word_percent"] = df["Words"].apply(lambda x: _get_num_difficult_words(x, easy_words)) / df["N_words"]    
    return df

def _count_polysyllables(words, dic):
    n_complex = 0    
    for word in words:
        if dic.inserted(word).count("-") >= 2:
            n_complex += 1    
    return n_complex


def polysyllables(df):   
    dic = pyphen.Pyphen(lang='en_EN')
    df["N_polysyllables"] = df["Words"].apply(lambda x: _count_polysyllables(x, dic))    
    return df

def complex_words_pct(df):   
    df["Complex_word_percent"] = df["N_polysyllables"] / df["N_words"]    
    return df

def _get_n_long_sent(sentences):
    n = 0
    for sentence in sentences:
        if len(sentence) > 25:
            n += 1
    return n

def long_sent_pct(df):   
    df["Long_sent_percent"] = df["Sentences"].apply(_get_n_long_sent) / df["N_sentences"]    
    return df

def _get_n_long_word(words):
    n = 0
    for word in words:
        if len(word) > 8:
            n += 1
    return n

def long_word_pct(df):   
    df["Long_word_percent"] = df["Words"].apply(_get_n_long_word) / df["N_words"]    
    return df

def _get_n_letters(words):
    n = 0
    for word in words:
        n += len(word)
    return n

def avg_letters_per_word(df):   
    df["Avg_letters_per_word"] = df["Words"].apply(_get_n_letters) / df["N_words"]    
    return df

def _get_n_comma_sent(sentences):
    n = 0
    for sentence in sentences:
        if str(sentence).find(",") != -1:
            n += 1
    return n

def comma_pct(df):   
    # get percentage
    df["Comma_percent"] = df["Sentences"].apply(_get_n_comma_sent) / df["N_sentences"]    
    return df

def _get_n_pos(tokens, pos_list):
    n = 0
    for token in tokens:
        for pos in pos_list:
            if token.pos_ == pos:
                n += 1
    return n

def pos_features(df):    
    pos_list = ["NOUN", "PROPN"]
    df["Noun_percent"] = df["Tokens"].apply(lambda x: _get_n_pos(x, pos_list)) / df["N_words"]    
    pos_list = ["PROPN"]
    df["Proper_noun_percent"] = df["Tokens"].apply(lambda x: _get_n_pos(x, pos_list))/ df["N_words"]    
    pos_list = ["PRON"]
    df["Pronoun_percent"] = df["Tokens"].apply(lambda x: _get_n_pos(x, pos_list)) / df["N_words"]    
    pos_list = ["CONJ", "CCONJ"]
    df["Conj_percent"] = df["Tokens"].apply(lambda x: _get_n_pos(x, pos_list)) / df["N_words"]    
    return df

def remove_aux_features(df):   
    df.drop(columns=["Tokens", "Words", "Sentences", "N_words", "N_sentences", "N_syllables", "N_polysyllables"], inplace=True)    
    return df

"""
- NP_per_sent
- VP_per_sent
- PP_per_sent
- SBAR_per_sent
- SBARQ_per_sent
- avg_NP_size
- avg_VP_size
- avg_PP_size
- avg_parse_tree

"""
from collections import Counter, defaultdict
import pandas as pd
import spacy
import nltk
import benepar
from benepar import BeneparComponent, NonConstituentException
benepar.download('benepar_en3')

def _parse_tree_height(sent):
    
    children = list(sent._.children)
    if not children:
        return 0
    else:
        return max(_parse_tree_height(child) for child in children) + 1

def _get_constituents(tokens):
    const_counter = Counter()
    const_lengths = defaultdict(list)

    for sentence in tokens.sents:
        for const in sentence._.constituents:
            const_counter.update(Counter(const._.labels))
            for label in const._.labels:
                const_lengths[label].append(len(const))
    
    const_avgs = defaultdict(int)
    for key in const_lengths.keys():
        avg = 0.0
        for length in const_lengths[key]: 
            avg += length
        avg /= len(const_lengths[key])        
        const_avgs[key] = avg         
    return const_counter, const_avgs

def _get_parse_tree_height(tokens):   
    avg_parse_tree_height = 0.0    
    for sentence in tokens.sents:
        avg_parse_tree_height += _parse_tree_height(sentence)        
    n_sentences = len(list(tokens.sents))
    avg_parse_tree_height /= n_sentences    
    return avg_parse_tree_height, n_sentences

def _get_parse_tree_features(tokens):
    const_counter, const_avgs = _get_constituents(tokens)
    avg_parse_tree_height, n_sentences = _get_parse_tree_height(tokens)    
    NP_per_sent = const_counter['NP'] / n_sentences
    VP_per_sent = const_counter['VP'] / n_sentences
    PP_per_sent = const_counter['PP'] / n_sentences
    SBAR_per_sent = const_counter['SBAR'] / n_sentences
    SBARQ_per_sent = const_counter['SBARQ'] / n_sentences
    avg_NP_size = const_avgs['NP']
    avg_VP_size = const_avgs['VP']
    avg_PP_size = const_avgs['PP']
    avg_parse_tree = avg_parse_tree_height    
    return NP_per_sent, VP_per_sent, PP_per_sent, \
        SBAR_per_sent, SBARQ_per_sent, avg_NP_size, \
        avg_VP_size, avg_PP_size, avg_parse_tree
    
def parse_tree_features(df):
    nlp = en_core_web_sm.load(disable=['ner'])
    if spacy.__version__.startswith('2'):
        nlp.add_pipe(benepar.BeneparComponent("benepar_en3"))
    else:
        nlp.add_pipe("benepar", config={"model": "benepar_en3"})
    df['B_Tokens'] = df['Text'].apply(lambda x: nlp(x))
    df['NP_per_sent'], df['VP_per_sent'], df['PP_per_sent'], \
    df['SBAR_per_sent'], df['SBARQ_per_sent'], df['avg_NP_size'], \
    df['avg_VP_size'], df['avg_PP_size'], df['avg_parse_tree'] = zip(*df['B_Tokens'].map(_get_parse_tree_features))
    df.drop(columns=["B_Tokens"], inplace=True)
    
    return df

[nltk_data] Downloading package benepar_en3 to
[nltk_data]     e:\Anaconda\nltk_data...
[nltk_data]   Package benepar_en3 is already up-to-date!


In [7]:
df = pd.read_csv("../data/TEST.csv", index_col = 1) #导入目标数据集
df['Text'] = df['Text'].astype(str)
df = words_and_sentences(df)
df = syllables(df)
df = difficult_words_pct(df)
df = polysyllables(df)
df = complex_words_pct(df)
df = long_sent_pct(df)
df = long_word_pct(df)
df = avg_letters_per_word(df)
df = comma_pct(df)
df = pos_features(df)
df = remove_aux_features(df)
df = parse_tree_features(df)
df.to_csv("../data/TEST_with_features.csv", encoding='utf-8') #特征保存路径



# Model

## correlation

In [41]:
from sklearn.metrics import ndcg_score
import numpy as np
from scipy import stats

def normalize(p, q):
    p, q = np.asarray(p), np.asarray(q)
    assert (p >= 0).all(), p
    assert (q >= 0).all()
    p, q = p / p.sum(), q / q.sum()
    return p, q

def JSD(p, q, base=2):
    p, q = normalize(p, q)
    m = 1. / 2 * (p + q)
    return stats.entropy(p, m, base=base) / 2. + stats.entropy(q, m, base=base) / 2.

def rnorm_sum_squares(x, y):
    x = np.array(x)
    y = np.array(y)
    assert x.shape == y.shape
    numerator = np.sum(np.square(x - y))
    denominator = np.sum(np.square(x) + np.square(y))
    rnorm_ss = np.sqrt(numerator / denominator)
    return rnorm_ss

def print_metrics(y_true, y_pred):
    metric = []
    Rjsd=1-JSD(y_true, y_pred, base=2) 
    RRNSS = 1-rnorm_sum_squares(y_true, y_pred)
    ndcg=ndcg_score([y_true], [y_pred])
    metric.append(Rjsd)
    metric.append(RRNSS)
    metric.append(ndcg)
    return metric  

In [42]:
from scipy.stats import spearmanr
from sklearn.model_selection import KFold
import numpy as np

def grid_search_cv_for_ensembles(model, max_depth_values, n_estimators_values, X, y, scoring_function, k=5, verbose=0):   
    best_score = 0.0
    best_n_estimators = 1
    best_max_depth = 1    
    for max_depth in max_depth_values: 
        for n_estimators in n_estimators_values:            
            kf = KFold(n_splits=k, random_state=None, shuffle=True)
            fold = 1
            scores = []
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                model.set_hyperparams(max_depth, n_estimators)
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                scores.append(scoring_function(y_test, y_pred))
                fold += 1              
            score = np.mean(scores)            
            if score > best_score:
                best_score = score
                best_n_estimators = n_estimators
                best_max_depth = max_depth
    return best_max_depth, best_n_estimators

def find_best_C(model, c_values, X, y, scoring_function, k=5, verbose=0):
    best_score = 0.0
    best_c = 1.0    
    for c in c_values:             
        kf = KFold(n_splits=k, random_state=None, shuffle=True)
        fold = 1
        scores = []
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            model.set_hyperparams('linear', c)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            scores.append(scoring_function(y_test, y_pred))
            fold += 1
        score = np.mean(scores)
        if score > best_score:
            best_score = score
            best_c = c
    return best_c

In [43]:
import numpy as np
def discretize(y_pred):    
    for i in range(len(y_pred)):    
        if y_pred[i] < 0.5:
            y_pred[i] = 0.0
        elif y_pred[i] < 1.5:
            y_pred[i] = 1.0
        elif y_pred[i] < 2.5:
            y_pred[i] = 2.0
        elif y_pred[i] < 3.5:
            y_pred[i] = 3.0
        else:
            y_pred[i] = 4.0            
    return y_pred

## xgboost

XGBoost+Glove+feature

In [44]:
from xgboost import XGBRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

class GloVeFeatXGBRegressor():
    def __init__(self, embedding_size=300, n_estimators=100, max_depth=3, learning_rate=0.1):
        # Load the GloVe word vector model
        self.glove_file = '../gloveglove.6B.300d.txt'
        self.glove_model = self.load_glove_model(self.glove_file)
        self.embedding_size = embedding_size
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.clf = XGBRegressor(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            learning_rate=self.learning_rate
        )

    def load_glove_model(self, glove_file):
        model = {}
        with open(glove_file, 'r', encoding='utf-8') as f:
            for line in f:
                split_line = line.strip().split()
                word = split_line[0]
                embedding = np.array([float(val) for val in split_line[1:]])
                model[word] = embedding
        return model

    def _get_sentence_embedding(self, sentence):
        vecs = [self.glove_model.get(word, np.zeros(self.embedding_size)) for word in sentence]
        vecs = np.array(vecs)
        return np.mean(vecs, axis=0)

    def train(self, X_train_text, X_train_feat, y_train):
        X_train_vecs = np.array([self._get_sentence_embedding(sentence) for sentence in X_train_text])
        X_train_vecs = np.hstack((X_train_vecs, X_train_feat))
        self.clf.fit(X_train_vecs, y_train)

    def predict(self, X_test_text, X_test_feat):
        X_test_vecs = np.array([self._get_sentence_embedding(sentence) for sentence in X_test_text])
        X_test_vecs = np.hstack((X_test_vecs, X_test_feat))
        return discretize(self.clf.predict(X_test_vecs))

    def evaluate(self, X_test_text, X_test_feat, y_test):
        y_pred = self.predict(X_test_text, X_test_feat)
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='macro')
        f1 = f1_score(y_test, y_pred, average='macro')        
        return accuracy, precision, recall, f1


XGBoost+Glove

In [45]:
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

class GloVeXGBRegressor():
    def __init__(self, embedding_size=300, n_estimators=100, max_depth=3, learning_rate=0.1):
        self.glove_file = '../glove/glove.6B.300d.txt'
        self.glove_model = self.load_glove_model(self.glove_file)
        self.embedding_size = embedding_size
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.clf = XGBRegressor(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            learning_rate=self.learning_rate
        )

    def load_glove_model(self, glove_file):
        model = {}
        with open(glove_file, 'r', encoding='utf-8') as f:
            for line in f:
                split_line = line.strip().split()
                word = split_line[0]
                embedding = np.array([float(val) for val in split_line[1:]])
                model[word] = embedding
        return model

    def _get_sentence_embedding(self, sentence):
        sentence_str = ' '.join(sentence)
        vecs = [self.glove_model[word] for word in sentence_str.split() if word in self.glove_model]
        vecs = np.array(vecs)
        return np.mean(vecs, axis=0)

    def train(self, X_train, y_train):
        X_train_vecs = np.array([self._get_sentence_embedding(sentence) for sentence in X_train])
        self.clf.fit(X_train_vecs, y_train)

    def predict(self, X_test):
        X_test_vecs = np.array([self._get_sentence_embedding(sentence) for sentence in X_test])
        return discretize(self.clf.predict(X_test_vecs))

    def evaluate(self, X_test, y_test):
        y_pred = self.predict(X_test)
        print(y_pred)
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='macro')
        f1 = f1_score(y_test, y_pred, average='macro')        
        return accuracy, precision, recall, f1

XGBoost+feature

In [46]:
from xgboost import XGBRegressor
import pickle
class XGBoost():    
    def __init__(self, max_depth=30, n_estimators=200, save_model=False, use_saved_model=False, model_path='xgboost.pickle'):
        self.model_path = model_path
        self.n_estimators = n_estimators
        self.save_model = save_model        
        if use_saved_model:
            with open(self.model_path, 'rb') as file:
                self.model = pickle.load(file)
        else:
            self.model = xgboost = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, objective="reg:squarederror")  
        
    def fit(self, X_train, y_train):
        self.model.fit(X_train, y_train)
        if self.save_model:
            with open(self.model_path, 'wb') as handle:
                pickle.dump(self.model, handle)
        
    def predict(self, X_test):
        return discretize(self.model.predict(X_test))
        
    def set_hyperparams(self, max_depth, n_estimators):    
        self.model = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, objective="reg:squarederror")  

# load data

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
from sklearn.model_selection import train_test_split
def get_train_test(feature,lable):
    X_train, X_test, y_train, y_test=train_test_split(feature,lable, test_size=0.2, random_state=42,stratify=lable)
    X_train=np.array(X_train)
    X_test=np.array(X_test)
    y_train=np.array(y_train)
    y_train = y_train.ravel()
    y_test=np.array(y_test)
    y_test=y_test.ravel()
    return X_train, X_test, y_train, y_test


In [124]:
DATA_feature = pd.read_csv("../data/TEST_with_features.csv")
df =pd.DataFrame(DATA_feature)
lable=df.iloc[:,0:1]
feature=df.iloc[:,2:23]
DATA_feature_train, DATA_feature_test, DATA_feature_trainy, DATA_feature_testy=get_train_test(feature,lable)

DATA = pd.read_csv("../data/TEST.csv")
df =pd.DataFrame(DATA)
lable=df.iloc[:,1:2]
text=df.iloc[:,0:1]
DATA_train, DATA_test, DATA_trainy, DATA_testy=get_train_test(text,lable)

CEFR=pd.read_csv("../data/[CEFR]_test_with_features.csv")
CEFR_feature_testy=CEFR.iloc[:,1]   #label
CEFR_feature_test=CEFR.iloc[:,2:23] #feature
CEFR_feature_test=np.array(CEFR_feature_test)
CEFR_test=CEFR.iloc[:,0]    #text
CEFR_test=np.array(CEFR_test)

CLEC=pd.read_csv("../data/[CLEC]_test_with_features.csv")
CLEC_feature_testy=CLEC.iloc[:,1]
CLEC_feature_test=CLEC.iloc[:,2:23]
CLEC_feature_test=np.array(CLEC_feature_test)
CLEC_test=CLEC.iloc[:,0]
CLEC_test=np.array(CLEC_test)

CLOTH=pd.read_csv("../data/[CLOTH]_test_with_features.csv")
CLOTH_feature_testy=CLOTH.iloc[:,1]
CLOTH_feature_test=CLOTH.iloc[:,2:23]
CLOTH_feature_test=np.array(CLOTH_feature_test)
CLOTH_test=CLOTH.iloc[:,0]
CLOTH_test=np.array(CLOTH_test)

OSP=pd.read_csv("../data/[OSP]_test_with_features.csv")
OSP_feature_testy=OSP.iloc[:,1]
OSP_feature_test=OSP.iloc[:,2:23]
OSP_feature_test=np.array(OSP_feature_test)
OSP_test=OSP.iloc[:,0]
OSP_test=np.array(OSP_test)

NES=pd.read_csv("../data/[NES]_test_with_features.csv")
NES_feature_testy=NES.iloc[:,1]
NES_feature_test=NES.iloc[:,2:23]
NES_feature_test=np.array(NES_feature_test)
NES_test=NES.iloc[:,0]
NES_test=np.array(NES_test)

RACE=pd.read_csv("../data/[RACE]_test_with_features.csv")
RACE_feature_testy=RACE.iloc[:,1]
RACE_feature_test=RACE.iloc[:,2:23]
RACE_feature_test=np.array(RACE_feature_test)
RACE_test=RACE.iloc[:,0]
RACE_test=np.array(RACE_test)

# 2. Cross Corpus Readability Assessment

XGBoost+feature

In [60]:
tests=[[CEFR_feature_test,  CEFR_feature_testy],
       [CLEC_feature_test,  CLEC_feature_testy],
       [CLOTH_feature_test, CLOTH_feature_testy],
       [OSP_feature_test,   OSP_feature_testy],
       [NES_feature_test,   NES_feature_testy],
       [RACE_feature_test,  RACE_feature_testy]]

scoring_function = lambda y_true, y_pred: spearmanr(y_true, y_pred)[0]
max_depth_values = [5, 10, 15, 20, 30]
n_estimators_values = [10, 50, 100, 200]
max_depth, n_estimators = grid_search_cv_for_ensembles(XGBoost(), max_depth_values, n_estimators_values, DATA_feature_train, DATA_feature_trainy, scoring_function, k=3, verbose=1)
xgboost = XGBoost(save_model=True)
xgboost.fit(DATA_feature_train, DATA_feature_trainy)

result = {}
cor_metric = []
for test_feature, y in tests:
    y_pred = xgboost.predict(test_feature)
    result['Numy'] = y
    result['Numyp'] = y_pred
    df = pd.DataFrame(result)
    cor=print_metrics(y, y_pred)
    cor_metric.append(cor)
cor = pd.DataFrame(columns=['RJSD','RRNSS','NDCG'],index=['CEFR','CLEC','CLOTH','OSP','NES','RACE'],data=cor_metric)

In [61]:
cor

Unnamed: 0,RJSD,RRNSS,NDCG
CEFR,0.983163,0.817967,0.964841
CLEC,0.896874,0.509737,0.964119
CLOTH,0.918496,0.355605,0.986177
OSP,0.836295,0.304971,0.876998
NES,0.984289,0.453482,0.97608
RACE,0.940976,0.681214,0.996188


XGBoost+glove

In [108]:
tests=[[CEFR_test, CEFR_feature_testy],
       [CLEC_test, CLEC_feature_testy],
       [CLOTH_test, CLOTH_feature_testy],
       [OSP_test, OSP_feature_testy],
       [NES_test, NES_feature_testy],
       [RACE_test, RACE_feature_testy]]

model = GloVeXGBRegressor()
model.train(DATA_train, DATA_trainy)

cor_metric = []
result = {}
for X_test,y_test in tests:
    y_pred = model.predict(X_test)
    result['Numy'] = y_test
    result['Numyp'] = y_pred
    df = pd.DataFrame(result)
    cor=print_metrics(y_test, y_pred)
    cor_metric.append(cor)
cor = pd.DataFrame(columns=['NDCG','RJSD','RRNSS'],index=['CEFR','CLEC','CLOTH','OSP','NES','RACE'], data=cor_metric)

In [109]:
cor

Unnamed: 0,NDCG,RJSD,RRNSS
CEFR,0.955202,0.670231,0.918118
CLEC,0.984127,0.634014,0.953194
CLOTH,0.994789,0.804894,0.985264
OSP,0.793658,0.348196,0.827608
NES,0.959823,0.406672,0.934496
RACE,0.994134,0.769969,0.990361


XGBoost+glove+feature

In [126]:
tests=[[CEFR_test,  CEFR_feature_test,  CEFR_feature_testy],
       [CLEC_test,  CLEC_feature_test,  CLEC_feature_testy],
       [CLOTH_test, CLOTH_feature_test, CLOTH_feature_testy],
       [OSP_test,   OSP_feature_test,   OSP_feature_testy],
       [NES_test,   NES_feature_test,   NES_feature_testy],
       [RACE_test,  RACE_feature_test,  RACE_feature_testy]]

model = GloVeFeatXGBRegressor()
model.train(DATA_train, DATA_feature_train, DATA_trainy)

cor_metric = []
result = {}
for X_test,X_feat_test,y_test in tests:
    y_pred = model.predict(X_test, X_feat_test)
    result['Numy'] = y_test
    result['Numyp'] = y_pred
    df = pd.DataFrame(result)
    cor=print_metrics(y_test, y_pred)
    cor_metric.append(cor)    
cor = pd.DataFrame(columns=['NDCG','RJSD','RRNSS'],index=['CEFR','CLEC','CLOTH','OSP','NES','RACE'], data=cor_metric)

In [127]:
cor

Unnamed: 0,NDCG,RJSD,RRNSS
CEFR,0.966754,0.751987,0.951926
CLEC,0.95243,0.544423,0.969192
CLOTH,0.946969,0.434548,0.986852
OSP,0.835041,0.304359,0.875594
NES,0.986605,0.45579,0.977625
RACE,0.962889,0.709252,0.996171
