In [46]:
import jieba,os,re
from gensim import corpora, models, similarities
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Bidirectional

from sklearn.model_selection import cross_validate, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import KFold

from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
max_abs_scaler = preprocessing.MaxAbsScaler()


def getPositionEncoding(seq_len, d, n):
    P = np.zeros((seq_len, d))
    for k in range(seq_len):
        for i in np.arange(int(d/2)):
            denominator = np.power(n, 2*i/d)
            P[k, 2*i] = np.sin(k/denominator)
            P[k, 2*i+1] = np.cos(k/denominator)
    return P

def getPositionEmbedding(column_name, short_name, d, n=10000):
    seq_len = merge_pd[column_name].max()
    result = getPositionEncoding(seq_len, d, n)
    df_PE = pd.DataFrame(result)
    df_PE.index = np.arange(1, len(df_PE) + 1)

    df_PE.index.name = column_name
    column_names = []

    for i in range(d):
        column_names.append(short_name + '_' + str(i + 1))
    df_PE.columns = column_names
    return df_PE


# LR

In [47]:
# LR rank
LR_results = []

for d in range(5, 1000, 10):

    input_df=pd.read_csv('Student Engagement Level Prediction - Multiclass Case/Student Engagement Level-Multiclass.csv')
    input_p_df=pd.read_csv('Student Performance Prediction - Multiclass Case/Student Performance Prediction-Multi.csv')
    merge_pd = pd.merge(input_df, input_p_df, on='Student ID')

    ''' creat columns called 'Assignment0x [n]_rank' for different assignments '''

    merge_pd['Assignment01 [8]_rank'] = merge_pd['Assignment01 [8]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment02 [12]_rank'] = merge_pd['Assignment02 [12]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment03 [25]_rank'] = merge_pd['Assignment03 [25]'].rank(ascending=False, method='max').astype(int)

    # get the position embedding for each assignment
    df_1 = pd.DataFrame(getPositionEmbedding('Assignment01 [8]_rank', 'A1', d))
    df_2 = pd.DataFrame(getPositionEmbedding('Assignment02 [12]_rank', 'A2', d))
    df_3 = pd.DataFrame(getPositionEmbedding('Assignment03 [25]_rank', 'A3', d))

    # define label
    merge_pd['below 80'] = 0
    merge_pd.loc[merge_pd['Course Grade']<80, 'below 80'] = 1

    # merge the position embedding with the original data
    merge_pd = pd.merge(pd.merge(pd.merge(merge_pd, df_1, on='Assignment01 [8]_rank'), df_2, on='Assignment02 [12]_rank'), df_3, on='Assignment03 [25]_rank')

    col1 =[]
    col2 = []
    col3 = []
    col_all = []

    col1 = df_1.columns.tolist()
    col2 = df_2.columns.tolist()
    col3 = df_3.columns.tolist()

    col_all = np.concatenate([col1, col2, col3])

    ## LR rank
    # Define feature set and target variable
    X = np.array(merge_pd[col_all])
    X = max_abs_scaler.fit_transform(X) 
    Y = np.array(merge_pd['below 80'])

    # Transfer the SVM model into Logistic Regression model
    model = LogisticRegression(max_iter= 5000)

    # Evaluate the model using 5-fold cross-validation
    scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    cv_results = cross_validate(model, X, Y, cv=5, scoring=scoring)

    # # Print the average performance across all folds
    # print("Accuracy:", np.mean(cv_results['test_accuracy']))
    # print("Precision:", np.mean(cv_results['test_precision_macro']))
    # print("Recall:", np.mean(cv_results['test_recall_macro']))
    # print("F1:", np.mean(cv_results['test_f1_macro']))
    # 把平均性能指标加入到结果列表中
    LR_results.append(np.mean(cv_results['test_f1_macro']))


# 打印结果列表
print(LR_results)

[0.5845843513086052, 0.695209389386094, 0.7152879559513157, 0.6569023264183467, 0.6362112361053368, 0.6055660427560687, 0.5961470165918945, 0.5896634742151641, 0.5803962306883692, 0.5654934322737032, 0.5674622159426581, 0.5616061765208399, 0.5584708921046679, 0.5524287837069844, 0.5405872249260065, 0.5715553147051311, 0.5723372981814998, 0.5515499731184288, 0.5558513897353349, 0.5461183492271806, 0.5509487752616085, 0.5524335509085494, 0.5585781237236737, 0.568363438790981, 0.5688566148254629, 0.5691089877255096, 0.5447394439688609, 0.5692834434288737, 0.563934131049615, 0.5603509368728534, 0.552643054783367, 0.5619972165676138, 0.5473507924271395, 0.5509925426425125, 0.5532765024650915, 0.5553592803428231, 0.5559221220155661, 0.5605392143187629, 0.5660823398038267, 0.5533430230789605, 0.5570213575802957, 0.5480469996172356, 0.5509187910923851, 0.5584208813933884, 0.5617464020835763, 0.5508580824753413, 0.5536158081580786, 0.5633660465988115, 0.5531913015114337, 0.5532791676499571, 0.5

# SVM

In [48]:
# SVM rank

SVM_results = []

for d in range(5, 1000, 10):

    input_df=pd.read_csv('Student Engagement Level Prediction - Multiclass Case/Student Engagement Level-Multiclass.csv')
    input_p_df=pd.read_csv('Student Performance Prediction - Multiclass Case/Student Performance Prediction-Multi.csv')
    merge_pd = pd.merge(input_df, input_p_df, on='Student ID')

    ''' creat columns called 'Assignment0x [n]_rank' for different assignments '''

    merge_pd['Assignment01 [8]_rank'] = merge_pd['Assignment01 [8]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment02 [12]_rank'] = merge_pd['Assignment02 [12]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment03 [25]_rank'] = merge_pd['Assignment03 [25]'].rank(ascending=False, method='max').astype(int)

    # get the position embedding for each assignment
    df_1 = pd.DataFrame(getPositionEmbedding('Assignment01 [8]_rank', 'A1', d))
    df_2 = pd.DataFrame(getPositionEmbedding('Assignment02 [12]_rank', 'A2', d))
    df_3 = pd.DataFrame(getPositionEmbedding('Assignment03 [25]_rank', 'A3', d))

    # define label
    merge_pd['below 80'] = 0
    merge_pd.loc[merge_pd['Course Grade']<80, 'below 80'] = 1

    # merge the position embedding with the original data
    merge_pd = pd.merge(pd.merge(pd.merge(merge_pd, df_1, on='Assignment01 [8]_rank'), df_2, on='Assignment02 [12]_rank'), df_3, on='Assignment03 [25]_rank')

    col1 =[]
    col2 = []
    col3 = []
    col_all = []

    col1 = df_1.columns.tolist()
    col2 = df_2.columns.tolist()
    col3 = df_3.columns.tolist()

    col_all = np.concatenate([col1, col2, col3])

    ## LR rank
    # Define feature set and target variable
    X = np.array(merge_pd[col_all])
    X = max_abs_scaler.fit_transform(X) 
    Y = np.array(merge_pd['below 80'])
    
    # Transfer to the SVM model 
    model = svm.SVC(kernel='rbf',decision_function_shape='ovo')

    # Evaluate the model using 5-fold cross-validation
    scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    cv_results = cross_validate(model, X, Y, cv=5, scoring=scoring)

    # # Print the average performance across all folds
    # print("Accuracy:", np.mean(cv_results['test_accuracy']))
    # print("Precision:", np.mean(cv_results['test_precision_macro']))
    # print("Recall:", np.mean(cv_results['test_recall_macro']))
    # print("F1:", np.mean(cv_results['test_f1_macro']))
    # 把平均性能指标加入到结果列表中
    SVM_results.append(np.mean(cv_results['test_f1_macro']))

# 打印结果列表
print(SVM_results)

[0.5933300641047525, 0.7019680465828436, 0.7251471686449449, 0.7067672714953139, 0.7030727595895134, 0.6948299514683669, 0.7230565466804344, 0.716604077590745, 0.7088021017114651, 0.7177729100270888, 0.7127116097514121, 0.7078159629845079, 0.7124745802289303, 0.715111966403532, 0.7055691369557506, 0.7046822778785089, 0.7174363557857966, 0.7059038228704179, 0.7180349976613633, 0.6964300392569366, 0.716477835256506, 0.7139265002483515, 0.7057787143903245, 0.71134532678375, 0.709761703123019, 0.7066138602249886, 0.7027243437457231, 0.7061478703692963, 0.7081572169310408, 0.7060227618892029, 0.7046822778785089, 0.7062593335073741, 0.7091829697094735, 0.7111287098479846, 0.7057274728893312, 0.7126473855670178, 0.7099778485894588, 0.7059312534335309, 0.7096309686805171, 0.7081846474941539, 0.7095248198923725, 0.7059312534335309, 0.7081846474941539, 0.7059312534335309, 0.713165958442168, 0.7093355087526798, 0.7084012644299191, 0.7063640158357358, 0.7075148770942619, 0.7021648816366017, 0.7006

# MLP

In [49]:
# MLP rank

MLP_results = []

for d in range(5, 1000, 10):
    input_df=pd.read_csv('Student Engagement Level Prediction - Multiclass Case/Student Engagement Level-Multiclass.csv')
    input_p_df=pd.read_csv('Student Performance Prediction - Multiclass Case/Student Performance Prediction-Multi.csv')
    merge_pd = pd.merge(input_df, input_p_df, on='Student ID')

    ''' creat columns called 'Assignment0x [n]_rank' for different assignments '''

    merge_pd['Assignment01 [8]_rank'] = merge_pd['Assignment01 [8]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment02 [12]_rank'] = merge_pd['Assignment02 [12]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment03 [25]_rank'] = merge_pd['Assignment03 [25]'].rank(ascending=False, method='max').astype(int)

    # get the position embedding for each assignment
    df_1 = pd.DataFrame(getPositionEmbedding('Assignment01 [8]_rank', 'A1', d))
    df_2 = pd.DataFrame(getPositionEmbedding('Assignment02 [12]_rank', 'A2', d))
    df_3 = pd.DataFrame(getPositionEmbedding('Assignment03 [25]_rank', 'A3', d))

    # define label
    merge_pd['below 80'] = 0
    merge_pd.loc[merge_pd['Course Grade']<80, 'below 80'] = 1

    # merge the position embedding with the original data
    merge_pd = pd.merge(pd.merge(pd.merge(merge_pd, df_1, on='Assignment01 [8]_rank'), df_2, on='Assignment02 [12]_rank'), df_3, on='Assignment03 [25]_rank')

    col1 =[]
    col2 = []
    col3 = []
    col_all = []

    col1 = df_1.columns.tolist()
    col2 = df_2.columns.tolist()
    col3 = df_3.columns.tolist()

    col_all = np.concatenate([col1, col2, col3])

    ## LR rank
    # Define feature set and target variable
    X = np.array(merge_pd[col_all])
    X = max_abs_scaler.fit_transform(X) 
    Y = np.array(merge_pd['below 80'])

    # Transfer into MLP model
    model = MLPClassifier(max_iter=2000) # max_iter is set to 200 as default


    # Evaluate the model using 5-fold cross-validation
    scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    cv_results = cross_validate(model, X, Y, cv=5, scoring=scoring)

    # # Print the average performance across all folds
    # print("Accuracy:", np.mean(cv_results['test_accuracy']))
    # print("Precision:", np.mean(cv_results['test_precision_macro']))
    # print("Recall:", np.mean(cv_results['test_recall_macro']))
    # print("F1:", np.mean(cv_results['test_f1_macro']))
    # 把平均性能指标加入到结果列表中
    MLP_results.append(np.mean(cv_results['test_f1_macro']))

# 打印结果列表
print(MLP_results)

[0.6075236885506059, 0.6414448527898948, 0.6643118820809774, 0.6274646696912627, 0.6337389611800855, 0.6386949581481011, 0.6321602328101498, 0.6337339939689379, 0.6484179038952511, 0.6119146427960167, 0.6241823133258131, 0.6248541787198452, 0.6083059117969203, 0.6174669885701108, 0.61956186536519, 0.6289403318831052, 0.6195070278892548, 0.598941600014215, 0.6065284945535465, 0.6070891210264977, 0.6240019932663475, 0.62577752124476, 0.599471003560396, 0.6125973418552706, 0.6084232211891021, 0.5873772401463283, 0.6004181107455218, 0.6071190120751255, 0.601585976347853, 0.6064504278381191, 0.6261293179466672, 0.6053629113220627, 0.6006192406778541, 0.6004216408484782, 0.6015819940452009, 0.6091434900375503, 0.6169445430806942, 0.6073909425194939, 0.6105653451232959, 0.592007854034609, 0.6230310185464079, 0.600701331564969, 0.603347521368421, 0.6114701561978324, 0.6110229911968709, 0.622946531884758, 0.6121465981371709, 0.6048997760823088, 0.6031121108288213, 0.6017708123853791, 0.60590521

In [50]:
SVM_results

[0.5933300641047525,
 0.7019680465828436,
 0.7251471686449449,
 0.7067672714953139,
 0.7030727595895134,
 0.6948299514683669,
 0.7230565466804344,
 0.716604077590745,
 0.7088021017114651,
 0.7177729100270888,
 0.7127116097514121,
 0.7078159629845079,
 0.7124745802289303,
 0.715111966403532,
 0.7055691369557506,
 0.7046822778785089,
 0.7174363557857966,
 0.7059038228704179,
 0.7180349976613633,
 0.6964300392569366,
 0.716477835256506,
 0.7139265002483515,
 0.7057787143903245,
 0.71134532678375,
 0.709761703123019,
 0.7066138602249886,
 0.7027243437457231,
 0.7061478703692963,
 0.7081572169310408,
 0.7060227618892029,
 0.7046822778785089,
 0.7062593335073741,
 0.7091829697094735,
 0.7111287098479846,
 0.7057274728893312,
 0.7126473855670178,
 0.7099778485894588,
 0.7059312534335309,
 0.7096309686805171,
 0.7081846474941539,
 0.7095248198923725,
 0.7059312534335309,
 0.7081846474941539,
 0.7059312534335309,
 0.713165958442168,
 0.7093355087526798,
 0.7084012644299191,
 0.7063640158357358,

# RNN

In [51]:
# RNN rank

RNN_results = []

for d in range(5, 1000, 10):

    input_df=pd.read_csv('Student Engagement Level Prediction - Multiclass Case/Student Engagement Level-Multiclass.csv')
    input_p_df=pd.read_csv('Student Performance Prediction - Multiclass Case/Student Performance Prediction-Multi.csv')
    merge_pd = pd.merge(input_df, input_p_df, on='Student ID')

    ''' creat columns called 'Assignment0x [n]_rank' for different assignments '''

    merge_pd['Assignment01 [8]_rank'] = merge_pd['Assignment01 [8]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment02 [12]_rank'] = merge_pd['Assignment02 [12]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment03 [25]_rank'] = merge_pd['Assignment03 [25]'].rank(ascending=False, method='max').astype(int)

    # get the position embedding for each assignment
    df_1 = pd.DataFrame(getPositionEmbedding('Assignment01 [8]_rank', 'A1', d))
    df_2 = pd.DataFrame(getPositionEmbedding('Assignment02 [12]_rank', 'A2', d))
    df_3 = pd.DataFrame(getPositionEmbedding('Assignment03 [25]_rank', 'A3', d))

    # define label
    merge_pd['below 80'] = 0
    merge_pd.loc[merge_pd['Course Grade']<80, 'below 80'] = 1

    # merge the position embedding with the original data
    merge_pd = pd.merge(pd.merge(pd.merge(merge_pd, df_1, on='Assignment01 [8]_rank'), df_2, on='Assignment02 [12]_rank'), df_3, on='Assignment03 [25]_rank')

    col1 =[]
    col2 = []
    col3 = []
    col_all = []
    col1 = df_1.columns.tolist()
    col2 = df_2.columns.tolist()
    col3 = df_3.columns.tolist()
    col_all = np.concatenate([col1, col2, col3])

    # RNN rank
    # Define feature set and target variable
    X = np.array(merge_pd[col_all])

    X = max_abs_scaler.fit_transform(X) 
    Y = np.array(merge_pd['below 80'])


    # Reshape: Shape before reshaping: (486, 150)
    print("Shape before reshaping:", X.shape)
    X = X.reshape(X.shape[0], int(X.shape[1]/df_2.shape[1]), df_2.shape[1]) # Shape after reshaping: (486, 3, 50)
    print("Shape after reshaping:", X.shape)

    # Define the number of folds
    n_folds = 5

    # Define the KFold object
    kfold = KFold(n_splits=n_folds, shuffle=True)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.5, random_state = 42)

    # Define the RNN model
    model = Sequential()
    model.add(SimpleRNN(10, input_shape=(X_train.shape[1], X_train.shape[2])))  # input_shape=(n_timesteps, features)
    model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid activation

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mae'])

    # Define early stopping criteria
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Initialize lists to store the performance metrics
    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []

    # Loop over the folds
    for train_index, test_index in kfold.split(X):
        # Split data into train and test sets
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        # Train the model
        model.fit(X_train, y_train, epochs=1024, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

        # Predict the labels for the test set
        y_pred = model.predict(X_test)
        y_pred = (y_pred > 0.5).astype(int)

        # Calculate and print the classification report
        report = classification_report(y_test, y_pred, output_dict=True)

        # Append the performance metrics to the lists
        accuracy_list.append(report['accuracy'])
        precision_list.append(report['macro avg']['precision'])
        recall_list.append(report['macro avg']['recall'])
        f1_list.append(report['macro avg']['f1-score'])
    
    RNN_results.append(np.mean(f1_list))

# 打印结果列表
print(RNN_results)

Shape before reshaping: (486, 15)
Shape after reshaping: (486, 3, 5)
Epoch 1/1024
Epoch 2/1024
Epoch 3/1024
Epoch 4/1024
Epoch 5/1024
Epoch 6/1024
Epoch 7/1024
Epoch 8/1024
Epoch 9/1024
Epoch 10/1024
Epoch 11/1024
Epoch 12/1024
Epoch 13/1024
Epoch 14/1024
Epoch 15/1024
Epoch 16/1024
Epoch 17/1024
Epoch 18/1024
Epoch 19/1024
Epoch 20/1024
Epoch 21/1024
Epoch 22/1024
Epoch 23/1024
Epoch 24/1024
Epoch 25/1024
Epoch 26/1024
Epoch 27/1024
Epoch 28/1024
Epoch 29/1024
Epoch 30/1024
Epoch 31/1024
Epoch 32/1024
Epoch 33/1024
Epoch 34/1024
Epoch 35/1024
Epoch 36/1024
Epoch 37/1024
Epoch 38/1024
Epoch 39/1024
Epoch 40/1024
Epoch 41/1024
Epoch 42/1024
Epoch 43/1024
Epoch 1/1024
Epoch 2/1024
Epoch 3/1024
Epoch 4/1024
Epoch 5/1024
Epoch 6/1024
Epoch 7/1024
Epoch 8/1024
Epoch 9/1024
Epoch 10/1024
Epoch 11/1024
Epoch 1/1024
Epoch 2/1024
Epoch 3/1024
Epoch 4/1024
Epoch 5/1024
Epoch 6/1024
Epoch 7/1024
Epoch 8/1024
Epoch 9/1024
Epoch 10/1024
Epoch 11/1024
Epoch 12/1024
Epoch 1/1024
Epoch 2/1024
Epoch 3/

# LSTM

In [52]:
# LSTM rank

LSTM_results = []

for d in range(5, 1000, 10):
    input_df=pd.read_csv('Student Engagement Level Prediction - Multiclass Case/Student Engagement Level-Multiclass.csv')
    input_p_df=pd.read_csv('Student Performance Prediction - Multiclass Case/Student Performance Prediction-Multi.csv')
    merge_pd = pd.merge(input_df, input_p_df, on='Student ID')

    ''' creat columns called 'Assignment0x [n]_rank' for different assignments '''

    merge_pd['Assignment01 [8]_rank'] = merge_pd['Assignment01 [8]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment02 [12]_rank'] = merge_pd['Assignment02 [12]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment03 [25]_rank'] = merge_pd['Assignment03 [25]'].rank(ascending=False, method='max').astype(int)

    # get the position embedding for each assignment
    df_1 = pd.DataFrame(getPositionEmbedding('Assignment01 [8]_rank', 'A1', d))
    df_2 = pd.DataFrame(getPositionEmbedding('Assignment02 [12]_rank', 'A2', d))
    df_3 = pd.DataFrame(getPositionEmbedding('Assignment03 [25]_rank', 'A3', d))

    # define label
    merge_pd['below 80'] = 0
    merge_pd.loc[merge_pd['Course Grade']<80, 'below 80'] = 1

    # merge the position embedding with the original data
    merge_pd = pd.merge(pd.merge(pd.merge(merge_pd, df_1, on='Assignment01 [8]_rank'), df_2, on='Assignment02 [12]_rank'), df_3, on='Assignment03 [25]_rank')

    col1 =[]
    col2 = []
    col3 = []
    col_all = []
    col1 = df_1.columns.tolist()
    col2 = df_2.columns.tolist()
    col3 = df_3.columns.tolist()
    col_all = np.concatenate([col1, col2, col3])

    # LSTM rank
    # Define feature set and target variable
    X = np.array(merge_pd[col_all])

    X = max_abs_scaler.fit_transform(X) 
    Y = np.array(merge_pd['below 80'])


    # Reshape: Shape before reshaping: (486, 150)
    print("Shape before reshaping:", X.shape)
    X = X.reshape(X.shape[0], int(X.shape[1]/df_2.shape[1]), df_2.shape[1]) # Shape after reshaping: (486, 3, 50)
    print("Shape after reshaping:", X.shape)

    # Define the number of folds
    n_folds = 5

    # Define the KFold object
    kfold = KFold(n_splits=n_folds, shuffle=True)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.5, random_state = 42)

    # Define the RNN model
    model = Sequential()
    #model.add(LSTM(256, dropout = 0.3, recurrent_dropout = 0.3, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(10, input_shape=(X_train.shape[1], X_train.shape[2])))  # input_shape=(n_timesteps, features)
    model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid activation

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mae'])

    # Define early stopping criteria
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Initialize lists to store the performance metrics
    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []

    # Loop over the folds
    for train_index, test_index in kfold.split(X):
        # Split data into train and test sets
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        # Train the model
        model.fit(X_train, y_train, epochs=4096, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

        # Predict the labels for the test set
        y_pred = model.predict(X_test)
        y_pred = (y_pred > 0.5).astype(int)

        # Calculate and print the classification report
        report = classification_report(y_test, y_pred, output_dict=True)

        # Append the performance metrics to the lists
        accuracy_list.append(report['accuracy'])
        precision_list.append(report['macro avg']['precision'])
        recall_list.append(report['macro avg']['recall'])
        f1_list.append(report['macro avg']['f1-score'])
    
    LSTM_results.append(np.mean(f1_list))

# 打印结果列表
print(LSTM_results)

Shape before reshaping: (486, 15)
Shape after reshaping: (486, 3, 5)
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 15/4096
Epoch 16/4096
Epoch 17/4096
Epoch 18/4096
Epoch 19/4096
Epoch 20/4096
Epoch 21/4096
Epoch 22/4096
Epoch 23/4096
Epoch 24/4096
Epoch 25/4096
Epoch 26/4096
Epoch 27/4096
Epoch 28/4096
Epoch 29/4096
Epoch 30/4096
Epoch 31/4096
Epoch 32/4096
Epoch 33/4096
Epoch 34/4096
Epoch 35/4096
Epoch 36/4096
Epoch 37/4096
Epoch 38/4096
Epoch 39/4096
Epoch 40/4096
Epoch 41/4096
Epoch 42/4096
Epoch 43/4096
Epoch 44/4096
Epoch 45/4096
Epoch 46/4096
Epoch 47/4096
Epoch 48/4096
Epoch 49/4096
Epoch 50/4096
Epoch 51/4096
Epoch 52/4096
Epoch 53/4096
Epoch 54/4096
Epoch 55/4096
Epoch 56/4096
Epoch 57/4096
Epoch 58/4096
Epoch 59/4096
Epoch 60/4096
Epoch 61/4096
Epoch 62/4096
Epoch 63/4096
Epoch 64/4096
Epoch 65/4096
Epoch 66/4096
Epoch 67/4096
Ep

Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 15/4096
Epoch 16/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Shape before reshaping: (486, 315)
Shape after reshaping: (486, 3, 105)
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096

# GRU

In [53]:
# GRU rank

GRU_results = []

for d in range(5, 1000, 10):

    input_df=pd.read_csv('Student Engagement Level Prediction - Multiclass Case/Student Engagement Level-Multiclass.csv')
    input_p_df=pd.read_csv('Student Performance Prediction - Multiclass Case/Student Performance Prediction-Multi.csv')
    merge_pd = pd.merge(input_df, input_p_df, on='Student ID')

    ''' creat columns called 'Assignment0x [n]_rank' for different assignments '''

    merge_pd['Assignment01 [8]_rank'] = merge_pd['Assignment01 [8]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment02 [12]_rank'] = merge_pd['Assignment02 [12]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment03 [25]_rank'] = merge_pd['Assignment03 [25]'].rank(ascending=False, method='max').astype(int)

    # get the position embedding for each assignment
    df_1 = pd.DataFrame(getPositionEmbedding('Assignment01 [8]_rank', 'A1', d))
    df_2 = pd.DataFrame(getPositionEmbedding('Assignment02 [12]_rank', 'A2', d))
    df_3 = pd.DataFrame(getPositionEmbedding('Assignment03 [25]_rank', 'A3', d))

    # define label
    merge_pd['below 80'] = 0
    merge_pd.loc[merge_pd['Course Grade']<80, 'below 80'] = 1

    # merge the position embedding with the original data
    merge_pd = pd.merge(pd.merge(pd.merge(merge_pd, df_1, on='Assignment01 [8]_rank'), df_2, on='Assignment02 [12]_rank'), df_3, on='Assignment03 [25]_rank')

    col1 =[]
    col2 = []
    col3 = []
    col_all = []
    col1 = df_1.columns.tolist()
    col2 = df_2.columns.tolist()
    col3 = df_3.columns.tolist()
    col_all = np.concatenate([col1, col2, col3])

    # GRU rank
    # Define feature set and target variable
    X = np.array(merge_pd[col_all])

    X = max_abs_scaler.fit_transform(X) 
    Y = np.array(merge_pd['below 80'])


    # Reshape: Shape before reshaping: (486, 150)
    print("Shape before reshaping:", X.shape)
    X = X.reshape(X.shape[0], int(X.shape[1]/df_2.shape[1]), df_2.shape[1]) # Shape after reshaping: (486, 3, 50)
    print("Shape after reshaping:", X.shape)

    # Define the number of folds
    n_folds = 5

    # Define the KFold object
    kfold = KFold(n_splits=n_folds, shuffle=True)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.5, random_state = 42)

    # Define the GRU model
    model = Sequential()
    model.add(GRU(10,  input_shape=(X_train.shape[1], X_train.shape[2])))  # input_shape=(n_timesteps, features)
    model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid activation

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mae'])

    # Define early stopping criteria
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Initialize lists to store the performance metrics
    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []

    # Loop over the folds
    for train_index, test_index in kfold.split(X):
        # Split data into train and test sets
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        # Train the model
        model.fit(X_train, y_train, epochs=4096, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

        # Predict the labels for the test set
        y_pred = model.predict(X_test)
        y_pred = (y_pred > 0.5).astype(int)

        # Calculate and print the classification report
        report = classification_report(y_test, y_pred, output_dict=True)

        # Append the performance metrics to the lists
        accuracy_list.append(report['accuracy'])
        precision_list.append(report['macro avg']['precision'])
        recall_list.append(report['macro avg']['recall'])
        f1_list.append(report['macro avg']['f1-score'])
    
    GRU_results.append(np.mean(f1_list))

# 打印结果列表
print(GRU_results)

Shape before reshaping: (486, 15)
Shape after reshaping: (486, 3, 5)
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 15/4096
Epoch 16/4096
Epoch 17/4096
Epoch 18/4096
Epoch 19/4096
Epoch 20/4096
Epoch 21/4096
Epoch 22/4096
Epoch 23/4096
Epoch 24/4096
Epoch 25/4096
Epoch 26/4096
Epoch 27/4096
Epoch 28/4096
Epoch 29/4096
Epoch 30/4096
Epoch 31/4096
Epoch 32/4096
Epoch 33/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 15/4096
Epoch 16/4096
Epoch 17/4096
Epoch 18/4096
Epoch 19/4096
Epoch 20/4096
Epoch 21/4096
Epoch 22/4096
Epoch 23/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 1/4096
Epoch 2

Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 15/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Shape before reshaping: (486, 135)
Shape after reshaping: (486, 3, 45)
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 15/4096
Epoch 16/4096
Epoch 17/4096
Epoch 18/4096
Epoch 19/4096
Epoch 20/4096
Epoch 21/4096
Epoch 22/4096
Epoch 23/4096
Epoch 24/4096
Epoch 25/4096
Epo

# Bi-LSTM

In [54]:
# Bi_LSTM rank

Bi_LSTM_results = []

for d in range(5, 1000, 10):

    input_df=pd.read_csv('Student Engagement Level Prediction - Multiclass Case/Student Engagement Level-Multiclass.csv')
    input_p_df=pd.read_csv('Student Performance Prediction - Multiclass Case/Student Performance Prediction-Multi.csv')
    merge_pd = pd.merge(input_df, input_p_df, on='Student ID')

    ''' creat columns called 'Assignment0x [n]_rank' for different assignments '''

    merge_pd['Assignment01 [8]_rank'] = merge_pd['Assignment01 [8]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment02 [12]_rank'] = merge_pd['Assignment02 [12]'].rank(ascending=False, method='max').astype(int)
    merge_pd['Assignment03 [25]_rank'] = merge_pd['Assignment03 [25]'].rank(ascending=False, method='max').astype(int)

    # get the position embedding for each assignment
    df_1 = pd.DataFrame(getPositionEmbedding('Assignment01 [8]_rank', 'A1', d))
    df_2 = pd.DataFrame(getPositionEmbedding('Assignment02 [12]_rank', 'A2', d))
    df_3 = pd.DataFrame(getPositionEmbedding('Assignment03 [25]_rank', 'A3', d))

    # define label
    merge_pd['below 80'] = 0
    merge_pd.loc[merge_pd['Course Grade']<80, 'below 80'] = 1

    # merge the position embedding with the original data
    merge_pd = pd.merge(pd.merge(pd.merge(merge_pd, df_1, on='Assignment01 [8]_rank'), df_2, on='Assignment02 [12]_rank'), df_3, on='Assignment03 [25]_rank')

    col1 =[]
    col2 = []
    col3 = []
    col_all = []
    col1 = df_1.columns.tolist()
    col2 = df_2.columns.tolist()
    col3 = df_3.columns.tolist()
    col_all = np.concatenate([col1, col2, col3])

    # Bi_LSTM rank
    # Define feature set and target variable
    X = np.array(merge_pd[col_all])

    X = max_abs_scaler.fit_transform(X) 
    Y = np.array(merge_pd['below 80'])


    # Reshape: Shape before reshaping: (486, 150)
    print("Shape before reshaping:", X.shape)
    X = X.reshape(X.shape[0], int(X.shape[1]/df_2.shape[1]), df_2.shape[1]) # Shape after reshaping: (486, 3, 50)
    print("Shape after reshaping:", X.shape)

    # Define the number of folds
    n_folds = 5

    # Define the KFold object
    kfold = KFold(n_splits=n_folds, shuffle=True)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.5, random_state = 42)

    # Define the Bi-LSTM model model
    model = Sequential()
    model.add(Bidirectional(LSTM(10, input_shape=(X_train.shape[1], X_train.shape[2]))))# input_shape=(n_timesteps, features)
    model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid activation

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mae'])

    # Define early stopping criteria
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Initialize lists to store the performance metrics
    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []

    # Loop over the folds
    for train_index, test_index in kfold.split(X):
        # Split data into train and test sets
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        # Train the model
        model.fit(X_train, y_train, epochs=4096, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

        # Predict the labels for the test set
        y_pred = model.predict(X_test)
        y_pred = (y_pred > 0.5).astype(int)

        # Calculate and print the classification report
        report = classification_report(y_test, y_pred, output_dict=True)

        # Append the performance metrics to the lists
        accuracy_list.append(report['accuracy'])
        precision_list.append(report['macro avg']['precision'])
        recall_list.append(report['macro avg']['recall'])
        f1_list.append(report['macro avg']['f1-score'])
    
    Bi_LSTM_results.append(np.mean(f1_list))

# 打印结果列表
print(Bi_LSTM_results)

Shape before reshaping: (486, 15)
Shape after reshaping: (486, 3, 5)
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 15/4096
Epoch 16/4096
Epoch 17/4096
Epoch 18/4096
Epoch 19/4096
Epoch 20/4096
Epoch 21/4096
Epoch 22/4096
Epoch 23/4096
Epoch 24/4096
Epoch 25/4096
Epoch 26/4096
Epoch 27/4096
Epoch 28/4096
Epoch 29/4096
Epoch 30/4096
Epoch 31/4096
Epoch 32/4096
Epoch 33/4096
Epoch 34/4096
Epoch 35/4096
Epoch 36/4096
Epoch 37/4096
Epoch 38/4096
Epoch 39/4096
Epoch 40/4096
Epoch 41/4096
Epoch 42/4096
Epoch 43/4096
Epoch 44/4096
Epoch 45/4096
Epoch 46/4096
Epoch 47/4096
Epoch 48/4096
Epoch 49/4096
Epoch 50/4096
Epoch 51/4096
Epoch 52/4096
Epoch 53/4096
Epoch 54/4096
Epoch 55/4096
Epoch 56/4096
Epoch 57/4096
Epoch 58/4096
Epoch 59/4096
Epoch 60/4096
Epoch 61/4096
Epoch 62/4096
Epoch 63/4096
Epoch 64/4096
Epoch 65/4096
Epoch 66/4096
Epoch 67/4096
Ep

Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Shape before reshaping: (486, 225)
Shape after reshaping: (486, 3, 75)
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 12/4096
Epoch 13/4096
Epoch 14/4096
Epoch 15/4096
Epoch 16/4096
Epoch 17/4096
Epoch 18/4096
Epoch 19/4096
Epoch 20/4096
Epoch 21/4096
Epoch 22/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/4096
Epoch 9/4096
Epoch 10/4096
Epoch 11/4096
Epoch 1/4096
Epoch 2/4096
Epoch 3/4096
Epoch 4/4096
Epoch 5/4096
Epoch 6/4096
Epoch 7/4096
Epoch 8/

In [57]:
Bi_LSTM_results

[0.7518935915647311,
 0.8005701674692199,
 0.8003772451936972,
 0.80486598147651,
 0.798223698094547,
 0.8087684288898341,
 0.7911217202662858,
 0.8111780440799661,
 0.8193174675727436,
 0.7995575826106286,
 0.830338133047935,
 0.8115672254424439,
 0.821555396747496,
 0.7974570405378691,
 0.8044125350811393,
 0.8078759846483464,
 0.8001412484559383,
 0.8021917371695466,
 0.8098080601145494,
 0.8118003283296928,
 0.8341193950377006,
 0.7991651758095257,
 0.7955136268235639,
 0.8057599428393931,
 0.8057156314138325,
 0.813755511263993,
 0.8080145367883608,
 0.8137877335974124,
 0.8013877698698998,
 0.7806093964047809,
 0.8302600133033342,
 0.8310787574809637,
 0.7991552544100818,
 0.8779948755461051,
 0.8233853786534621,
 0.8206998559435634,
 0.8156409290315272,
 0.7997996881781493,
 0.8258711311616281,
 0.807043772070546,
 0.8099285587716636,
 0.812959269539774,
 0.8326698406206692,
 0.8267722185100407,
 0.8624776095038819,
 0.8408224069574866,
 0.8205011097791546,
 0.8327245332129053,


In [173]:
data = [LR_results, SVM_results, MLP_results, RNN_results, LSTM_results, GRU_results, Bi_LSTM_results]
df = pd.DataFrame(data=data, index=['LR', 'SVM', 'MLP', 'RNN', 'LSTM', 'GRU', 'Bi_LSTM'], columns=range(5, 1000, 10))
df

Unnamed: 0,5,15,25,35,45,55,65,75,85,95,...,905,915,925,935,945,955,965,975,985,995
LR,0.584584,0.695209,0.715288,0.656902,0.636211,0.605566,0.596147,0.589663,0.580396,0.565493,...,0.54907,0.54907,0.54907,0.54907,0.545902,0.545902,0.547991,0.543794,0.546185,0.546185
SVM,0.59333,0.701968,0.725147,0.706767,0.703073,0.69483,0.723057,0.716604,0.708802,0.717773,...,0.705931,0.705931,0.705931,0.705931,0.705931,0.705931,0.704111,0.704111,0.705931,0.702429
MLP,0.607524,0.641445,0.664312,0.627465,0.633739,0.638695,0.63216,0.633734,0.648418,0.611915,...,0.626646,0.613296,0.621402,0.62956,0.605507,0.622968,0.626607,0.615847,0.620542,0.609932
RNN,0.65897,0.814292,0.808763,0.811252,0.814647,0.803922,0.812993,0.813285,0.801599,0.830704,...,0.853565,0.826565,0.841732,0.80646,0.803185,0.823538,0.803419,0.828826,0.804352,0.815534
LSTM,0.682779,0.802638,0.803536,0.806103,0.795214,0.817012,0.815719,0.813704,0.805805,0.794484,...,0.843353,0.831342,0.797804,0.798221,0.809016,0.836881,0.809127,0.813648,0.840769,0.806693
GRU,0.672054,0.81222,0.802583,0.804178,0.791886,0.795015,0.818469,0.815782,0.845893,0.77671,...,0.856373,0.808003,0.797384,0.794377,0.802747,0.820957,0.814374,0.832099,0.802977,0.791641
Bi_LSTM,0.751894,0.80057,0.800377,0.804866,0.798224,0.808768,0.791122,0.811178,0.819317,0.799558,...,0.799753,0.82848,0.83266,0.813735,0.780943,0.826524,0.827355,0.825374,0.810252,0.832318


In [174]:
# save the dataframe as a csv file, specify the file name as "data.csv"
df.to_csv("data.csv")

# plot

In [90]:
# 导入plotly库
import plotly.graph_objects as go

# 创建一个空白的图形对象
fig = go.Figure()

d_list = []
for d in range(5, 400, 100):
    d_list.append(d)

columns = d_list

# 添加各个模型的曲线，指定x轴、y轴、颜色和标签
fig.add_trace(go.Scatter(x=d_list, y=LR_results, mode='lines', line=dict(color='red'), name='LR'))
fig.add_trace(go.Scatter(x=d_list, y=SVM_results, mode='lines', line=dict(color='blue'), name='SVM'))
fig.add_trace(go.Scatter(x=d_list, y=MLP_results, mode='lines', line=dict(color='green'), name='MLP'))
fig.add_trace(go.Scatter(x=d_list, y=RNN_results, mode='lines', line=dict(color='cyan'), name='RNN'))
fig.add_trace(go.Scatter(x=d_list, y=LSTM_results, mode='lines', line=dict(color='magenta'), name='LSTM'))
fig.add_trace(go.Scatter(x=d_list, y=GRU_results, mode='lines', line=dict(color='yellow'), name='GRU'))
fig.add_trace(go.Scatter(x=d_list, y=Bi_LSTM_results , mode='lines', line=dict(color='black'), name='Bi-LSTM'))


# 设置标题和坐标轴标签
fig.update_layout(title="Model Performance vs d value", xaxis_title="d value", yaxis_title="Model Performance")

# 调整图例位置和大小
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

# 显示图形
fig.show()