In [160]:
import pandas as pd
import numpy as np
import glob
import os

## Data preprocessing

In [2]:
# Collect the per-method result files.
# glob returns matches in arbitrary (filesystem-dependent) order, while later
# cells label rows positionally as EPOC, ION, JDC, NCCO. Sorting makes the
# order deterministic and alphabetical so those labels stay correct.
input_path = 'test_*.csv'
all_files = sorted(glob.glob(input_path))
print("files:", all_files)

files: ['test_result_EPOC.csv', 'test_result_ION.csv', 'test_result_JDC.csv', 'test_result_NCCO.csv']


In [3]:
# Load the EPOC result file on its own and display it
df_test=pd.read_csv('test_result_EPOC.csv')
df_test

Unnamed: 0,PPI,Accuracy,Precision,Recall,F-measure,G-mean2,Specificity,NPV,TN,FP,FN,TP
0,1018,0.7613,0.4955,0.4564,0.4752,0.625,0.8559,0.8354,665,112,131,110
1,734,0.7411,0.4516,0.4884,0.4693,0.6322,0.8185,0.8394,460,102,88,84
2,541,0.7301,0.5503,0.5706,0.5602,0.6752,0.7989,0.8118,302,76,70,93


In [4]:
# Read every result file once up front; the original re-read each CSV on
# every pass of the outer loop.
result_frames = [pd.read_csv(file) for file in all_files]

# Gather rows grouped by row position: row 0 of every file first, then
# row 1 of every file, then row 2.
test_result = []
for i in range(3):
    # The loop variable is deliberately called `df`: the next cell reads
    # `df.columns`, so the last frame must stay bound to that name.
    for df in result_frames:
        test_result.append(np.asarray(df.iloc[i, :]))

In [5]:
# Stack the collected rows into one table that reuses the column names of
# the last file read.
stacked_rows = np.asarray(test_result)
test_results = pd.DataFrame(stacked_rows, columns=df.columns)

# Only create result.csv when it does not exist yet; an existing file is
# left untouched.
if not os.path.isfile('result.csv'):
    test_results.to_csv('result.csv', sep=',', index=False)

In [6]:
from sklearn import metrics
from sklearn.metrics import confusion_matrix


def compute_metrics(all_trues, all_scores, threshold):
    """Binarise scores at ``threshold`` and compute classification metrics.

    Parameters
    ----------
    all_trues : array-like
        Ground-truth binary labels (0 / 1).
    all_scores : array-like
        Predicted scores. Converted with ``np.asarray`` so plain lists are
        accepted as well as NumPy arrays.
    threshold : scalar or array broadcastable against ``all_scores``
        A sample is predicted positive when its score is ``>= threshold``.

    Returns
    -------
    tuple
        ``(acc, pre, rec, f1, gmean, specificity, NPV, tn, fp, fn, tp)``.
        ``specificity`` and ``NPV`` are ``nan`` when their denominator is
        zero; ``gmean`` is ``sqrt(rec * specificity)``.
    """
    # np.asarray makes the comparison element-wise even for list input,
    # where `list >= float` would raise TypeError.
    all_preds = np.asarray(all_scores) >= threshold

    acc = metrics.accuracy_score(all_trues, all_preds)
    pre = metrics.precision_score(all_trues, all_preds)
    rec = metrics.recall_score(all_trues, all_preds)
    f1 = metrics.f1_score(all_trues, all_preds)

    # labels=[0, 1] forces a 2x2 matrix even if one class is absent, so the
    # four-way unpacking below always succeeds.
    tn, fp, fn, tp = metrics.confusion_matrix(all_trues, all_preds, labels=[0, 1]).ravel()

    # An undefined ratio (zero denominator) is reported as nan explicitly
    # instead of relying on a NumPy 0/0 division and its RuntimeWarning.
    specificity = tn / (tn + fp) if (tn + fp) else float('nan')
    gmean = np.sqrt(rec * specificity)
    NPV = tn / (tn + fn) if (tn + fn) else float('nan')

    return acc, pre, rec, f1, gmean, specificity, NPV, tn, fp, fn, tp

def print_metrics(data_type, metrics):
    """Print one tab-separated line summarising an 11-value metrics tuple.

    ``metrics`` must unpack into
    ``(acc, pre, rec, f1, gmean, specificity, NPV, tn, fp, fn, tp)``.
    The seven ratios are printed with four decimals; the four confusion
    counts are printed as left-aligned integers padded to width 5.
    """
    (accuracy, precision, recall, f_measure, g_mean,
     spec, npv, n_tn, n_fp, n_fn, n_tp) = metrics

    ratio_fields = (
        ('acc', accuracy),
        ('pre', precision),
        ('rec', recall),
        ('f1', f_measure),
        ('gmean', g_mean),
        ('specificity', spec),
        ('NPV', npv),
    )
    count_fields = (
        ('TN', n_tn),
        ('FP', n_fp),
        ('FN', n_fn),
        ('TP', n_tp),
    )

    parts = ['%s:' % data_type]
    parts.extend('%s:%0.4f' % (label, value) for label, value in ratio_fields)
    parts.extend('%s=%-5d' % (label, value) for label, value in count_fields)
    print('\t'.join(parts))

In [7]:
# Datasets are evaluated in the order 5093, 3672, 2708; this cell handles 5093.
output_file = 'result.csv'

test_trues = np.load('../../output/PPI5093/test_trues.npy')
final_test_scores = np.load('../../output/PPI5093/final_test_scores.npy')
final_test_threshold = np.load('../../output/PPI5093/final_test_threshold.npy')
number = len(test_trues)

# Evaluate at the threshold loaded from disk (presumably the one chosen by
# F1 upstream -- the selection itself does not happen in this notebook).
final_test_metrics_f1 = compute_metrics(test_trues, final_test_scores, final_test_threshold)
print_metrics('Final test on PPI5093', final_test_metrics_f1)

# Append the EPViT row for PPI5093 to 'result.csv': the sample count first,
# then the eleven metrics in the order returned by compute_metrics.
row_format = '%d, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %d, %d, %d, %d\n'
with open(output_file, 'a', newline='') as f:
    f.write(row_format % (number, *final_test_metrics_f1))

Final test on PPI5093:	acc:0.8360	pre:0.6529	rec:0.6556	f1:0.6542	gmean:0.7647	specificity:0.8919	NPV:0.8930	TN=693  	FP=84   	FN=83   	TP=158  


In [8]:
# Second dataset: 3672
output_file = 'result.csv'

test_trues = np.load('../../output/PPI3672/test_trues.npy')
final_test_scores = np.load('../../output/PPI3672/final_test_scores.npy')
final_test_threshold = np.load('../../output/PPI3672/final_test_threshold.npy')
number = len(test_trues)

# Evaluate at the threshold loaded from disk (presumably the one chosen by
# F1 upstream -- the selection itself does not happen in this notebook).
final_test_metrics_f1 = compute_metrics(test_trues, final_test_scores, final_test_threshold)
print_metrics('Final test on PPI3672', final_test_metrics_f1)

# Append the EPViT row for PPI3672 to 'result.csv': the sample count first,
# then the eleven metrics in the order returned by compute_metrics.
row_format = '%d, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %d, %d, %d, %d\n'
with open(output_file, 'a', newline='') as f:
    f.write(row_format % (number, *final_test_metrics_f1))

Final test on PPI3672:	acc:0.8065	pre:0.5758	rec:0.6628	f1:0.6162	gmean:0.7508	specificity:0.8505	NPV:0.8918	TN=478  	FP=84   	FN=58   	TP=114  


In [9]:
# Third dataset: 2708
output_file = 'result.csv'

test_trues = np.load('../../output/PPI2708/test_trues.npy')
final_test_scores = np.load('../../output/PPI2708/final_test_scores.npy')
final_test_threshold = np.load('../../output/PPI2708/final_test_threshold.npy')
number = len(test_trues)

# Evaluate at the threshold loaded from disk (presumably the one chosen by
# F1 upstream -- the selection itself does not happen in this notebook).
final_test_metrics_f1 = compute_metrics(test_trues, final_test_scores, final_test_threshold)
print_metrics('Final test on PPI2708', final_test_metrics_f1)

# Append the EPViT row for PPI2708 to 'result.csv': the sample count first,
# then the eleven metrics in the order returned by compute_metrics.
row_format = '%d, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %d, %d, %d, %d\n'
with open(output_file, 'a', newline='') as f:
    f.write(row_format % (number, *final_test_metrics_f1))

Final test on PPI2708:	acc:0.8059	pre:0.7042	rec:0.6135	f1:0.6557	gmean:0.7385	specificity:0.8889	NPV:0.8421	TN=336  	FP=42   	FN=63   	TP=100  


# start here

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Reload the accumulated results from result.csv and display them
df=pd.read_csv('result.csv')
df

Unnamed: 0,PPI,Accuracy,Precision,Recall,F-measure,G-mean2,Specificity,NPV,TN,FP,FN,TP
0,1018.0,0.7613,0.4955,0.4564,0.4752,0.625,0.8559,0.8354,665.0,112.0,131.0,110.0
1,1018.0,0.7692,0.5144,0.444,0.4766,0.6215,0.87,0.8346,676.0,101.0,134.0,107.0
2,1018.0,0.7505,0.4721,0.4564,0.4641,0.6198,0.8417,0.8331,654.0,123.0,131.0,110.0
3,1018.0,0.7613,0.4955,0.4564,0.4752,0.625,0.8559,0.8354,665.0,112.0,131.0,110.0
4,734.0,0.7411,0.4516,0.4884,0.4693,0.6322,0.8185,0.8394,460.0,102.0,88.0,84.0
5,734.0,0.7643,0.4969,0.4709,0.4836,0.6342,0.8541,0.8406,480.0,82.0,91.0,81.0
6,734.0,0.7057,0.375,0.3837,0.3793,0.5555,0.8043,0.81,452.0,110.0,106.0,66.0
7,734.0,0.6158,0.2135,0.2384,0.2253,0.4175,0.7313,0.7583,411.0,151.0,131.0,41.0
8,541.0,0.7301,0.5503,0.5706,0.5602,0.6752,0.7989,0.8118,302.0,76.0,70.0,93.0
9,541.0,0.7264,0.5478,0.5276,0.5375,0.6546,0.8122,0.7995,307.0,71.0,77.0,86.0


In [4]:
# Split the results by the value in the PPI column, which holds the number
# of test samples: 1018 -> PPI5093, 734 -> PPI3672, 541 -> PPI2708.
df_5093 = df[df['PPI'].eq(1018)]
df_3672 = df[df['PPI'].eq(734)]
df_2708 = df[df['PPI'].eq(541)]

In [5]:
df_2708

Unnamed: 0,PPI,Accuracy,Precision,Recall,F-measure,G-mean2,Specificity,NPV,TN,FP,FN,TP
8,541.0,0.7301,0.5503,0.5706,0.5602,0.6752,0.7989,0.8118,302.0,76.0,70.0,93.0
9,541.0,0.7264,0.5478,0.5276,0.5375,0.6546,0.8122,0.7995,307.0,71.0,77.0,86.0
10,541.0,0.6728,0.4562,0.4479,0.452,0.5872,0.7698,0.7638,291.0,87.0,90.0,73.0
11,541.0,0.5508,0.2561,0.2577,0.2569,0.4177,0.6772,0.679,256.0,122.0,121.0,42.0
14,541.0,0.8059,0.7042,0.6135,0.6557,0.7385,0.8889,0.8421,336.0,42.0,63.0,100.0


In [6]:
# Keep only the four metrics shown in the bar charts.
# Selecting by name rather than `iloc[:, 1:5]` makes this cell idempotent:
# the positional slice applied to its own result would drop 'Accuracy' if
# the cell were run a second time.
metric_columns = ['Accuracy', 'Precision', 'Recall', 'F-measure']
df_5093 = df_5093.loc[:, metric_columns]
df_3672 = df_3672.loc[:, metric_columns]
df_2708 = df_2708.loc[:, metric_columns]
df_5093

Unnamed: 0,Accuracy,Precision,Recall,F-measure
0,0.7613,0.4955,0.4564,0.4752
1,0.7692,0.5144,0.444,0.4766
2,0.7505,0.4721,0.4564,0.4641
3,0.7613,0.4955,0.4564,0.4752
12,0.836,0.6529,0.6556,0.6542


In [7]:
import decimal
from decimal import Decimal


def _round_half_up(frame, places=3):
    """Return a new DataFrame with every value rounded half-up to ``places`` decimals.

    Each value is converted through its string form to ``Decimal`` and
    quantized with ``ROUND_HALF_UP``, then converted back to ``float``.
    The result is rebuilt from plain lists, so it carries a fresh default
    integer index rather than the index of ``frame``.
    """
    quantum = Decimal('0.' + '0' * places)
    rounded = {
        column: [
            float(Decimal(str(value)).quantize(quantum, rounding=decimal.ROUND_HALF_UP))
            for value in values
        ]
        for column, values in frame.to_dict(orient='list').items()
    }
    return pd.DataFrame(rounded)


# The same rounding is applied to all three tables; only the PPI5093 table
# is printed.
df_5093 = _round_half_up(df_5093)
print(df_5093)

df_3672 = _round_half_up(df_3672)
df_2708 = _round_half_up(df_2708)

    Accuracy   Precision   Recall   F-measure
0      0.761       0.496    0.456       0.475
1      0.769       0.514    0.444       0.477
2      0.751       0.472    0.456       0.464
3      0.761       0.496    0.456       0.475
4      0.836       0.653    0.656       0.654


In [8]:
# Label the five rows of each table by method. The labels are assigned by
# position, so the rows must already be in this order.
index_list = ['EPOC', 'ION', 'JDC', 'NCCO', 'EPViT']
for frame in (df_5093, df_3672, df_2708):
    frame.index = index_list

In [9]:
df_3672

Unnamed: 0,Accuracy,Precision,Recall,F-measure
EPOC,0.741,0.452,0.488,0.469
ION,0.764,0.497,0.471,0.484
JDC,0.706,0.375,0.384,0.379
NCCO,0.616,0.214,0.238,0.225
EPViT,0.807,0.576,0.663,0.616


In [10]:
df_2708

Unnamed: 0,Accuracy,Precision,Recall,F-measure
EPOC,0.73,0.55,0.571,0.56
ION,0.726,0.548,0.528,0.538
JDC,0.673,0.456,0.448,0.452
NCCO,0.551,0.256,0.258,0.257
EPViT,0.806,0.704,0.614,0.656


## Save the data shown in the bar charts

In [19]:
# One worksheet per dataset, written in this order into a single workbook
sheets = {
    "PPI5093": df_5093,
    "PPI3672": df_3672,
    "PPI2708": df_2708,
}
with pd.ExcelWriter(r"output_from_plotbar.xlsx") as xlsx:
    for sheet_name, frame in sheets.items():
        frame.to_excel(xlsx, sheet_name=sheet_name)

# Plot the bar charts and click the 'Save Image' button
## There are two cases:
### 1. Adjusting the starting scale value
### 2. Not adjusting the starting scale value

In [11]:
from pyecharts import options as opts
from pyecharts.charts import Bar

## 1. Adjusting the starting scale value

### PPI5093

In [175]:
# Series in drawing order: (legend name, row position in df_5093, bar colour).
# EPViT is the last row of the table and is drawn first.
series_5093 = [
    ('EPViT', -1, '#C52A20'),
    ('EPOC', 0, '#84A3A9'),
    ('ION', 1, '#597C8B'),
    ('JDC', 2, '#d5BA82'),
    ('NCCO', 3, '#D6BBC1'),
]

bar1_a = Bar().add_xaxis(df_5093.columns.tolist())
for legend_name, row_pos, bar_colour in series_5093:
    # Values are passed as strings formatted to three decimals.
    bar1_a.add_yaxis(
        legend_name,
        [f'{value:.3f}' for value in df_5093.iloc[row_pos, :]],
        color=bar_colour,
    )

# x-axis: name and tick labels at font size 20
x_axis_5093 = opts.AxisOpts(
    name='metrics',
    name_textstyle_opts=opts.TextStyleOpts(font_size=20),
    axislabel_opts=opts.LabelOpts(font_size=20),
)
# y-axis: starts at 0.4 with a tick every 0.2; name and labels at font size 20
y_axis_5093 = opts.AxisOpts(
    name='values',
    name_textstyle_opts=opts.TextStyleOpts(font_size=20),
    min_=0.4,
    interval=0.2,
    axislabel_opts=opts.LabelOpts(font_size=20),
)
# horizontal legend with font size 20
legend_5093 = opts.LegendOpts(
    pos_left='right',
    orient='horizontal',
    textstyle_opts=opts.TextStyleOpts(font_size=20),
)
# toolbox providing the 'Save Image' button (pixelRatio 5)
toolbox_5093 = opts.ToolboxOpts(
    is_show=True,
    pos_top='bottom',
    pos_left='right',
    feature={'saveAsImage': {'pixelRatio': 5}},
)
# value labels: black, rotated, placed inside the top of each bar
bar_labels_5093 = opts.LabelOpts(
    position='insidetop',
    font_size=15,
    rotate=55,
    color='black',
)

bar1_a = bar1_a.set_global_opts(
    xaxis_opts=x_axis_5093,
    yaxis_opts=y_axis_5093,
    legend_opts=legend_5093,
    title_opts=opts.TitleOpts(pos_left='center', pos_top=20),
    toolbox_opts=toolbox_5093,
).set_series_opts(label_opts=bar_labels_5093)

In [176]:
bar1_a.render_notebook()

### PPI3672

In [185]:
# Series in drawing order: (legend name, row position in df_3672, bar colour).
# EPViT is the last row of the table and is drawn first.
series_3672 = [
    ('EPViT', -1, '#C52A20'),
    ('EPOC', 0, '#84A3A9'),
    ('ION', 1, '#597C8B'),
    ('JDC', 2, '#d5BA82'),
    ('NCCO', 3, '#D6BBC1'),
]

bar2_a = Bar().add_xaxis(df_3672.columns.tolist())
for legend_name, row_pos, bar_colour in series_3672:
    # Values are passed as strings formatted to three decimals.
    bar2_a.add_yaxis(
        legend_name,
        [f'{value:.3f}' for value in df_3672.iloc[row_pos, :]],
        color=bar_colour,
    )

# x-axis: name and tick labels at font size 20
x_axis_3672 = opts.AxisOpts(
    name='metrics',
    name_textstyle_opts=opts.TextStyleOpts(font_size=20),
    axislabel_opts=opts.LabelOpts(font_size=20),
)
# y-axis: starts at 0.2 with a tick every 0.2; name and labels at font size 20
y_axis_3672 = opts.AxisOpts(
    name='values',
    name_textstyle_opts=opts.TextStyleOpts(font_size=20),
    min_=0.2,
    interval=0.2,
    axislabel_opts=opts.LabelOpts(font_size=20),
)
# horizontal legend with font size 20
legend_3672 = opts.LegendOpts(
    pos_left='right',
    orient='horizontal',
    textstyle_opts=opts.TextStyleOpts(font_size=20),
)
# toolbox providing the 'Save Image' button (pixelRatio 5)
toolbox_3672 = opts.ToolboxOpts(
    is_show=True,
    pos_top='bottom',
    pos_left='right',
    feature={'saveAsImage': {'pixelRatio': 5}},
)
# value labels: black, rotated, placed inside the top of each bar
bar_labels_3672 = opts.LabelOpts(
    position='insidetop',
    font_size=15,
    rotate=55,
    color='black',
)

bar2_a = bar2_a.set_global_opts(
    xaxis_opts=x_axis_3672,
    yaxis_opts=y_axis_3672,
    legend_opts=legend_3672,
    title_opts=opts.TitleOpts(pos_left='center', pos_top=20),
    toolbox_opts=toolbox_3672,
).set_series_opts(label_opts=bar_labels_3672)

In [186]:
bar2_a.render_notebook()

### PPI2708

In [187]:
# Series in drawing order: (legend name, row position in df_2708, bar colour).
# EPViT is the last row of the table and is drawn first.
series_2708 = [
    ('EPViT', -1, '#C52A20'),
    ('EPOC', 0, '#84A3A9'),
    ('ION', 1, '#597C8B'),
    ('JDC', 2, '#d5BA82'),
    ('NCCO', 3, '#D6BBC1'),
]

bar3_a = Bar().add_xaxis(df_2708.columns.tolist())
for legend_name, row_pos, bar_colour in series_2708:
    # Values are passed as strings formatted to three decimals.
    bar3_a.add_yaxis(
        legend_name,
        [f'{value:.3f}' for value in df_2708.iloc[row_pos, :]],
        color=bar_colour,
    )

# x-axis: name and tick labels at font size 20
x_axis_2708 = opts.AxisOpts(
    name='metrics',
    name_textstyle_opts=opts.TextStyleOpts(font_size=20),
    axislabel_opts=opts.LabelOpts(font_size=20),
)
# y-axis: starts at 0.2 with a tick every 0.2; name and labels at font size 20
y_axis_2708 = opts.AxisOpts(
    name='values',
    name_textstyle_opts=opts.TextStyleOpts(font_size=20),
    min_=0.2,
    interval=0.2,
    axislabel_opts=opts.LabelOpts(font_size=20),
)
# horizontal legend with font size 20
legend_2708 = opts.LegendOpts(
    pos_left='right',
    orient='horizontal',
    textstyle_opts=opts.TextStyleOpts(font_size=20),
)
# toolbox providing the 'Save Image' button (pixelRatio 5)
toolbox_2708 = opts.ToolboxOpts(
    is_show=True,
    pos_top='bottom',
    pos_left='right',
    feature={'saveAsImage': {'pixelRatio': 5}},
)
# value labels: black, rotated, placed inside the top of each bar
bar_labels_2708 = opts.LabelOpts(
    position='insidetop',
    font_size=15,
    rotate=55,
    color='black',
)

bar3_a = bar3_a.set_global_opts(
    xaxis_opts=x_axis_2708,
    yaxis_opts=y_axis_2708,
    legend_opts=legend_2708,
    title_opts=opts.TitleOpts(pos_left='center', pos_top=20),
    toolbox_opts=toolbox_2708,
).set_series_opts(label_opts=bar_labels_2708)

In [188]:
bar3_a.render_notebook()