In [1]:
from pathlib import Path
import pandas as pd
import os
from plotnine import *

In [2]:
DIR = Path('results')
IMG_DIR = Path('imgs')
IMG_DIR.mkdir(exist_ok=True)

In [3]:
# 获取数据
tops = []
steps = []
ecs = []
accs = []
folds = []
for ec in range(5):
    fold_dir = DIR / str(ec)
    for fold in os.listdir(fold_dir):
        step_dir = fold_dir / str(fold)
        for step in os.listdir(step_dir):
            csv_dir = step_dir / str(step)
            for top in range(1, 6):
                ecs.append(str(ec))
                folds.append(str(fold))
                steps.append(int(step))
                tops.append(str(top))
                csv = csv_dir / f'top{top}.csv'
                df = pd.read_csv(csv)
                accs.append(df[(df['ec'] == 'overall') & (df['type'] == 'forward')]['value'].values[0])

df = pd.DataFrame({'ec': ecs, 'top': tops, 'acc': accs, 'step': steps, 'fold': folds})

In [4]:
df.head()

Unnamed: 0,ec,top,acc,step,fold
0,0,1,0.635708,10000,1
1,0,2,0.731289,10000,1
2,0,3,0.76826,10000,1
3,0,4,0.781785,10000,1
4,0,5,0.794409,10000,1


In [5]:
# 准确率-步数 不同 LEVEL
for fold, fgroup in df.groupby('fold'):
    for ec, ecgroup in fgroup.groupby('ec'):
        step_plot = (ggplot(ecgroup, aes(x='step', y='acc',
                                         group='top', color='top')) +
                     geom_line(size=1) +
                     scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space='husl') +
                     xlab("Steps") +
                     ylab("Accuracy") +
                     theme_matplotlib() +
                     guides(color = guide_legend(title='Top-K')) +
                     theme(axis_title=element_text(size=10, face="plain", color="black"),
                           axis_text=element_text(size=10, face="plain", color="black"),
                           legend_position=(0.8, 0.3),
                        
                           legend_background=element_blank(),
                           aspect_ratio=0.85,
                           figure_size=(12, 8),
                           dpi=120
                           ))
        step_plot.save(IMG_DIR / f'fold_{fold}_{ec}_step_plot.png')



In [6]:
"""top-1 随 steps 的增加而增加"""
top_step_df = df[df['top'] == '1']
top_step_df.head()

Unnamed: 0,ec,top,acc,step,fold
0,0,1,0.635708,10000,1
5,0,1,0.719567,20000,1
10,0,1,0.429216,5000,1
15,0,1,0.784491,45000,1
20,0,1,0.767358,30000,1


In [7]:
# 拿各自最好的 step 来比较
means = []
ecs = []
steps = []
for (step, ec), group in top_step_df.groupby(['step', 'ec']):
    means.append(group['acc'].mean())
    steps.append(step)
    ecs.append(ec)
mean_top_step_df = pd.DataFrame({'ec': ecs, 'acc': means, 'step': steps})
mean_top_step_df.head()

Unnamed: 0,ec,acc,step
0,0,0.426435,5000
1,1,0.48107,5000
2,2,0.484132,5000
3,3,0.507037,5000
4,4,0.529932,5000


In [8]:


top_step_plot = (ggplot(mean_top_step_df, aes(x='step', y='acc', group='ec', color='ec')) +
                geom_line(size=1) +
                scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space='husl') +
                xlab("Steps") +
                ylab("Accuracy") +
                theme_matplotlib() +
                guides(color=guide_legend(title='Reaction Condition Counts')) +
                theme(axis_title=element_text(size=10, face="plain", color="black"),
                    axis_text=element_text(
                    size=10, face="plain", color="black"),
    legend_position=(0.8, 0.3),
    legend_background=element_blank(),
    aspect_ratio=0.85,
    figure_size=(12, 8),
    dpi=120
))
top_step_plot.save(IMG_DIR / f'top_step_plot.png')




In [9]:
# 不同 step 下的准确率
for fold, fgroup in df.groupby('fold'):
    for step, stepgroup in fgroup.groupby('step'):
        ec_plot = (ggplot(stepgroup, aes(x='ec', y='acc',
                                         group='top', color='top')) +
                   geom_line(size=1) +
                   scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space='husl') +
                   xlab("Reaction Condition Counts") +
                   ylab("Accuracy") +
                   guides(color = guide_legend(title='Top-K')) +
                   theme(axis_title=element_text(size=10, face="plain", color="black"),
                         axis_text=element_text(size=10, face="plain", color="black"),
                         legend_position=(0.8, 0.3),
                         legend_background=element_blank(),
                         aspect_ratio=0.85,
                         figure_size=(8, 8),
                         dpi=100
                         ) +
                   theme_matplotlib()
                   )
        ec_plot.save(IMG_DIR / f'fold{fold}_{step}_ec_plot')



In [10]:
# 拿各自最好的 step 来比较
bests = []
means = []
for (fold, ec, top), group in df.groupby(['fold', 'ec', 'top']):
    bests.append(group.iloc[group['acc'].argmax()])
best_df = pd.concat(bests, axis=1).T

In [11]:
from pandas import CategoricalDtype


def rd(x):
    return round(x, 4)


for top, group in best_df.groupby('top'):
    group['class'] = group.ec.apply(str)
    group['class'] = group['class'].astype(CategoricalDtype(categories=["0", "1", "2", "3", "4"], ordered=True))
    inner_df = pd.DataFrame({'class': group['class'], 'value': group['acc'].apply(rd)})
    box_plot = (ggplot(inner_df, aes(x='class', y="value", fill="class"))
                + geom_boxplot(show_legend=True)
                + geom_jitter(fill="blue", shape=".", width=0.3, size=4, stroke=0.1, show_legend=True)
                + scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space='husl')
                + theme_matplotlib()
                + theme(
                aspect_ratio=1.05,
                dpi=100,
                figure_size=(8, 8)))
    box_plot.save(IMG_DIR / f'top_{top}_boxplot')



In [12]:
means = []
ecs = []
tops = []
for (ec, top), group in best_df.groupby(['ec', 'top']):
    means.append(group['acc'].mean())
    ecs.append(ec)
    tops.append(top)
means_df = pd.DataFrame({'class': ecs, 'top': tops, 'value': means})
best_plot = (ggplot(means_df, aes(x='class', y='value',
                                 group='top', color='top')) +
             geom_line(size=1) +
             scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space='husl') +
             xlab("Reaction Condition Counts") +
             ylab("Accuracy") +
             theme_matplotlib() +
             guides(color = guide_legend(title='Top-K')) +
             theme(axis_title=element_text(size=10, face="plain", color="black"),
                   axis_text=element_text(size=10, face="plain", color="black"),
                   legend_position=(0.8, 0.3),
                   legend_background=element_blank(),
                   aspect_ratio=0.85,
                   figure_size=(12, 8),
                   dpi=120
                   ))
best_plot.save(IMG_DIR / 'best.png')

