In [None]:
import pandas as pd
import altair as alt

# Load the evaluation results that the rest of the notebook plots and summarizes.
# NOTE(review): presumably one row per article — confirm against the script that writes this CSV.
scores_csv_path = 'output/evaluate_summaries_output.csv'
df = pd.read_csv(scores_csv_path)

# Display names of the four summarization methods, in the order used for the
# chart axes and for relabelling the per-metric score columns.
column_names = [
  'pysummarization',
  'Gensim',
  'spaCy',
  'TextRank',
]

In [None]:
# Select the ROUGE-1 column of each summarizer and relabel it with the method's
# display name (the source columns are listed in the same order as column_names).
rouge1_source_columns = ['pysum_rouge1', 'gensim_rouge1', 'spacy_rouge1', 'cos_sim_rouge1']
rouge1_df = df[rouge1_source_columns].rename(
  columns = dict(zip(rouge1_source_columns, column_names))
)

# Axis encodings: methods on x in their declared order, scores on a fixed 0-1 y scale.
rouge1_method_axis = alt.X('Method:N', axis = alt.Axis(labelAngle = 0), sort = column_names)
rouge1_score_axis = alt.Y('Score:Q', scale = alt.Scale(domain = (0, 1)))

# Fold the four method columns into (Method, Score) pairs so a single boxplot
# mark draws one box per method.
rouge1_boxplot = (
  alt.Chart(rouge1_df, title = 'Scores for Rouge Unigram Metric per Summarization Method')
  .transform_fold(column_names, as_ = ['Method', 'Score'])
  .mark_boxplot(size = 50, outliers = {'size': 3})
  .encode(x = rouge1_method_axis, y = rouge1_score_axis)
  .properties(width = 300)
)

# The configured chart is the cell's last expression, so the notebook renders it.
rouge1_boxplot.configure_title(fontSize = 14).configure_axis(labelFontSize = 12, titleFontSize = 12)

In [None]:
# One histogram of ROUGE-1 article scores per summarization method. Every panel
# uses the same bins (0 to 1 in steps of 0.05) so the distributions can be
# compared directly. rouge1_df's columns are exactly column_names, so the method
# name doubles as the field name.
rouge1_histograms = [
  alt.Chart(rouge1_df).mark_bar().encode(
    x = alt.X(f'{method}:Q', bin = alt.Bin(extent = [0, 1], step = 0.05), title = f'{method} Article Scores'),
    # Bar height is the y channel, so it is declared with alt.Y
    # (the copy-pasted panels passed an alt.X object here).
    y = alt.Y('count()', title = 'Number of Articles')
  ).properties(height = 100, width = 500)
  for method in column_names
]

# Stack the panels vertically under one shared title; the expression is the
# cell's last statement, so the notebook renders it.
# NOTE(review): the title's own fontSize = 16 likely overrides
# configure_title(fontSize = 14) — confirm which size is intended.
(
  alt.vconcat(*rouge1_histograms)
  .properties(title = alt.TitleParams(text = 'Scores for Rouge Unigram Metric per Summarization Method', fontSize = 16, align = 'center', dx = 275))
  .configure_title(fontSize = 14)
  .configure_axis(labelFontSize = 12, titleFontSize = 12)
)

In [None]:
# Select the ROUGE-2 column of each summarizer and relabel it with the method's
# display name (the source columns are listed in the same order as column_names).
rouge2_source_columns = ['pysum_rouge2', 'gensim_rouge2', 'spacy_rouge2', 'cos_sim_rouge2']
rouge2_df = df[rouge2_source_columns].rename(
  columns = dict(zip(rouge2_source_columns, column_names))
)

# Axis encodings: methods on x in their declared order, scores on a fixed 0-1 y scale.
rouge2_method_axis = alt.X('Method:N', axis = alt.Axis(labelAngle = 0), sort = column_names)
rouge2_score_axis = alt.Y('Score:Q', scale = alt.Scale(domain = (0, 1)))

# Fold the four method columns into (Method, Score) pairs so a single boxplot
# mark draws one box per method.
rouge2_boxplot = (
  alt.Chart(rouge2_df, title = 'Scores for Rouge Bigram Metric per Summarization Method')
  .transform_fold(column_names, as_ = ['Method', 'Score'])
  .mark_boxplot(size = 50, outliers = {'size': 3})
  .encode(x = rouge2_method_axis, y = rouge2_score_axis)
  .properties(width = 300)
)

# The configured chart is the cell's last expression, so the notebook renders it.
rouge2_boxplot.configure_title(fontSize = 14).configure_axis(labelFontSize = 12, titleFontSize = 12)

In [None]:
# One histogram of ROUGE-2 article scores per summarization method. Every panel
# uses the same bins (0 to 1 in steps of 0.05) so the distributions can be
# compared directly. rouge2_df's columns are exactly column_names, so the method
# name doubles as the field name.
rouge2_histograms = [
  alt.Chart(rouge2_df).mark_bar().encode(
    x = alt.X(f'{method}:Q', bin = alt.Bin(extent = [0, 1], step = 0.05), title = f'{method} Article Scores'),
    # Bar height is the y channel, so it is declared with alt.Y
    # (the copy-pasted panels passed an alt.X object here).
    y = alt.Y('count()', title = 'Number of Articles')
  ).properties(height = 100, width = 500)
  for method in column_names
]

# Stack the panels vertically under one shared title; the expression is the
# cell's last statement, so the notebook renders it.
# NOTE(review): the title's own fontSize = 16 likely overrides
# configure_title(fontSize = 14) — confirm which size is intended.
(
  alt.vconcat(*rouge2_histograms)
  .properties(title = alt.TitleParams(text = 'Scores for Rouge Bigram Metric per Summarization Method', fontSize = 16, align = 'center', dx = 275))
  .configure_title(fontSize = 14)
  .configure_axis(labelFontSize = 12, titleFontSize = 12)
)

In [None]:
# Select the ROUGE-L column of each summarizer and relabel it with the method's
# display name (the source columns are listed in the same order as column_names).
rougeL_source_columns = ['pysum_rougeL', 'gensim_rougeL', 'spacy_rougeL', 'cos_sim_rougeL']
rougeL_df = df[rougeL_source_columns].rename(
  columns = dict(zip(rougeL_source_columns, column_names))
)

# Axis encodings: methods on x in their declared order, scores on a fixed 0-1 y scale.
rougeL_method_axis = alt.X('Method:N', axis = alt.Axis(labelAngle = 0), sort = column_names)
rougeL_score_axis = alt.Y('Score:Q', scale = alt.Scale(domain = (0, 1)))

# Fold the four method columns into (Method, Score) pairs so a single boxplot
# mark draws one box per method.
rougeL_boxplot = (
  alt.Chart(rougeL_df, title = 'Scores for Rouge L Metric per Summarization Method')
  .transform_fold(column_names, as_ = ['Method', 'Score'])
  .mark_boxplot(size = 50, outliers = {'size': 3})
  .encode(x = rougeL_method_axis, y = rougeL_score_axis)
  .properties(width = 300)
)

# The configured chart is the cell's last expression, so the notebook renders it.
rougeL_boxplot.configure_title(fontSize = 14).configure_axis(labelFontSize = 12, titleFontSize = 12)

In [None]:
# One histogram of ROUGE-L article scores per summarization method. Every panel
# uses the same bins (0 to 1 in steps of 0.05) so the distributions can be
# compared directly. rougeL_df's columns are exactly column_names, so the method
# name doubles as the field name.
rougeL_histograms = [
  alt.Chart(rougeL_df).mark_bar().encode(
    x = alt.X(f'{method}:Q', bin = alt.Bin(extent = [0, 1], step = 0.05), title = f'{method} Article Scores'),
    # Bar height is the y channel, so it is declared with alt.Y
    # (the copy-pasted panels passed an alt.X object here).
    y = alt.Y('count()', title = 'Number of Articles')
  ).properties(height = 100, width = 500)
  for method in column_names
]

# Stack the panels vertically under one shared title; the expression is the
# cell's last statement, so the notebook renders it.
# NOTE(review): the title's own fontSize = 16 likely overrides
# configure_title(fontSize = 14) — confirm which size is intended.
(
  alt.vconcat(*rougeL_histograms)
  .properties(title = alt.TitleParams(text = 'Scores for Rouge L Metric per Summarization Method', fontSize = 16, align = 'center', dx = 275))
  .configure_title(fontSize = 14)
  .configure_axis(labelFontSize = 12, titleFontSize = 12)
)

In [None]:
# Select the ROUGE-Lsum column of each summarizer and relabel it with the method's
# display name (the source columns are listed in the same order as column_names).
rougeLsum_source_columns = ['pysum_rougeLsum', 'gensim_rougeLsum', 'spacy_rougeLsum', 'cos_sim_rougeLsum']
rougeLsum_df = df[rougeLsum_source_columns].rename(
  columns = dict(zip(rougeLsum_source_columns, column_names))
)

# Axis encodings: methods on x in their declared order, scores on a fixed 0-1 y scale.
rougeLsum_method_axis = alt.X('Method:N', axis = alt.Axis(labelAngle = 0), sort = column_names)
rougeLsum_score_axis = alt.Y('Score:Q', scale = alt.Scale(domain = (0, 1)))

# Fold the four method columns into (Method, Score) pairs so a single boxplot
# mark draws one box per method.
rougeLsum_boxplot = (
  alt.Chart(rougeLsum_df, title = 'Scores for Rouge Lsum Metric per Summarization Method')
  .transform_fold(column_names, as_ = ['Method', 'Score'])
  .mark_boxplot(size = 50, outliers = {'size': 3})
  .encode(x = rougeLsum_method_axis, y = rougeLsum_score_axis)
  .properties(width = 300)
)

# The configured chart is the cell's last expression, so the notebook renders it.
rougeLsum_boxplot.configure_title(fontSize = 14).configure_axis(labelFontSize = 12, titleFontSize = 12)

In [None]:
# One histogram of ROUGE-Lsum article scores per summarization method. Every panel
# uses the same bins (0 to 1 in steps of 0.05) so the distributions can be
# compared directly. rougeLsum_df's columns are exactly column_names, so the
# method name doubles as the field name.
rougeLsum_histograms = [
  alt.Chart(rougeLsum_df).mark_bar().encode(
    x = alt.X(f'{method}:Q', bin = alt.Bin(extent = [0, 1], step = 0.05), title = f'{method} Article Scores'),
    # Bar height is the y channel, so it is declared with alt.Y
    # (the copy-pasted panels passed an alt.X object here).
    y = alt.Y('count()', title = 'Number of Articles')
  ).properties(height = 100, width = 500)
  for method in column_names
]

# Stack the panels vertically under one shared title; the expression is the
# cell's last statement, so the notebook renders it.
# NOTE(review): the title's own fontSize = 16 likely overrides
# configure_title(fontSize = 14) — confirm which size is intended.
(
  alt.vconcat(*rougeLsum_histograms)
  .properties(title = alt.TitleParams(text = 'Scores for Rouge Lsum Metric per Summarization Method', fontSize = 16, align = 'center', dx = 275))
  .configure_title(fontSize = 14)
  .configure_axis(labelFontSize = 12, titleFontSize = 12)
)

In [None]:
# Select the BLEU column of each summarizer and relabel it with the method's
# display name (the source columns are listed in the same order as column_names).
bleu_source_columns = ['pysum_bleu', 'gensim_bleu', 'spacy_bleu', 'cos_sim_bleu']
bleu_df = df[bleu_source_columns].rename(
  columns = dict(zip(bleu_source_columns, column_names))
)

# Axis encodings: methods on x in their declared order, scores on a fixed 0-1 y scale.
bleu_method_axis = alt.X('Method:N', axis = alt.Axis(labelAngle = 0), sort = column_names)
bleu_score_axis = alt.Y('Score:Q', scale = alt.Scale(domain = (0, 1)))

# Fold the four method columns into (Method, Score) pairs so a single boxplot
# mark draws one box per method.
bleu_boxplot = (
  alt.Chart(bleu_df, title = 'Scores for Bleu Metric per Summarization Method')
  .transform_fold(column_names, as_ = ['Method', 'Score'])
  .mark_boxplot(size = 50, outliers = {'size': 3})
  .encode(x = bleu_method_axis, y = bleu_score_axis)
  .properties(width = 300)
)

# The configured chart is the cell's last expression, so the notebook renders it.
bleu_boxplot.configure_title(fontSize = 14).configure_axis(labelFontSize = 12, titleFontSize = 12)

In [None]:
# One histogram of BLEU article scores per summarization method. Every panel
# uses the same bins (0 to 1 in steps of 0.05) so the distributions can be
# compared directly. bleu_df's columns are exactly column_names, so the method
# name doubles as the field name.
bleu_histograms = [
  alt.Chart(bleu_df).mark_bar().encode(
    x = alt.X(f'{method}:Q', bin = alt.Bin(extent = [0, 1], step = 0.05), title = f'{method} Article Scores'),
    # Bar height is the y channel, so it is declared with alt.Y
    # (the copy-pasted panels passed an alt.X object here).
    y = alt.Y('count()', title = 'Number of Articles')
  ).properties(height = 100, width = 500)
  for method in column_names
]

# Stack the panels vertically under one shared title; the expression is the
# cell's last statement, so the notebook renders it.
# NOTE(review): the title's own fontSize = 16 likely overrides
# configure_title(fontSize = 14) — confirm which size is intended.
(
  alt.vconcat(*bleu_histograms)
  .properties(title = alt.TitleParams(text = 'Scores for Bleu Metric per Summarization Method', fontSize = 16, align = 'center', dx = 275))
  .configure_title(fontSize = 14)
  .configure_axis(labelFontSize = 12, titleFontSize = 12)
)

In [None]:
# Select the BERTScore column of each summarizer and relabel it with the method's
# display name (the source columns are listed in the same order as column_names).
bert_source_columns = ['pysum_bert', 'gensim_bert', 'spacy_bert', 'cos_sim_bert']
bert_df = df[bert_source_columns].rename(
  columns = dict(zip(bert_source_columns, column_names))
)

# Axis encodings: methods on x in their declared order, scores on a fixed 0-1 y scale.
bert_method_axis = alt.X('Method:N', axis = alt.Axis(labelAngle = 0), sort = column_names)
bert_score_axis = alt.Y('Score:Q', scale = alt.Scale(domain = (0, 1)))

# Fold the four method columns into (Method, Score) pairs so a single boxplot
# mark draws one box per method.
bert_boxplot = (
  alt.Chart(bert_df, title = 'Scores for BertScore Metric per Summarization Method')
  .transform_fold(column_names, as_ = ['Method', 'Score'])
  .mark_boxplot(size = 50, outliers = {'size': 3})
  .encode(x = bert_method_axis, y = bert_score_axis)
  .properties(width = 300)
)

# The configured chart is the cell's last expression, so the notebook renders it.
bert_boxplot.configure_title(fontSize = 14).configure_axis(labelFontSize = 12, titleFontSize = 12)

In [None]:
# One histogram of BERTScore article scores per summarization method. Every panel
# uses the same bins (0 to 1 in steps of 0.05) so the distributions can be
# compared directly. bert_df's columns are exactly column_names, so the method
# name doubles as the field name.
bert_histograms = [
  alt.Chart(bert_df).mark_bar().encode(
    x = alt.X(f'{method}:Q', bin = alt.Bin(extent = [0, 1], step = 0.05), title = f'{method} Article Scores'),
    # Bar height is the y channel, so it is declared with alt.Y
    # (the copy-pasted panels passed an alt.X object here).
    y = alt.Y('count()', title = 'Number of Articles')
  ).properties(height = 100, width = 500)
  for method in column_names
]

# Stack the panels vertically under one shared title; the expression is the
# cell's last statement, so the notebook renders it.
# NOTE(review): the title's own fontSize = 16 likely overrides
# configure_title(fontSize = 14) — confirm which size is intended.
(
  alt.vconcat(*bert_histograms)
  .properties(title = alt.TitleParams(text = 'Scores for BertScore Metric per Summarization Method', fontSize = 16, align = 'center', dx = 275))
  .configure_title(fontSize = 14)
  .configure_axis(labelFontSize = 12, titleFontSize = 12)
)

In [None]:
# Select the METEOR column of each summarizer and relabel it with the method's
# display name (the source columns are listed in the same order as column_names).
meteor_source_columns = ['pysum_meteor', 'gensim_meteor', 'spacy_meteor', 'cos_sim_meteor']
meteor_df = df[meteor_source_columns].rename(
  columns = dict(zip(meteor_source_columns, column_names))
)

# Axis encodings: methods on x in their declared order, scores on a fixed 0-1 y scale.
meteor_method_axis = alt.X('Method:N', axis = alt.Axis(labelAngle = 0), sort = column_names)
meteor_score_axis = alt.Y('Score:Q', scale = alt.Scale(domain = (0, 1)))

# Fold the four method columns into (Method, Score) pairs so a single boxplot
# mark draws one box per method.
meteor_boxplot = (
  alt.Chart(meteor_df, title = 'Scores for Meteor Metric per Summarization Method')
  .transform_fold(column_names, as_ = ['Method', 'Score'])
  .mark_boxplot(size = 50, outliers = {'size': 3})
  .encode(x = meteor_method_axis, y = meteor_score_axis)
  .properties(width = 300)
)

# The configured chart is the cell's last expression, so the notebook renders it.
meteor_boxplot.configure_title(fontSize = 14).configure_axis(labelFontSize = 12, titleFontSize = 12)

In [None]:
# One histogram of METEOR article scores per summarization method. Every panel
# uses the same bins (0 to 1 in steps of 0.05) so the distributions can be
# compared directly. meteor_df's columns are exactly column_names, so the method
# name doubles as the field name.
meteor_histograms = [
  alt.Chart(meteor_df).mark_bar().encode(
    x = alt.X(f'{method}:Q', bin = alt.Bin(extent = [0, 1], step = 0.05), title = f'{method} Article Scores'),
    # Bar height is the y channel, so it is declared with alt.Y
    # (the copy-pasted panels passed an alt.X object here).
    y = alt.Y('count()', title = 'Number of Articles')
  ).properties(height = 100, width = 500)
  for method in column_names
]

# Stack the panels vertically under one shared title; the expression is the
# cell's last statement, so the notebook renders it.
# NOTE(review): the title's own fontSize = 16 likely overrides
# configure_title(fontSize = 14) — confirm which size is intended.
(
  alt.vconcat(*meteor_histograms)
  .properties(title = alt.TitleParams(text = 'Scores for Meteor Metric per Summarization Method', fontSize = 16, align = 'center', dx = 275))
  .configure_title(fontSize = 14)
  .configure_axis(labelFontSize = 12, titleFontSize = 12)
)

In [None]:
# Summary statistics (mean, std, min, quartiles, max) for every library/scorer
# combination, with the rows grouped by scorer.

# CSV column prefix -> display name of the summarization library.
library_labels = {
  'pysum': 'pysummarization',
  'gensim': 'Gensim',
  'spacy': 'spaCy',
  'cos_sim': 'TextRank'
}
scorers = ['rouge1', 'rouge2', 'rougeL', 'rougeLsum', 'bleu', 'bert', 'meteor']

# Source column name -> (library, scorer), in library-major order. Building the
# labels directly avoids encoding them into a string and splitting it again.
score_columns = {
  f'{prefix}_{scorer}': (library, scorer)
  for prefix, library in library_labels.items()
  for scorer in scorers
}

# One row of statistics per score column; the count column is not shown.
score_stats = df[list(score_columns)].describe().T.drop(columns = 'count')

# Attach the library/scorer labels by looking up each remaining row's source
# column, then fall back to a plain positional index.
score_labels = pd.DataFrame(
  [score_columns[column] for column in score_stats.index],
  columns = ['library', 'scorer'],
  index = score_stats.index
)
describe_df = pd.concat([score_labels, score_stats], axis = 1).reset_index(drop = True)

# Group the rows by scorer explicitly. The previous
# groupby(['scorer']).apply(lambda x: x) was an identity apply whose result
# layout depends on the installed pandas version. A stable sort (mergesort)
# keeps the libraries in their original order within each scorer, and the
# scorer becomes the outer index level so the table displays one group per scorer.
describe_df = (
  describe_df
  .sort_values('scorer', kind = 'mergesort')
  .set_index('scorer', append = True)
  .swaplevel(0, 1)
)
describe_df = describe_df[['library', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']]
describe_df