In [2]:
import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import json 
import re

In [50]:
output_dir = "/gscratch/balazinska/enhaoz/complex_event_video/src/outputs/clevrer_collision/"
output_files = [
    "greatest_confidence-nearby3-random_forest_with_balanced_class_weights.txt",
    "least_confidence-nearby3-random_forest_with_balanced_class_weights.txt",
    "random-nearby3-random_forest_with_balanced_class_weights.txt",
    "greatest_confidence-nearby3-random_forest_with_balanced_class_weights-with_heuristic.txt",
    "least_confidence-nearby3-random_forest_with_balanced_class_weights-with_heuristic.txt",
    "random-nearby3-random_forest_with_balanced_class_weights-with_heuristic.txt",
]
method_names = [
    "greatest_confidence",
    "least_confidence",
    "random",
    "greatest_confidence-with_heuristic",
    "least_confidence-with_heuristic",
    "random-with_heuristic",
]
# method_names[k] is the label attached to every row parsed from output_files[k].
assert len(method_names) == len(output_files)

# Raw strings keep `\[` / `\]` as regex escapes instead of invalid Python string escapes.
levels = ('pair', 'frame')
train_size_re = re.compile(r'training with (.*) data: ')
eval_line_res = {
    level: re.compile(
        r'\[evaluation ' + level + r' level\] '
        + r'balanced_accuracy: (?P<balanced_accuracy>.*); f1_score: (?P<f1>.*); '
        + r'tn, fp, fn, tp: (?P<tn>.*), (?P<fp>.*), (?P<fn>.*), (?P<tp>.*)'
    )
    for level in levels
}
# Named groups of an evaluation line, in column order, with the type each is converted to.
metric_casts = [('balanced_accuracy', float), ('f1', float), ('tn', int), ('fp', int), ('fn', int), ('tp', int)]

table = []
for method_name, output_file in zip(method_names, output_files):
    log_path = os.path.join(output_dir, output_file)
    # Each log is a JSON list of strings.
    with open(log_path, "r") as f:
        data = json.load(f)
    # The list is consumed three entries at a time: a 'training with N data: ' line,
    # then the pair-level and the frame-level evaluation lines.
    if len(data) % 3 != 0:
        raise ValueError(
            "{}: expected a multiple of 3 log entries, found {}".format(log_path, len(data))
        )
    for i in range(0, len(data), 3):
        m = train_size_re.match(data[i])
        if m is None:
            raise ValueError(
                "{}: entry {} is not a 'training with ... data: ' line: {!r}".format(log_path, i, data[i])
            )
        row = [method_name, int(m.group(1))]
        for offset, level in enumerate(levels, start=1):
            line = data[i + offset]
            m = eval_line_res[level].match(line)
            if m is None:
                raise ValueError(
                    "{}: entry {} is not an '[evaluation {} level]' line: {!r}".format(
                        log_path, i + offset, level, line
                    )
                )
            row.extend(cast(m.group(name)) for name, cast in metric_casts)
        table.append(row)

# Columns: method, num_train, then pair_level_* followed by frame_level_* metrics.
columns = ['method', 'num_train'] + [
    '{}_level_{}'.format(level, name) for level in levels for name, _ in metric_casts
]
df = pd.DataFrame(table, columns=columns)
    
def _pair_level_curve(metric, y_title):
    """Line chart of column `metric` of `df` against `num_train`, one line per method.

    `y_title` is the y-axis title. Methods are told apart by both colour and
    stroke-dash pattern.
    """
    return alt.Chart(df).mark_line().encode(
        x=alt.X('num_train', title='Number of training samples'),
        y=alt.Y(metric, title=y_title),
        color=alt.Color('method'),
        strokeDash=alt.StrokeDash('method'),
    )


pair_level_balanced_accuracy_plot = _pair_level_curve(
    'pair_level_balanced_accuracy', 'Pair level balanced accuracy'
)
pair_level_f1_plot = _pair_level_curve('pair_level_f1', 'Pair level f1 score')

# `|` places the two charts side by side; as the cell's last expression it is the displayed output.
pair_level_balanced_accuracy_plot | pair_level_f1_plot

In [51]:
def _frame_level_curve(metric, y_title):
    """Line chart of column `metric` of `df` against `num_train`, one line per method.

    `y_title` is the y-axis title. Methods are told apart by both colour and
    stroke-dash pattern.
    """
    return alt.Chart(df).mark_line().encode(
        x=alt.X('num_train', title='Number of training samples'),
        y=alt.Y(metric, title=y_title),
        color=alt.Color('method'),
        strokeDash=alt.StrokeDash('method'),
    )


frame_level_balanced_accuracy_plot = _frame_level_curve(
    'frame_level_balanced_accuracy', 'Frame level balanced accuracy'
)
frame_level_f1_plot = _frame_level_curve('frame_level_f1', 'Frame level f1 score')

# `|` places the two charts side by side; as the cell's last expression it is the displayed output.
frame_level_balanced_accuracy_plot | frame_level_f1_plot

In [52]:
instances_found_speed_dir = "/gscratch/balazinska/enhaoz/complex_event_video/src/outputs/clevrer_collision/instances_found_speed"
instances_found_speed_files = [
    "greatest_confidence-nearby3-random_forest_with_balanced_class_weights.json",
    "least_confidence-nearby3-random_forest_with_balanced_class_weights.json",
    "random-nearby3-random_forest_with_balanced_class_weights.json",
    "greatest_confidence-nearby3-random_forest_with_balanced_class_weights-with_heuristic.json",
    "least_confidence-nearby3-random_forest_with_balanced_class_weights-with_heuristic.json",
    "random-nearby3-random_forest_with_balanced_class_weights-with_heuristic.json",
]
method_names = [
    "greatest_confidence",
    "least_confidence",
    "random",
    "greatest_confidence-with_heuristic",
    "least_confidence-with_heuristic",
    "random-with_heuristic",
]

# Rows of [method, num_frames_examined, num_instances_found]; a row is emitted only
# where a file's sequence exceeds every value seen before it.
instances_found_speed_table = []
for method_name, speed_file in zip(method_names, instances_found_speed_files):
    with open(os.path.join(instances_found_speed_dir, speed_file), "r") as f:
        series = json.load(f)  # presumably a per-frame running count of instances found -- TODO confirm

    # Every method's curve starts at the origin.
    instances_found_speed_table.append([method_name, 0, 0])
    highest_seen = 0
    found_so_far = 0
    for position, value in enumerate(series):
        if value <= highest_seen:
            continue
        # NOTE(review): each increase counts as exactly one instance, however large the jump -- confirm.
        found_so_far += 1
        # NOTE(review): `position` is 0-based, so an increase at the first entry is plotted
        # at x=0 next to the origin row -- confirm whether position + 1 was intended.
        instances_found_speed_table.append([method_name, position, found_so_far])
        highest_seen = value

instances_found_speed = pd.DataFrame(
    instances_found_speed_table,
    columns=['method', 'num_frames_examined', 'num_instances_found'],
)

# One line per method: instances found as a function of frames examined.
# Methods are told apart by both colour and stroke-dash pattern.
speed_channels = {
    'x': alt.X('num_frames_examined', title='Number of frames examined'),
    'y': alt.Y('num_instances_found', title='Number of instances found'),
    'color': alt.Color('method'),
    'strokeDash': alt.StrokeDash('method'),
}
instances_found_speed_plot = alt.Chart(instances_found_speed).mark_line().encode(**speed_channels)
# Optional legend placement, currently disabled: .configure_legend(orient="bottom", columns=1)

instances_found_speed_plot

# Near model and Far model 

In [3]:
output_dir = "/gscratch/balazinska/enhaoz/complex_event_video/src/outputs/"
output_files = [
    "clevrer_far/2.0-least_confidence-original-random_forest.txt",
    "clevrer_far/2.0-greatest_confidence-original-random_forest.txt",
    "clevrer_far/2.0-random-original-random_forest.txt",
    "clevrer_near/1.0-least_confidence-original-random_forest.txt",
    "clevrer_near/1.0-greatest_confidence-original-random_forest.txt",
    "clevrer_near/1.0-random-original-random_forest.txt",
]
method_names = [
    "clevrer_far-least_confidence",
    "clevrer_far-greatest_confidence",
    "clevrer_far-random",
    "clevrer_near-least_confidence",
    "clevrer_near-greatest_confidence",
    "clevrer_near-random",
]
# method_names[k] is the label attached to every row parsed from output_files[k].
assert len(method_names) == len(output_files)

# Raw strings keep `\[` / `\]` as regex escapes instead of invalid Python string escapes.
levels = ('pair', 'frame')
train_size_re = re.compile(r'training with (.*) data: ')
eval_line_res = {
    level: re.compile(
        r'\[evaluation ' + level + r' level\] '
        + r'balanced_accuracy: (?P<balanced_accuracy>.*); f1_score: (?P<f1>.*); '
        + r'tn, fp, fn, tp: (?P<tn>.*), (?P<fp>.*), (?P<fn>.*), (?P<tp>.*)'
    )
    for level in levels
}
# Named groups of an evaluation line, in column order, with the type each is converted to.
metric_casts = [('balanced_accuracy', float), ('f1', float), ('tn', int), ('fp', int), ('fn', int), ('tp', int)]

table = []
for method_name, output_file in zip(method_names, output_files):
    log_path = os.path.join(output_dir, output_file)
    # Each log is a JSON list of strings.
    with open(log_path, "r") as f:
        data = json.load(f)
    # The list is consumed three entries at a time: a 'training with N data: ' line,
    # then the pair-level and the frame-level evaluation lines.
    if len(data) % 3 != 0:
        raise ValueError(
            "{}: expected a multiple of 3 log entries, found {}".format(log_path, len(data))
        )
    for i in range(0, len(data), 3):
        m = train_size_re.match(data[i])
        if m is None:
            raise ValueError(
                "{}: entry {} is not a 'training with ... data: ' line: {!r}".format(log_path, i, data[i])
            )
        row = [method_name, int(m.group(1))]
        for offset, level in enumerate(levels, start=1):
            line = data[i + offset]
            m = eval_line_res[level].match(line)
            if m is None:
                raise ValueError(
                    "{}: entry {} is not an '[evaluation {} level]' line: {!r}".format(
                        log_path, i + offset, level, line
                    )
                )
            row.extend(cast(m.group(name)) for name, cast in metric_casts)
        table.append(row)

# Columns: method, num_train, then pair_level_* followed by frame_level_* metrics.
columns = ['method', 'num_train'] + [
    '{}_level_{}'.format(level, name) for level in levels for name, _ in metric_casts
]
df = pd.DataFrame(table, columns=columns)
    
def _pair_level_curve_with_tooltip(metric, y_title, tooltip_title):
    """Line chart of column `metric` of `df` against `num_train`, one line per method.

    `y_title` labels the y axis. The hover tooltip lists the method, the
    training-set size and the metric value under the label `tooltip_title`.
    """
    # A fixed y range was tried at some point and left disabled: scale=alt.Scale(domain=[0.7, 1])
    return alt.Chart(df).mark_line().encode(
        x=alt.X('num_train', title='Number of training samples'),
        y=alt.Y(metric, title=y_title),
        color=alt.Color('method'),
        strokeDash=alt.StrokeDash('method'),
        tooltip=['method', 'num_train', alt.Tooltip(metric, title=tooltip_title)],
    )


pair_level_balanced_accuracy_plot = _pair_level_curve_with_tooltip(
    'pair_level_balanced_accuracy', 'Pair level balanced accuracy', "balanced acc"
)
pair_level_f1_plot = _pair_level_curve_with_tooltip(
    'pair_level_f1', 'Pair level f1 score', "F1 score"
)

# `|` places the two charts side by side; as the cell's last expression it is the displayed output.
pair_level_balanced_accuracy_plot | pair_level_f1_plot

In [4]:
def _frame_level_curve_with_tooltip(metric, y_title, tooltip_title):
    """Line chart of column `metric` of `df` against `num_train`, one line per method.

    `y_title` labels the y axis. The hover tooltip lists the method, the
    training-set size and the metric value under the label `tooltip_title`.
    """
    return alt.Chart(df).mark_line().encode(
        x=alt.X('num_train', title='Number of training samples'),
        y=alt.Y(metric, title=y_title),
        color=alt.Color('method'),
        strokeDash=alt.StrokeDash('method'),
        tooltip=['method', 'num_train', alt.Tooltip(metric, title=tooltip_title)],
    )


frame_level_balanced_accuracy_plot = _frame_level_curve_with_tooltip(
    'frame_level_balanced_accuracy', 'Frame level balanced accuracy', "balanced acc"
)
frame_level_f1_plot = _frame_level_curve_with_tooltip(
    'frame_level_f1', 'Frame level f1 score', "F1 score"
)

# `|` places the two charts side by side; as the cell's last expression it is the displayed output.
frame_level_balanced_accuracy_plot | frame_level_f1_plot