In [1]:
%%writefile experiment_evaluation_batchsize_n_epoch_effect.py
from segmenter.using_whisper.evaluation_on_dataset import get_whisper_tagging_on_dataset
from segmenter.using_whisper.model_inference import get_model
import datasets
import torch

import pandas as pd
from sklearn import metrics

def get_evaluation_on_testset(model_path,model_arc):
    """
    loads the model and the 5% testset dataset. 
    Then applys whisper evaluation on the dataset 
    and gets the kappa values for each metric. 
    """
    device = torch.device("cuda")
    model = get_model(
        device=device,
        model_path=model_path,
        model_arc=model_arc,
    )
    ds = datasets.DatasetDict.load_from_disk("/home/moshebr/dharelg/moshe/ds_eran_original/")["test"]
    df_eval_for_ds_ = get_whisper_tagging_on_dataset(ds,model)

    rr=df_eval_for_ds_
    rr_wo_dups = rr[rr.dataset_id.duplicated()]
    return pd.Series({
        "segmentation_kappa":metrics.cohen_kappa_score(rr.segmentation_truth,rr.segmentation_pred),
        "segmentation_kappa_wo_start_of_turns":metrics.cohen_kappa_score(rr_wo_dups.segmentation_truth,rr_wo_dups.segmentation_pred),
        "focus_kappa":metrics.cohen_kappa_score(rr.emphasis_truth,rr.emphasis_pred),
        "prototype_kappa":metrics.cohen_kappa_score(rr.prototype_truth,rr.prototype_pred),
        "model_path":model_path,
        "model_arc":model_arc,
    })


samples2 = pd.DataFrame([ # model_path,model_arc, desc
(
    f"/home/moshebr/dharelg/moshe/experiment_8_batchsize10_modelarcsmall_labelsreg/checkpoint-{i}00/pytorch_model.bin",
    "small",
    f"smal batch 10, {i}00 steps (1 epoch) token-per-tag",
) for i in range(1,11)
], columns = ["model_path","model_arch","model_description"])
# samples2 = pd.DataFrame([ # model_path,model_arc, desc
#     (
#         "/home/moshebr/dharelg/moshe/experiment_2_batchsize10_modelarcmedium/checkpoint-100/pytorch_model.bin",
#         "medium",
#         "medium batch 10, 100 steps (1 epoch)",
#     ),(
#         "/home/moshebr/dharelg/moshe/experiment_2_batchsize10_modelarcmedium/checkpoint-300/pytorch_model.bin",
#         "medium",
#         "medium batch 10, 300 steps (3 epochs)",
#     ),(
#         "/home/moshebr/dharelg/moshe/experiment_2_batchsize10_modelarcmedium/checkpoint-600/pytorch_model.bin",
#         "medium",
#         "medium batch 10, 600 steps (6 epochs)",
#     ),(
#         '/home/moshebr/dharelg/moshe/experiment_1_batchsize10/checkpoint-300/pytorch_model.bin',
#         "small",
#         "small no accumulation, batch size 10, 300 steps",
#     ),(
#         '/home/moshebr/dharelg/moshe/recreation_script_again_w_small/checkpoint-1000/pytorch_model.bin',
#         "small",
#         "original train small (accumulation 16 and batch size 1 with 1000 steps)",
#     ),(
#         '/home/moshebr/dharelg/moshe/recreation_script_again_w_medium/checkpoint-1000/pytorch_model.bin',
#         "medium",
#         "original train medium (accumulation 16 and batch size 1 with 1000 steps)",
#     ),
# ], columns = ["model_path","model_arch","model_description"])
# samples2.to_csv("experiment_documentation_with_probably_some_errors2.csv")

aaa=samples2.apply(lambda x:get_evaluation_on_testset(x.model_path,x.model_arch),axis=1)

# aaa.to_csv("/home/moshebr/some_evaluations_test_set2_update.csv")

# print(aaa)
# aaa

import pandas as pd 
a=pd.merge(
  samples2,
  aaa
    ).drop(columns=["model_arc"])
a.to_csv("small_labels_verions_reg_eval.csv")
# with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None):
#     display(a.iloc[:,2:])

Overwriting experiment_evaluation_batchsize_n_epoch_effect.py


In [2]:
%%writefile experiment_evaluation_batchsize_n_epoch_effect.sh
sbatch <<EOT
#!/bin/bash
#SBATCH --job-name=booya # Job name
#SBATCH --mem=60gb # Job memory request
#SBATCH --partition=normal.q # partition request
#SBATCH --gres=gpu:a10:1 # GPU devices request
#SBATCH --output=/home/moshebr/notebooks/lets_do_whisper/experiment_evaluation_batchsize_n_epoch_effect.log # Standard output and error log
#SBATCH --time=3:00:00 # Time limitation 
#SBATCH --mail-user=barboym@yahoo.com # Email address for notification sending
#SBATCH --mail-type=END,FAIL # When to send email notification
/home/moshebr/.conda/envs/whisper/bin/python /home/moshebr/notebooks/lets_do_whisper/experiment_evaluation_batchsize_n_epoch_effect.py ${1}
EOT

Overwriting experiment_evaluation_batchsize_n_epoch_effect.sh


In [3]:
!bash experiment_evaluation_batchsize_n_epoch_effect.sh

Submitted batch job 33923


In [4]:
!squeue

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON) 
             33909  normal.q     bash    shify  R    5:46:29      1 n115 
             33922  normal.q     bash  moshebr  R      16:18      1 n115 
             33910  normal.q     bash zacharyl  R    5:45:23      1 n114 
             33903    long.q     bash   rodfon  R    9:28:59      1 n114 
             33905    long.q     bash     liav  R    8:11:17      1 n114 
             33907    long.q       jk    guylu  R    7:31:48      1 n114 


In [24]:
cat /home/moshebr/notebooks/lets_do_whisper/experiment_evaluation_batchsize_n_epoch_effect.log

Opening a new Slurm session:
SLURM_JOB_ID = 33835
SLURM_NODELIST = n131
------------------------------------------
Personal temporary local directory allocated: $TMPDIR=/local_data/33835_moshebr/
Your free local storage on your $TMPDIR is currently:  695G
You can use variable name $TMPDIR in your session
.


In [39]:
import pandas as pd 
a=pd.merge(
  samples2,
  aaa
    ).drop(columns=["model_arc"])
a.to_csv("small_labels_verions_reg_eval")
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None):
    display(a.iloc[:,2:])

Unnamed: 0,model_description,segmentation_kappa,segmentation_kappa_wo_start_of_turns,focus_kappa,prototype_kappa
0,"medium batch 10, 100 steps (1 epoch)",0.925409,0.906374,0.295411,0.011703
1,"medium batch 10, 300 steps (3 epochs)",0.937554,0.92019,0.400008,0.502257
2,"medium batch 10, 600 steps (6 epochs)",0.927831,0.907225,0.474813,0.580043
3,"small no accumulation, batch size 10, 300 steps",0.922183,0.902103,0.423535,0.353964
4,original train small (accumulation 16 and batc...,0.891695,0.86269,0.51185,0.362958
5,original train medium (accumulation 16 and bat...,0.938912,0.922793,0.496077,0.526133
