# Initial setup

If we're running in a remote environment such as Vast.ai, additional setup is required, including:

- Download and install necessary libraries
- Download and extract data

## System specs

In [2]:
# Record the operating system release of the machine this notebook runs on.
!cat /etc/os-release

PRETTY_NAME="Ubuntu 22.04.3 LTS"
NAME="Ubuntu"
VERSION_ID="22.04"
VERSION="22.04.3 LTS (Jammy Jellyfish)"
VERSION_CODENAME=jammy
ID=ubuntu
ID_LIKE=debian
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
UBUNTU_CODENAME=jammy


In [3]:
# Record the GPU model, driver and CUDA version available to this notebook.
!nvidia-smi

Mon Oct 14 13:44:51 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.171.04             Driver Version: 535.171.04   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4080        On  | 00000000:81:00.0 Off |                  N/A |
| 50%   40C    P8               8W / 320W |      1MiB / 16376MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Install libraries

In [4]:
# `unzip` is needed to extract the downloaded archives in the "Prepare data" section.
# `apt-get ... -y` answers the confirmation prompt automatically: a notebook shell
# has no interactive stdin, so an unanswered prompt would abort or hang the install.
!sudo apt-get install -y unzip

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unzip is already the newest version (6.0-26ubuntu3.2).
0 upgraded, 0 newly installed, 0 to remove and 62 not upgraded.


In [5]:
# `%pip` installs into the environment of the running kernel, whereas `!pip` uses
# whichever `pip` is first on the shell PATH.
# fairseq is pinned to the version this notebook reports when it is imported.
%pip install gdown fairseq==0.12.2 editdistance tqdm

[33mDEPRECATION: omegaconf 2.0.6 has a non-standard dependency specifier PyYAML>=5.1.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of omegaconf or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m

## Prepare data

### Check working directory existence

In [6]:
import os
import gdown

# Report where the notebook is running; the data directory below is relative to it.
print('[!] Working directory', os.getcwd())

data_eval_dir = 'data-eval'

# Create the evaluation data directory on first use, otherwise just report it.
if os.path.exists(data_eval_dir):
  print(f"Directory '{data_eval_dir}' already exists.")
else:
  os.makedirs(data_eval_dir)
  print(f"-> Directory '{data_eval_dir}' created successfully!")

[!] Working directory /workspace/jupyter-bifi-adaptation
Directory 'data-eval' already exists.


### Download token vocab file

In [7]:
# Fetch the token vocabulary file from Google Drive into the evaluation data directory.
token_vocab_output_path = os.path.join(data_eval_dir, "token_vocab.txt")

# Last expression of the cell, so the value returned by the download is displayed.
gdown.download(id="1-JLsekCMygk1DSkbA9kQHLJeEPBUWyJM", output=token_vocab_output_path, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1-JLsekCMygk1DSkbA9kQHLJeEPBUWyJM
To: /workspace/jupyter-bifi-adaptation/data-eval/token_vocab.txt
100%|██████████| 130k/130k [00:00<00:00, 2.25MB/s]


'data-eval/token_vocab.txt'

### Download and unzip orig_bad_code

In [19]:
# Download the original bad-code archive and extract it into the evaluation data directory.
orig_bad_code_output_path = os.path.join(data_eval_dir, "orig_bad_code.zip")

gdown.download(id="12DIzvqty2ZwcH4KSaZ6I3rhU34EN4Bd-", output=orig_bad_code_output_path, quiet=False)
# `-o` overwrites existing files without prompting, as the other archive cells do.
# `-f` must not be used here: it only freshens files that already exist on disk and
# creates none, so on a fresh machine the archive would not be extracted at all.
!unzip -o {orig_bad_code_output_path} -d {data_eval_dir}

Downloading...
From: https://drive.google.com/uc?id=12DIzvqty2ZwcH4KSaZ6I3rhU34EN4Bd-
To: /workspace/jupyter-bifi-adaptation/data-eval/orig_bad_code.zip
100%|██████████| 14.5M/14.5M [00:01<00:00, 10.8MB/s]


Archive:  data-eval/orig_bad_code.zip


### Download and unzip round 0

In [24]:
# Download the round 0 archive into the evaluation data directory.
round_0_output_path = os.path.join(data_eval_dir, "round_0.zip")

gdown.download(id="13vgvSLaeffUsPR0NSLfiYTzHUK3q4L-y", output=round_0_output_path, quiet=False)
# Extract next to the archive; `-o` overwrites existing files without prompting.
!unzip -o {round_0_output_path} -d {data_eval_dir}

Archive:  data-eval/round_0.zip
   creating: data-eval/round_0/model_fixer/
  inflating: data-eval/round_0/model_fixer/checkpoint.pt  
  inflating: data-eval/round_0/model_fixer/desktop.ini  
  inflating: data-eval/round_0/orig_bad/stats.json  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/dict.bad.txt  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/dict.good.txt  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/model-fixer.pred.evaluated.json  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/model-fixer.pred.txt  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/preprocess.log  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/test.bad-good.bad.bin  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/test.bad-good.bad.idx  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/desktop.ini  
  inflating: data-ev

### Download and unzip round 1 - Fixer Only

In [24]:
# NOTE(review): this cell is a verbatim copy of the round 0 download cell. It fetches
# the round 0 archive again (same Drive id, same `round_0.zip` target), although the
# section is titled "round 1 - Fixer Only". The "Evaluate FixerOnly - Round 1" section
# reads `round_1_fixer_only` under the data directory, which no cell in this notebook
# downloads.
# TODO: replace the Drive id and the output file name with those of the Fixer Only archive.
round_0_output_path = os.path.join(data_eval_dir, "round_0.zip")

gdown.download(id="13vgvSLaeffUsPR0NSLfiYTzHUK3q4L-y", output=round_0_output_path, quiet=False)
!unzip -o {round_0_output_path} -d {data_eval_dir}

Archive:  data-eval/round_0.zip
   creating: data-eval/round_0/model_fixer/
  inflating: data-eval/round_0/model_fixer/checkpoint.pt  
  inflating: data-eval/round_0/model_fixer/desktop.ini  
  inflating: data-eval/round_0/orig_bad/stats.json  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/dict.bad.txt  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/dict.good.txt  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/model-fixer.pred.evaluated.json  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/model-fixer.pred.txt  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/preprocess.log  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/test.bad-good.bad.bin  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/test.bad-good.bad.idx  
  inflating: data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0/desktop.ini  
  inflating: data-ev

### Download and unzip round 1 - BIFI

In [21]:
# Download the round 1 BIFI archive into the evaluation data directory.
round_1_bifi_output_path = os.path.join(data_eval_dir, "round_1_bifi.zip")

gdown.download(id="12TuHRvsMIaOt5t6QhsvVbWi29L6kfvay", output=round_1_bifi_output_path, quiet=False)
# Extract next to the archive; `-o` overwrites existing files without prompting.
!unzip -o {round_1_bifi_output_path} -d {data_eval_dir}

Downloading...
From (original): https://drive.google.com/uc?id=12TuHRvsMIaOt5t6QhsvVbWi29L6kfvay
From (redirected): https://drive.google.com/uc?id=12TuHRvsMIaOt5t6QhsvVbWi29L6kfvay&confirm=t&uuid=28bbdddb-12a5-4cd1-b594-b452d966a6aa
To: /workspace/jupyter-bifi-adaptation/data-eval/round_1_bifi.zip
100%|██████████| 149M/149M [00:01<00:00, 89.4MB/s] 


Archive:  data-eval/round_1_bifi.zip


# Import libraries

In [11]:
%load_ext autoreload

# Check PyTorch version
import torch
print('Torch', torch.__version__)

import fairseq
print('fairseq', fairseq.__version__)

import sys
import json, os, re

sys.path.insert(0, 'utils')  # Replace with the actual path

Torch 2.3.1


2024-10-14 13:46:15 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX


fairseq 0.12.2


In [12]:
# Root directory of the evaluation data, used by the evaluation functions and cells below.
DATA_DIR = 'data-eval'
# Also exported through the process environment.
# NOTE(review): presumably read by the helpers imported from `utils` — confirm.
os.environ["DATA_DIR"] = DATA_DIR

In [13]:
%autoreload 2

from tqdm import tqdm
from copy import deepcopy
from collections import defaultdict, OrderedDict
import hashlib
from pathlib import Path
import os

from utils.code_error_checker import check_paren_error, check_ast_error
from utils.code_utils import preprocess_unk, code_toks_to_code_string, get_diff_metric, tokenize_python_code
from utils.fairseq_utils import parse_fairseq_preds, fairseq_preprocess, fairseq_generate, fairseq_train

# Common functions

In [14]:
def eval_one_pred_obj(pred_obj):
    """Re-check every candidate fix of one program and package the outcome.

    `pred_obj` is read for the keys 'code_toks_raw', 'anonymize_dict', 'src',
    'pred', 'progid' and 'orig_err_obj'. 'src' and each entry of 'pred' are in
    tok_format, i.e. ' '.join(code_toks).

    Each candidate is converted to string_format and checked with
    `check_paren_error` (on its tokens) when the original error message is
    'unbalanced (){}[]', otherwise with `check_ast_error` (on its string form).

    Returns a dict with 'progid', 'orig_err_obj', 'anonymize_dict', 'src'
    ({'tok_format', 'string_format'}) and 'pred', a list with one
    {'tok_format', 'string_format', 'err_obj', 'diff_metric'} dict per candidate.

    Note: when pred_obj['anonymize_dict'] is not None, that very dict is updated
    in place with the '<unk>', '<STRING>' and '<COMMENT>' entries and is the
    object stored under 'anonymize_dict' in the result.
    """
    # Deal with UNK
    _, unk_dict = preprocess_unk(pred_obj['code_toks_raw'])

    anonymize_dict = pred_obj['anonymize_dict']
    if anonymize_dict is None:
        anonymize_dict = {}
    anonymize_dict.update({'<unk>': unk_dict, '<STRING>': [], '<COMMENT>': []})

    src_toks = pred_obj['src']  # tok_format
    src_string = code_toks_to_code_string(src_toks, anonymize_dict)  # string_format

    ret_obj = {
        'progid': pred_obj['progid'],
        'orig_err_obj': pred_obj['orig_err_obj'],
        'anonymize_dict': pred_obj['anonymize_dict'],
        'src': {'tok_format': src_toks, 'string_format': src_string},
        'pred': [],
    }

    for pred_toks in pred_obj['pred']:
        pred_string = code_toks_to_code_string(pred_toks, anonymize_dict)  # string_format
        if pred_obj['orig_err_obj']['msg'] == 'unbalanced (){}[]':
            # The parenthesis check works on the token list, not on the code string.
            err_obj = check_paren_error(pred_toks.split())
        else:
            err_obj = check_ast_error(pred_string)
        ret_obj['pred'].append({
            'tok_format': pred_toks,
            'string_format': pred_string,
            'err_obj': err_obj,
            'diff_metric': get_diff_metric(src_toks, pred_toks),
        })
    return ret_obj

def eval_one_split(pred_dir_prefix, split, pred_fname, n_workers=80):
    """Evaluate the fairseq predictions of one split and save the result as JSON.

    Parses `{pred_dir_prefix}{split}/{pred_fname}`, attaches to every prediction
    its program id, original error object, raw code tokens and anonymize dict
    (all read from `{DATA_DIR}/orig_bad_code`), runs `eval_one_pred_obj` on each
    prediction and writes the list of results to `<stem>.evaluated.json` next to
    the prediction file.

    Args:
        pred_dir_prefix: Directory path prefix; the split number is appended to it.
        split: Split number, used both in the prediction directory name and in
            the `orig.{split}.id` file name.
        pred_fname: File name of the fairseq prediction file.
        n_workers: Unused. Kept for backward compatibility; the evaluation runs
            sequentially in this process.

    The written JSON is a list of
        {'progid': , 'orig_err_obj': , 'anonymize_dict': ,
         'src': {'tok_format': , 'string_format': },
         'pred': [{'tok_format': , 'string_format': , 'err_obj': , 'diff_metric': }, ...]}
    """
    pred_dir = f'{pred_dir_prefix}{split}'
    pred_path = Path(f'{pred_dir}/{pred_fname}')
    preds = parse_fairseq_preds(str(pred_path))

    # Load progids (one per line, in the same order as the predictions).
    data_dir = DATA_DIR
    with open(f'{data_dir}/orig_bad_code/orig.{split}.id') as f:
        progids = [l.strip() for l in f]
    assert len(preds) == len(progids), \
        f'{len(preds)} predictions but {len(progids)} program ids for split {split}'

    # Load the original err_obj and code of every bad program.
    with open(f'{data_dir}/orig_bad_code/orig.bad.json') as f:
        bads = json.load(f)

    for pred, progid in zip(preds, progids):
        bad = bads[progid]
        code_toks_raw = bad['code_toks_joined'].split()
        anonymize_dict = bad['anonymize_dict']
        if 'window_span' in bad:
            # Only a window of the program was used: keep the matching tokens
            # and evaluate without an anonymize dict.
            ws = bad['window_span']
            code_toks_raw = code_toks_raw[ws[0]:ws[1]]
            anonymize_dict = None
        pred['progid'] = progid
        pred['orig_err_obj'] = bad['err_obj']
        pred['code_toks_raw'] = code_toks_raw
        pred['anonymize_dict'] = anonymize_dict

    print ('len(preds)', len(preds))
    # `total` lets tqdm show a real progress bar, since `map` has no length.
    res = list(tqdm(map(eval_one_pred_obj, preds), total=len(preds)))

    with open(f'{pred_path.parent}/{pred_path.stem}.evaluated.json', 'w') as f:
        json.dump(res, f, indent=2)

def get_test_result(pred_dir_prefix, pred_fname):
    """Print and save the top-1 repair accuracy on the held-out test splits.

    For splits 3 and 4, reads `{pred_dir_prefix}{split}/<stem>.evaluated.json`
    (`<stem>` being `pred_fname` without its last suffix). A prediction counts as
    a successful fix when its 'err_obj' equals 0 and its 'diff_metric' satisfies
    0 < diff_metric <= 4. A program counts towards the accuracy when its first
    prediction (index 0) is a successful fix.

    Accuracies (in percent) are printed in total and per original error type,
    where every message containing 'indent' is grouped as 'indentation error'.
    They are also written to `stats.json` in the parent directory of
    `pred_dir_prefix`. Error types without any successful fix are reported with
    an accuracy of 0.0.

    Args:
        pred_dir_prefix: Directory path prefix; the split number is appended to it.
        pred_fname: File name of the fairseq prediction file.

    Returns:
        None.
    """
    test_splits = (3, 4)  # heldout test set

    def collate_eval():
        # Gather (split, progid, k) for every successful prediction, plus the
        # number of evaluated programs, both overall and per error type.
        success = []
        denom = 0
        success_by_group = defaultdict(list)
        denom_by_group = defaultdict(int)
        for split in test_splits:
            print ('split', split)
            pred_path = Path(f'{pred_dir_prefix}{split}')/pred_fname
            pred_eval_path = f'{pred_path.parent}/{pred_path.stem}.evaluated.json'
            with open(pred_eval_path) as f:
                eval_objs = json.load(f)
            for eval_obj in eval_objs:
                progid = eval_obj['progid']
                orig_err_type = eval_obj['orig_err_obj']['msg']
                if 'indent' in orig_err_type:
                    orig_err_type = 'indentation error'
                denom += 1
                denom_by_group[orig_err_type] += 1
                for k, pred_obj in enumerate(eval_obj['pred']):
                    pred_err_obj = pred_obj['err_obj']
                    diff_metric = pred_obj['diff_metric']
                    if (pred_err_obj == 0) and (0 < diff_metric <= 4):
                        # Kept as a tuple rather than a '-'-joined string, so that
                        # program ids containing '-' cannot be split incorrectly.
                        hit = (split, progid, k)
                        success.append(hit)
                        success_by_group[orig_err_type].append(hit)
        return success, denom, success_by_group, denom_by_group

    def print_stats(hits, _denom):
        # Top-1 accuracy: programs whose first prediction (k == 0) is a success.
        top1 = {(split, progid) for split, progid, k in hits if k == 0}
        # No evaluated program means no accuracy to compute; report 0 instead of
        # failing with a division by zero.
        acc = len(top1)/float(_denom)*100 if _denom else 0.0
        print ('   acc: {} ({:.1f}%) | denom {}'.format(len(top1), acc, _denom))
        return acc

    success, denom, success_by_group, denom_by_group = collate_eval()
    acc_dict = {}
    print ('Total')
    acc_dict['total'] = print_stats(success, denom)
    print ('-'*50)
    # Iterate over every error type that was seen, not only those with at least
    # one success, so that groups with 0% accuracy are still reported.
    for err_type in denom_by_group:
        print (f'{err_type.capitalize()}')
        acc_dict[err_type] = print_stats(success_by_group.get(err_type, []),
                                         denom_by_group[err_type])
    with open(Path(pred_dir_prefix).parent/'stats.json', 'w') as f:
        json.dump(acc_dict, f, indent=2)

# Param config

In [15]:
# Passed as `beam=` and `nbest=` to every fairseq_generate call in the
# "Perform fix" cells.
BEAM = 10
NBEST = 10

# Evaluate Round 0


## Prepare environment

In [25]:
# Everything for this round lives under <DATA_DIR>/round_0.
data_dir = Path(DATA_DIR)
round_dir = data_dir / 'round_0'

# Fixer checkpoint used for generation.
model_dir = round_dir / 'model_fixer'
model_path = model_dir / 'checkpoint.pt'

# The per-split input directories are read by generation (destdir_root) and by
# evaluation (pred_dir_root); both point at the same place.
n_splits = 5
destdir_root = round_dir / 'orig_bad'
pred_dir_root = round_dir / 'orig_bad'

# The split number is appended to this prefix to get one split's directory.
pred_dir_prefix = str(pred_dir_root / 'fairseq_preprocess__orig_bad.')
pred_fname = 'model-fixer.pred.txt'

## Perform fix

In [26]:
# Run the fixer on every split, writing one prediction file per split directory.
for split in range(n_splits):
    destdir = destdir_root / f'fairseq_preprocess__orig_bad.{split}'
    pred_path = destdir / 'model-fixer.pred.txt'

    # Drop the prediction file of any earlier run before generating again.
    if os.path.exists(pred_path):
        os.remove(pred_path)

    fairseq_generate(
        str(destdir),
        str(model_path),
        str(pred_path),
        src='bad',
        tgt='good',
        gen_subset='test',
        beam=BEAM,
        nbest=NBEST,
        max_len_a=1,
        max_len_b=50,
        max_tokens=7000,
    )

fairseq-generate             data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.0         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_0/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 13:55:19 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 13:55:21 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

fairseq-generate             data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.1         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_0/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 13:59:26 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 13:59:28 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

fairseq-generate             data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.2         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_0/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 14:03:31 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 14:03:33 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

fairseq-generate             data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.3         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_0/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 14:07:33 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 14:07:35 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

fairseq-generate             data-eval/round_0/orig_bad/fairseq_preprocess__orig_bad.4         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_0/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 14:11:33 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 14:11:35 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

## Evaluate

In [27]:
# Evaluate the predictions of every split; each call writes an `.evaluated.json`
# file next to the split's prediction file.
for split in range(n_splits):
    eval_one_split(pred_dir_prefix, split, pred_fname, n_workers=10)

# Print the accuracy on the held-out splits and save it to stats.json.
get_test_result(pred_dir_prefix, pred_fname)

len(preds) 7528


7528it [00:10, 718.86it/s]


len(preds) 7528


7528it [00:10, 744.97it/s]


len(preds) 7528


7528it [00:10, 746.15it/s]


len(preds) 7528


7528it [00:10, 747.71it/s]


len(preds) 7527


7527it [00:10, 744.31it/s]


split 3
split 4
Total
   acc: 9341 (62.0%) | denom 15055
--------------------------------------------------
Unbalanced (){}[]
   acc: 3508 (87.7%) | denom 3999
Invalid syntax
   acc: 3350 (70.5%) | denom 4749
Indentation error
   acc: 2483 (39.4%) | denom 6307


# Evaluate FixerOnly - Round 1

## Prepare environment

In [34]:
# Everything for this round lives under <DATA_DIR>/round_1_fixer_only.
data_dir = Path(DATA_DIR)
round_dir = data_dir / 'round_1_fixer_only'

# Fixer checkpoint used for generation.
# NOTE(review): this round uses 'model-fixer' (hyphen) while the round 0 and BIFI
# cells use 'model_fixer' (underscore) — confirm against the archive layout.
model_dir = round_dir / 'model-fixer'
model_path = model_dir / 'checkpoint.pt'

# The per-split input directories are read by generation (destdir_root) and by
# evaluation (pred_dir_root); both point at the same place.
n_splits = 5
destdir_root = round_dir / 'orig_bad'
pred_dir_root = round_dir / 'orig_bad'

# The split number is appended to this prefix to get one split's directory.
pred_dir_prefix = str(pred_dir_root / 'fairseq_preprocess__orig_bad.')
pred_fname = 'model-fixer.pred.txt'

## Perform fix

In [37]:
# Run the fixer on every split, writing one prediction file per split directory.
for split in range(n_splits):
    destdir = destdir_root / f'fairseq_preprocess__orig_bad.{split}'
    pred_path = destdir / 'model-fixer.pred.txt'

    # Drop the prediction file of any earlier run before generating again.
    if os.path.exists(pred_path):
        os.remove(pred_path)

    fairseq_generate(
        str(destdir),
        str(model_path),
        str(pred_path),
        src='bad',
        tgt='good',
        gen_subset='test',
        beam=BEAM,
        nbest=NBEST,
        max_len_a=1,
        max_len_b=50,
        max_tokens=7000,
    )

fairseq-generate             data\round_1_fixer_only\orig_bad\fairseq_preprocess__orig_bad.0         --source-lang bad --target-lang good         --gen-subset test         --path data\round_1_fixer_only\model-fixer\checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 5         --beam 5 --max-tokens 7000 
fairseq-generate             data\round_1_fixer_only\orig_bad\fairseq_preprocess__orig_bad.1         --source-lang bad --target-lang good         --gen-subset test         --path data\round_1_fixer_only\model-fixer\checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 5         --beam 5 --max-tokens 7000 
fairseq-generate             data\round_1_fixer_only\orig_bad\fairseq_preprocess__orig_bad.2         --source-lang bad --target-lang good         --gen-subset test         --path data\round_1_fixer_only\model-fixer\checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 5         --beam 5 --max-tokens 7000 
fairseq-generate

## Evaluate

In [39]:
# Show which prediction files are about to be evaluated.
print(pred_dir_prefix, pred_fname)
# Evaluate the predictions of every split; each call writes an `.evaluated.json`
# file next to the split's prediction file.
for split in range(n_splits):
    eval_one_split(pred_dir_prefix, split, pred_fname, n_workers=10)

# Print the accuracy on the held-out splits and save it to stats.json.
get_test_result(pred_dir_prefix, pred_fname)

data\round_1_fixer_only\orig_bad\fairseq_preprocess__orig_bad. model-fixer.pred.txt
len(preds) 7528


7528it [00:05, 1384.20it/s]


len(preds) 7528


7528it [00:05, 1444.85it/s]


len(preds) 7528


7528it [00:05, 1402.32it/s]


len(preds) 7528


7528it [00:05, 1427.16it/s]


len(preds) 7527


7527it [00:05, 1356.43it/s]


split 3
split 4
Total
   acc: 13067 (86.8%) | denom 15055
--------------------------------------------------
Unbalanced (){}[]
   acc: 3730 (93.3%) | denom 3999
Invalid syntax
   acc: 4321 (91.0%) | denom 4749
Indentation error
   acc: 5016 (79.5%) | denom 6307


# Evaluate BIFI - Round 1

## Prepare environment

In [29]:
# Everything for this round lives under <DATA_DIR>/round_1_bifi.
data_dir = Path(DATA_DIR)
round_dir = data_dir / 'round_1_bifi'

# Fixer checkpoint used for generation.
model_dir = round_dir / 'model_fixer'
model_path = model_dir / 'checkpoint.pt'

# The per-split input directories are read by generation (destdir_root) and by
# evaluation (pred_dir_root); both point at the same place.
n_splits = 5
destdir_root = round_dir / 'orig_bad'
pred_dir_root = round_dir / 'orig_bad'

# The split number is appended to this prefix to get one split's directory.
pred_dir_prefix = str(pred_dir_root / 'fairseq_preprocess__orig_bad.')
pred_fname = 'model-fixer.pred.txt'

## Perform fix

In [30]:
# Run the fixer on every split, writing one prediction file per split directory.
for split in range(n_splits):
    destdir = destdir_root / f'fairseq_preprocess__orig_bad.{split}'
    pred_path = destdir / 'model-fixer.pred.txt'

    # Drop the prediction file of any earlier run before generating again.
    if os.path.exists(pred_path):
        os.remove(pred_path)

    fairseq_generate(
        str(destdir),
        str(model_path),
        str(pred_path),
        src='bad',
        tgt='good',
        gen_subset='test',
        beam=BEAM,
        nbest=NBEST,
        max_len_a=1,
        max_len_b=50,
        max_tokens=7000,
    )

fairseq-generate             data-eval/round_1_bifi/orig_bad/fairseq_preprocess__orig_bad.0         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_1_bifi/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 14:23:12 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 14:23:14 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

fairseq-generate             data-eval/round_1_bifi/orig_bad/fairseq_preprocess__orig_bad.1         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_1_bifi/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 14:27:23 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 14:27:25 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

fairseq-generate             data-eval/round_1_bifi/orig_bad/fairseq_preprocess__orig_bad.2         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_1_bifi/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 14:31:30 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 14:31:32 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

fairseq-generate             data-eval/round_1_bifi/orig_bad/fairseq_preprocess__orig_bad.3         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_1_bifi/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 14:35:30 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 14:35:32 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

fairseq-generate             data-eval/round_1_bifi/orig_bad/fairseq_preprocess__orig_bad.4         --source-lang bad --target-lang good         --gen-subset test         --path data-eval/round_1_bifi/model_fixer/checkpoint.pt         --max-len-a 1         --max-len-b 50         --nbest 10         --beam 10 --max-tokens 7000 


2024-10-14 14:39:37 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
2024-10-14 14:39:39 | INFO | fairseq_cli.generate | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': 

## Evaluate

In [31]:
# Show which prediction files are about to be evaluated.
print(pred_dir_prefix, pred_fname)
# Evaluate the predictions of every split; each call writes an `.evaluated.json`
# file next to the split's prediction file.
for split in range(n_splits):
    eval_one_split(pred_dir_prefix, split, pred_fname, n_workers=10)

# Print the accuracy on the held-out splits and save it to stats.json.
get_test_result(pred_dir_prefix, pred_fname)

data-eval/round_1_bifi/orig_bad/fairseq_preprocess__orig_bad. model-fixer.pred.txt
len(preds) 7528


7528it [00:08, 878.68it/s]


len(preds) 7528


7528it [00:08, 887.77it/s]


len(preds) 7528


7528it [00:08, 860.94it/s]


len(preds) 7528


7528it [00:08, 887.68it/s]


len(preds) 7527


7527it [00:08, 885.38it/s]


split 3
split 4
Total
   acc: 13244 (88.0%) | denom 15055
--------------------------------------------------
Unbalanced (){}[]
   acc: 3765 (94.1%) | denom 3999
Invalid syntax
   acc: 4348 (91.6%) | denom 4749
Indentation error
   acc: 5131 (81.4%) | denom 6307
