In [1]:
%load_ext autoreload
%autoreload 2

import os
from typing import *

from spot.utils import proj_root, get_data_dir

# Switch the kernel's working directory to whatever `proj_root()` returns, so
# every relative path used later in this notebook is resolved against it.
os.chdir(proj_root())

# Data directory reported by `spot.utils.get_data_dir()`; later cells pass it to
# `load_src_datasets` and use it as the wandb run directory.
datadir = get_data_dir()

In [2]:
# experiment configurations

from spot.data import (
    SrcDataset,
    get_dataset_name,
    load_src_datasets,
    TypeCheckSettings,
)
from spot.model import CtxArgs, DecodingArgs, ModelSPOT, ModelWrapper
from copy import copy
from spot.train import TrainingConfig, TypeCheckArgs

# ---- Training configuration -------------------------------------------------
config = TrainingConfig(
    quicktest=False, all_labels=True, ctx_size=2048, left_margin=1024, right_margin=512
)

# ---- Device selection and type-checker scratch path --------------------------
# The GPU index is embedded in the temp path; presumably this keeps runs on
# different GPUs from sharing one type-checking directory -- TODO confirm.
gpu_id = 0
TypeCheckSettings.temp_path = f"DAgger-{gpu_id}"

# Quick tests are logged under a separate project name from full runs.
if config.quicktest:
    project_name = "test-SPOT"
else:
    project_name = "SPOT"

train_ctx_args = config.train_ctx_args()
tc_args = TypeCheckArgs(check_in_isolation=config.check_in_isolation)

# ---- Decoding arguments ------------------------------------------------------
# The sampling token budget is 8x the configured context size.
max_tokens_per_file = config.ctx_size
dec_args = DecodingArgs(
    sampling_max_tokens=8 * max_tokens_per_file, ctx_args=config.dec_ctx_args()
)

# ---- Dataset and model naming ------------------------------------------------
datasets_name = get_dataset_name(
    drop_comments=config.drop_comments, all_labels=config.all_labels
)
model_name = "DAgger-model--" + config.as_name()

# ---- Load the source datasets from `datadir` ---------------------------------
src_datasets = load_src_datasets(
    datadir,
    datasets_name,
    data_reduction=config.data_reduction,
    quicktest=config.quicktest,
)


  warn(f"Failed to load image Python extension: {e}")


Loading datasets:  src_datasets-all_labels-drop_comments


In [3]:
from spot.model import load_model_spot, DefaultTokenizer
from spot.model import ModelWrapper
from spot.dagger import DAggerModel
import torch

# Load the "Salesforce/codet5-base" model and bundle it with the default
# tokenizer and the decoding arguments defined in the configuration cell.
model = load_model_spot("Salesforce/codet5-base")
wrapper = ModelWrapper(model, DefaultTokenizer, dec_args)

# Use the configured GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device(f"cuda:{gpu_id}")
else:
    device = torch.device("cpu")
wrapper.to(device)

# DAgger model built on top of the wrapped model; used for training and evaluation below.
dmodel = DAggerModel(wrapper)


In [5]:
from spot.utils import pretty_print_dict

# Evaluate `dmodel` on the [0:50] slice of the test split and print the
# resulting accuracy metrics. Top-level `await` relies on the IPython kernel.
eval_r = await dmodel.eval_on_data(src_datasets["test"][0:50])
pretty_print_dict(eval_r.accuracies)


compute_preexisting_fdbks: 100%|██████████| 4/4 [00:02<00:00,  1.43it/s]
Evaluating: 100%|██████████| 920/920 [03:47<00:00,  4.04it/s]


partial_acc (ImNone): 5.54% (count=920)
full_acc (ImNone): 5.11% (count=920)
partial_acc: 5.00% (count=920)
ast_acc: 3.66% (count=1.1k)
full_acc: 4.57% (count=920)
partial_acc_by_cat:
   FuncArg: 2.26% (count=531)
   FuncReturn: 12.00% (count=275)
   ClassAtribute: 0.00% (count=71)
   GlobalVar: 0.00% (count=1)
   LocalVar: 2.38% (count=42)
partial_acc_by_pos:
   range(0, 1): 6.00% (count=50)
   range(1, 2): 0.00% (count=44)
   range(2, 4): 7.69% (count=78)
   range(4, 8): 1.56% (count=128)
   range(8, 16): 3.21% (count=187)
   range(16, 32): 8.21% (count=207)
   range(32, 64): 7.01% (count=157)
   range(64, 128): 1.45% (count=69)
avg_label_size: 1.2478
avg_pred_size: 1.0076


In [4]:
from spot.dagger import DAggerArgs
import wandb

wandb.init(
    project=project_name,
    name=model_name,
    config=config.as_dict(),
    dir=str(datadir),
)

await dmodel.train_on_data(
    src_datasets, DAggerArgs(config.grad_accum_labels), log_fn=lambda t, x: wandb.log(x, step=t)
)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmrvplusone[0m. Use [1m`wandb login --relogin`[0m to force relogin


compute_preexisting_fdbks: 100%|██████████| 612/612 [01:01<00:00,  9.96it/s]
Training:   0%|          | 1/295457 [00:06<538:10:49,  6.56s/it]


TypeError: <lambda>() got an unexpected keyword argument 'step'

In [None]:
from spot.utils import pretty_print_dict

# Re-run the evaluation of `dmodel` on the [0:50] slice of the test split and
# print the accuracy metrics. Top-level `await` relies on the IPython kernel.
eval_r = await dmodel.eval_on_data(src_datasets["test"][0:50])
pretty_print_dict(eval_r.accuracies)


compute_preexisting_fdbks: 100%|██████████| 4/4 [00:02<00:00,  1.43it/s]
Evaluating:  14%|█▎        | 126/920 [00:27<02:50,  4.65it/s]
Traceback (most recent call last):
  File "/home/jiayi/Projects/SPOT/src/spot/dagger.py", line 269, in task
    return await f(x)
  File "/home/jiayi/Projects/SPOT/src/spot/dagger.py", line 225, in eval_step
    r = await self.run_on_src(
  File "/home/jiayi/Projects/SPOT/src/spot/dagger.py", line 106, in run_on_src
    check_r = await eloop.run_in_executor(
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/jiayi/Projects/SPOT/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3396, in run_code
    await eval(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_49914/923053019.py", line 3, in <cell line: 3>
    eval_r = await dmodel.eval_on_data(src_datasets["test"][0:50])
  File "/home/jiayi/Projects/SPOT/src/spo

Unexpected exception formatting exception. Falling back to standard exception


In [None]:
from spot.utils import pretty_print_dict

# Evaluate `dmodel` on the entire test split (no slicing) and print the
# accuracy metrics. Top-level `await` relies on the IPython kernel.
eval_r = await dmodel.eval_on_data(src_datasets["test"])
pretty_print_dict(eval_r.accuracies)
