In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import libcst as cst

from spot.tokenized_src import TokenizedSrc, stub_from_module
from spot.utils import Path, decode_tokens

ex_code = '''
from .utils import *
from .data import remove_comments, remove_imports
import spot


def gen_stub(m: cst.Module, rm_comments=True, rm_imports=True) -> cst.Module:
    """Removes all comments and docstrings."""
    if rm_comments:
        m = remove_comments(m)
    if rm_imports:
        m, _ = remove_imports(m)
    m = m.visit(StubGenerator())
    m = remove_empty_lines(m)
    spot.fly.attach(m)
    return m


def remove_empty_lines(m: cst.Module) -> cst.Module:
    m = m.visit(EmptyLineRemove())
    return m


OMIT = cst.SimpleStatementSuite([cst.Expr(cst.Ellipsis())])


@dataclass
class ClassNamespace:
    all_elems: set[str] = field(default_factory=set)
    declared_elems: set[str] = field(default_factory=set)


class StubGenerator(cst.CSTTransformer):
    """Generate a stub module from a Python module."""

    def __init__(self):
        self.ns_stack = list[ClassNamespace]()
        self.ns_stack = 1

    def register_elem(self, name: str, declared: bool):
        if self.ns_stack:
            s = self.ns_stack[-1]
            s.all_elems.add(name)
            if declared:
                s.declared_elems.add(name)

    def visit_ClassDef(self, node: cst.ClassDef):
        self.ns_stack.append(ClassNamespace())

    def leave_ClassDef(self, node, updated: cst.ClassDef):
        s = self.ns_stack.pop()
        to_declare = s.all_elems.difference(s.declared_elems)
        if to_declare:
            more_stmts = [cst.parse_statement(f"{n}: ...") for n in to_declare]
            new_stmts = list(updated.body.body) + more_stmts
            updated = updated.with_changes(
                body=updated.body.with_changes(body=new_stmts)
            )
        return updated

    def leave_FunctionDef(self, node, updated: cst.FunctionDef):
        self.register_elem(updated.name.value, True)
        return updated.with_changes(body=OMIT, returns=None)

    def leave_Annotation(self, node, updated: cst.Annotation):
        return updated.with_changes(annotation=cst.Ellipsis())

    def leave_Param(self, node, updated: cst.Param):
        if updated.default is not None:
            updated = updated.with_changes(default=cst.Ellipsis())
        return updated.with_changes(annotation=None)

    def leave_AnnAssign(self, node, updated: cst.AnnAssign):
        if updated.value is not None:
            updated = updated.with_changes(value=cst.Ellipsis())
        return updated

    def leave_Assign(self, node, updated: cst.AnnAssign):
        return updated.with_changes(value=cst.Ellipsis())

    def leave_Attribute(self, node, updated: cst.Assign):
        match updated:
            case cst.Attribute(
                value=cst.Name(value="self"),
                attr=cst.Name(value=elem_name),
            ):
                self.register_elem(elem_name, False)
        return updated


class EmptyLineRemove(cst.CSTTransformer):
    def on_leave(self, node, updated):
        if hasattr(updated, "leading_lines") and updated.leading_lines:
            return updated.with_changes(leading_lines=[])
        return updated

'''

# Parse the example source into a LibCST module and print the code of the stub
# produced from it. `ex_m` is reused by a later cell (`PythonModule.from_cst`).
ex_m = cst.parse_module(ex_code)
print(stub_from_module(ex_m).code)



def gen_stub(m, rm_comments=..., rm_imports=...): ...
def remove_empty_lines(m): ...
OMIT = ...
@dataclass
class ClassNamespace:
    all_elems: ... = ...
    declared_elems: ... = ...
class StubGenerator(cst.CSTTransformer):
    def __init__(self): ...
    def register_elem(self, name, declared): ...
    def visit_ClassDef(self, node): ...
    def leave_ClassDef(self, node, updated): ...
    def leave_FunctionDef(self, node, updated): ...
    def leave_Annotation(self, node, updated): ...
    def leave_Param(self, node, updated): ...
    def leave_AnnAssign(self, node, updated): ...
    def leave_Assign(self, node, updated): ...
    def leave_Attribute(self, node, updated): ...
    ns_stack: ...
class EmptyLineRemove(cst.CSTTransformer):
    def on_leave(self, node, updated): ...




In [5]:
from spot.static_analysis import PythonModule, compute_module_usages, PythonProject

# Wrap the parsed example module as a `PythonModule` under the module name "spot.stub".
pmod = PythonModule.from_cst(ex_m, "spot.stub")

In [21]:
from spot import proj_root
from spot.static_analysis import ProjectPath
from pprint import pprint


# Build a project from the repository's `src` directory and pretty-print its usages.
# NOTE(review): `compute_project_usages` is not imported in any visible cell, so unless
# an unseen cell defines it this raises NameError on a fresh kernel. Import it
# explicitly (presumably from `spot.static_analysis` — confirm).
proj = PythonProject.from_root(proj_root() / "src")
pprint(compute_project_usages(proj), width=50, compact=True)


{proj'spot.critic/CriticModel.eval_on_src_dataset': [proj'spot.model/dynamic_dataloader'],
 proj'spot.critic/TrainCriticModelWrapper.configure_optimizers': [proj'spot.train/_configure_optimizers'],
 proj'spot.critic/to_critic_dataset': [proj'spot.type_check/normalize_type',
                                       proj'spot.type_check/normalize_type'],
 proj'spot.critic/train_critic_model': [proj'spot.model/dynamic_dataloader'],
 proj'spot.dagger/DAggerEvalResult.accuracies': [proj'spot.data/src_preds_to_accuracies'],
 proj'spot.dagger/DAggerModel.train_on_data': [proj'spot.train/_configure_optimizers',
                                               proj'spot.type_check/normalize_type',
                                               proj'spot.type_check/normalize_type'],
 proj'spot.dagger/get_typechecked_src': [proj'spot.tokenized_src/feedbacks_to_tokenized_src'],
 proj'spot.dagger/src_to_batch': [proj'spot.data/chunk_from_src'],
 proj'spot.data/GitRepo.collect_annotations': [proj'spot.t

In [23]:
from spot.static_analysis import PythonProject, proj_root
from pprint import pprint

# Show the modules found under `src`; the recorded output is keyed by dotted module name.
pprint(PythonProject.from_root(proj_root() / "src").modules)

{'spot.__init__': PythonModule(n_functions=0, n_classes=0),
 'spot.critic': PythonModule(n_functions=5, n_classes=5),
 'spot.dagger': PythonModule(n_functions=6, n_classes=6),
 'spot.data': PythonModule(n_functions=23, n_classes=8),
 'spot.debug_critic': PythonModule(n_functions=4, n_classes=0),
 'spot.decode': PythonModule(n_functions=16, n_classes=5),
 'spot.model': PythonModule(n_functions=1, n_classes=3),
 'spot.static_analysis': PythonModule(n_functions=2, n_classes=8),
 'spot.tokenized_src': PythonModule(n_functions=8, n_classes=7),
 'spot.train': PythonModule(n_functions=8, n_classes=3),
 'spot.type_check': PythonModule(n_functions=11, n_classes=6),
 'spot.type_env': PythonModule(n_functions=9, n_classes=11),
 'spot.utils': PythonModule(n_functions=44, n_classes=10),
 'spot.visualization': PythonModule(n_functions=35, n_classes=0)}


In [22]:
code1 = """
# root.file1

# global function
def gf(x):
    return x * x

# with inner function
def gf_with_inner(x):
    def inner(y):
        return y * y
    return inner(x)

# class
class C:
    def __init__(self, x):
        self.x = x
    
    def foo(self, y):
        return self.x + y
    
"""
code2 = """
# root.file2
from .file1 import gf
from root.file1 import gf_with_inner
import root.file1
import root.file1 as f1

def usage1(x):
    gf(x) + root.file1.C(5)
    foo(5)

def usage2(x):
    def inner():
        1 + gf_with_inner(x)
    return inner()

def usage_method1(x):
    x = f1.C(5)
    1 + x.foo(3)

def usage_method2(x):
    (1 + f1.C(5)).foo(3)

def usage_local():
    usage1(3)
    UsageClass(4)

class UsageClass:
    def __init__(self, x):
        self.x = gf_with_inner(x)
        self.foo(5)

    def foo(self, y):
        return usage_local(f1.gf(1))
"""

# Assemble an in-memory project from the two snippets defined above.
project = PythonProject.from_modules(
    [
        PythonModule.from_cst(cst.parse_module(snippet), mod_name)
        for snippet, mod_name in ((code1, "root.file1"), (code2, "root.file2"))
    ]
)

# Print every usage found in the second module, one per line
# (`print` applies `str()` to its argument).
for u in compute_module_usages(project.modules["root.file2"]):
    print(u)

(proj'root.file2/usage1', CodeRange(start=CodePosition(line=9, column=4), end=CodePosition(line=9, column=9)), QualifiedName(name='.file1.gf', source=<QualifiedNameSource.IMPORT: 1>))
(proj'root.file2/usage1', CodeRange(start=CodePosition(line=9, column=12), end=CodePosition(line=9, column=27)), QualifiedName(name='root.file1.C', source=<QualifiedNameSource.IMPORT: 1>))
(proj'root.file2/usage2', CodeRange(start=CodePosition(line=14, column=12), end=CodePosition(line=14, column=28)), QualifiedName(name='root.file1.gf_with_inner', source=<QualifiedNameSource.IMPORT: 1>))
(proj'root.file2/usage2', CodeRange(start=CodePosition(line=15, column=11), end=CodePosition(line=15, column=18)), QualifiedName(name='usage2.<locals>.inner', source=<QualifiedNameSource.LOCAL: 3>))
(proj'root.file2/usage_method1', CodeRange(start=CodePosition(line=18, column=8), end=CodePosition(line=18, column=15)), QualifiedName(name='root.file1.C', source=<QualifiedNameSource.IMPORT: 1>))
(proj'root.file2/usage_metho

In [22]:
import libcst as cst

from spot.tokenized_src import TokenizedSrc, PreprocessArgs
from spot.utils import Path, decode_tokens

ex_code = '''# document comment 1
  # document comment 2
"""String document commnet"""
import os; import spot;
from sys import argv, exit
# after import
@wraps(function)
def catch_permission_denied(function):
    import some.inner.imports
    """
    Decorator to catch :class:`psycopg2.ProgrammingError` exceptions with the
    ``INSUFFICIENT_PRIVILEGE`` error code and rethrow them as
    :class:`~werkzeug.exceptions.Forbidden` exceptions instead.
    """
    @wraps(function)
    def decorated(x: str, y: int) -> str:
        try:
            # comment 1
            # comment 1 cont
            return function(*args, **kwargs)

        except InsufficientPrivilege as error:
            LOG.error("Forbidden: %s", error) # comment 2
            raise Forbidden()

    return decorated
'''
# Tokenize the example source with `stub_in_preamble=True`, then decode the token ids
# back to text to inspect what the preprocessing kept.
pre_args = PreprocessArgs(stub_in_preamble=True)
ex_src = TokenizedSrc.parse(ex_code, Path("test_file"), Path("test_repo"), pre_args)
print(decode_tokens(ex_src.tokenized_code))


@wraps(function)
def catch_permission_denied(function):
    import some.inner.imports
    @wraps(function)
    def decorated(x: <mask>, y: <mask>) -> <mask>:
        try:
            return function(*args, **kwargs)

        except InsufficientPrivilege as error:
            LOG.error("Forbidden: %s", error) 
            raise Forbidden()

    return decorated



In [16]:
from spot.data import src_to_chunks_, CtxArgs, PreprocessArgs
from ipywidgets import interactive

# Same preprocessing arguments and parse call as the earlier tokenization cell;
# `ex_code` must already be defined in the kernel.
pre_args = PreprocessArgs(stub_in_preamble=True)
ex_src = TokenizedSrc.parse(ex_code, Path("test_file"), Path("test_repo"), pre_args)


def print_code(
    preamble: int,
    left: int,
    right: int,
    ctx_size: int,
    max_labels: int,
    chunk_id: int,
    inline_prev: bool,
):
    """Chunk the notebook-global `ex_src` and print one decoded chunk.

    Args:
        preamble, left, right, ctx_size: Forwarded positionally to `CtxArgs` as
            `CtxArgs(ctx_size, preamble, left, right, ...)`.
        max_labels: Forwarded as `CtxArgs(max_labels=...)`.
        chunk_id: Index into the list of produced chunks.
        inline_prev: Forwarded as `CtxArgs(inline_prev_gold=...)`.

    When `chunk_id` does not index a produced chunk, a short message is printed
    instead of raising, so moving the slider past the last chunk does not dump a
    traceback into the widget output.
    """
    args = CtxArgs(
        ctx_size,
        preamble,
        left,
        right,
        max_labels=max_labels,
        inline_prev_gold=inline_prev,
    )
    chunks: list = []
    # `src_to_chunks_` fills `chunks` in place; the second list is a throwaway
    # output here (NOTE(review): its meaning is not visible in this notebook).
    src_to_chunks_(chunks, [], ex_src, (0, len(ex_src.types)), args)
    try:
        chunk = chunks[chunk_id]
    except IndexError:
        print(f"chunk_id={chunk_id} is out of range: {len(chunks)} chunk(s) were produced.")
        return
    print(decode_tokens(chunk["input_ids"]))


# Expose `print_code` through ipywidgets. Each (min, max) tuple becomes an integer
# slider (see the `IntSlider` widgets in the recorded output); being the last
# expression of the cell, the widget is displayed.
interactive(
    print_code,
    preamble=(1, 100),
    left=(1, 200),
    right=(1, 100),
    ctx_size=(1, 500),
    max_labels=(1, 10),
    chunk_id=(0,1),
    inline_prev=True,
)


interactive(children=(IntSlider(value=50, description='preamble', min=1), IntSlider(value=100, description='le…

In [9]:
from spot.data import SrcDataset
from spot.utils import DefaultTokenizer, proj_root

# NOTE(review): the recorded output shows this call failing with
# `TypeError: SrcDataset.from_repos() got an unexpected keyword argument 'drop_comments'`,
# so the `simple_dataset` used by the following cells comes from an older run.
# Update the arguments to the current `from_repos` signature (not visible here).
simple_dataset = SrcDataset.from_repos(
    proj_root() / "data",
    [proj_root() / "data/code"],
    DefaultTokenizer,
    drop_comments=True,
    max_workers=10,
    label_ratio=0.5,
)


TypeError: SrcDataset.from_repos() got an unexpected keyword argument 'drop_comments'

In [25]:
# Per-file predictions to type-check. Each inner dict maps an integer to a type
# string (presumably annotation index -> predicted type; confirm against
# `_get_type_checker_feedback_iso`).
file2preds = {
    (proj_root() / "data/code/bad_code_1.py"): {
        1: "str",
        2: "str",
    },
    (proj_root() / "data/code/bad_code_2.py"): {
        0: "int",
    },
    (proj_root() / "data/code/dummy/dummy_2.py"): {
        0: "str",
    },
}
# Collect the type-checker feedback for the predictions above, then print each
# feedback object.
fdbks = simple_dataset._get_type_checker_feedback_iso(
    file2preds,
    max_workers=20,
)
for f in fdbks:
    f.pretty_print()


type_check_src:   0%|          | 0/3 [00:00<?, ?it/s]

Feedbacks:
MypyFeedback(position=CodePosition(line=14, column=12), message='Incompatible return value type (got "str", expected "int") ', error_code='return-value')
 1|  from typing import Any # SPOT
 2|  from typing import Any
 3|  
 4|  def fib(n: str) -> Any:
 5|      if n == 0:
 6|          return 0
 7|      elif n == 1:
 8|          return 1
 9|      else:
10|          return fib(n-1) + fib(n-2)
11|  
12|  def t_add(x: str, y: str) -> int:
13|      r = x + y
14|      return r
15|  
16|  x: int = fib(3)
17|  bad_y: str = 1
Feedbacks:
MypyFeedback(position=CodePosition(line=5, column=5), message='Argument 1 to "fib" has incompatible type "int"; expected "str" ', error_code='arg-type')
1|  from typing import Any # SPOT
2|  from bad_code_1 import fib
3|  
4|  i: int = 4
5|  fib(i)
6|  
Feedbacks:
MypyFeedback(position=CodePosition(line=4, column=10), message='Incompatible types in assignment (expression has type "int", variable has type "str") ', error_code='assignment')
1|  from typi

In [4]:
from spot.data import Path, type_check_src, type_check_src_in_project

# Type-check a single source with `type_check_src`, passing `{0: "int"}` as the
# predictions. The recorded output lists no feedback for this call.
src_to_check = simple_dataset.get_src_by_file(Path("bad_code_2.py"))
type_check_src(src_to_check, {0: "int"}).pretty_print()


Feedbacks:
from typing import Any # SPOT
from bad_code_1 import fib

i: int = 4
fib(i)



In [7]:
from spot.data import type_check_src, type_check_src_in_project
import shutil

# Check the same source again, this time via `type_check_src_in_project` with all
# `*.py` files under `data/code` as project files. The recorded output now reports an
# `arg-type` error on the call to `fib`, which the single-file check above did not.
src_to_check = simple_dataset.get_src_by_file(Path("bad_code_2.py"))
temp_dir = proj_root() / "mypy_temp/test_dir"
# Remove any leftover temp directory; `ignore_errors=True` tolerates a missing one.
shutil.rmtree(temp_dir, ignore_errors=True)

type_check_src_in_project(
    src_to_check,
    {0: "int"},
    project_files=(proj_root() / "data/code").glob("**/*.py"),
    project_root=(proj_root() / "data/code"),
    temp_dir=temp_dir,
).pretty_print()


Feedbacks:
MypyFeedback(position=CodePosition(line=5, column=5), message='Argument 1 to "fib" has incompatible type "int"; expected "str" ', error_code='arg-type')
from typing import Any # SPOT
from bad_code_1 import fib

i: int = 4
fib(i)



In [1]:
%load_ext autoreload
%autoreload 2

import os
import pickle
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from typing import *

import pandas as pd
import plotly.express as px

from spot.data import GitRepo, ModuleRemapUnpickler
from spot.type_env import (
    AnnotPath,
    MypyChecker,
    SelectAnnotations,
    TypeInfAction,
    TypeInfEnv,
    TypeInfState,
    collect_annotations,
    mypy_checker,
)
from spot.utils import cst, proj_root, read_file, seq_flatten, tqdm, write_file

# Run from the project root so the relative paths used in later cells resolve.
os.chdir(proj_root())

# `os.environ[...]` raises a KeyError naming the variable when it is unset, whereas
# `Path(os.getenv(...))` would fail with an opaque TypeError from `Path(None)`.
datadir = Path(os.environ["datadir"])
repos_dir = datadir / "SPOT-data/repos"

useful_repos_path = proj_root() / "scripts" / "useful_repos.pkl"


def rename_module(n):
    """Map the module name "spot.data_prepare" to "spot.data"; return other names unchanged."""
    return "spot.data" if n == "spot.data_prepare" else n


# NOTE(review): this loads a `.pkl` file; unpickling can execute arbitrary code,
# so only load a file you trust.
with useful_repos_path.open("rb") as f:
    useful_repos: list[GitRepo] = ModuleRemapUnpickler(f, rename_module).load()

In [18]:
# loading pre-trained model and tokenizer
from spot.utils import get_data_dir

model_dir = "Salesforce/codet5-base"
# model_dir = datadir / "checkpoints/saved/SPOT-CodeT5-no_margin/"

import torch
from transformers import (
    DataCollatorForSeq2Seq,
    RobertaTokenizer,
    T5ForConditionalGeneration,
)
from transformers.models.t5 import T5ForConditionalGeneration

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load the tokenizer and the model weights from `model_dir`, then move the model to `device`.
tokenizer: RobertaTokenizer = RobertaTokenizer.from_pretrained(model_dir)
model: T5ForConditionalGeneration = T5ForConditionalGeneration.from_pretrained(
    model_dir
).to(device)
# Passed as `max_length` to `model.generate` by the inference helpers in later cells.
max_target_length = 128


In [3]:
import shutil

# Recreate an empty scratch directory, then copy the two example files into it.
# The paths are relative, so they depend on the current working directory.
inference_dir = Path("data/code_output/inference")
if inference_dir.exists():
    shutil.rmtree(inference_dir)
inference_dir.mkdir(parents=True)
write_file(inference_dir / "env_code_1.py", read_file("data/code/env_code_1.py"))
write_file(inference_dir / "env_code_2.py", read_file("data/code/env_code_2.py"))


In [9]:
from spot.data import mask_type_annots, output_ids_as_types, tokenize_masked

test_code = """
@dataclass
class GitRepo:
    author: str
    name: str
    url: str
    stars: int
    forks: int

    def authorname(self):
        return self.author + "__" + self.name

    def repo_dir(self, repos_dir: Path) -> Path:
        return repos_dir / "downloaded" / self.authorname()

    def download(self, repos_dir: Path, timeout=None) -> bool:
        pass
"""


def run_model(code: str, num_beams=16):
    """Mask the type annotations in `code`, then score and decode them with the model.

    Relies on the notebook globals `tokenizer`, `model`, `device` and
    `max_target_length`.

    Args:
        code: Python source text.
        num_beams: Beam count passed to `model.generate`.

    Returns:
        A dict with the keys "loss", "predicted_types", "labels", "generation",
        "input_ids", "output_ids" and "annots_info".
    """
    masked = mask_type_annots((Path("no_source"), code))
    tks = tokenize_masked(masked, tokenizer, device)
    input_ids = tks["input_ids"]
    with torch.no_grad():
        # Call the module itself rather than `.forward()` so that any registered
        # hooks run, as recommended for `torch.nn.Module`.
        loss = model(**tks).loss
        # Keep only the first returned sequence.
        dec = model.generate(
            input_ids,
            max_length=max_target_length,
            num_beams=num_beams,
            # do_sample=True,
        )[0]
    return {
        "loss": loss,
        "predicted_types": output_ids_as_types(dec, tokenizer),
        "labels": output_ids_as_types(tks["labels"][0], tokenizer),
        "generation": tokenizer.decode(dec),
        "input_ids": input_ids[0],
        "output_ids": dec,
        "annots_info": masked["annots_info"],
    }


# Run the model on the example class; the bare last expression displays the loss.
result = run_model(test_code, num_beams=10)
result["loss"]


tensor(2.9077, device='cuda:0')

In [24]:
from spot import PythonType
from spot.type_env import apply_annotations


def type_to_annot(ty: PythonType) -> cst.Annotation:
    """Parse `str(ty)` as an expression and wrap it in a LibCST `Annotation` node."""
    return cst.Annotation(cst.parse_expression(str(ty)))


def run_aug_model(src: Path, cwd: Path):
    """Predict types for the file `src`, write them into it, and type-check the result.

    Note that `src` is overwritten in place with the annotated code.

    Args:
        src: Path of the Python file to annotate.
        cwd: Second argument passed to `MypyChecker.check_project`.

    Returns:
        A dict with the keys "model_result" (output of `run_model`), "module" (the
        annotated module), "checker_feedback" (result of
        `MypyChecker.check_project`) and "pos_to_preds" (annotation range ->
        predicted type as a string).
    """
    # Read the file once and reuse the text for both inference and parsing.
    code = read_file(src)
    result = run_model(code, num_beams=10)
    # Pair each annotation slot with its prediction once; `zip` stops at the
    # shorter sequence, exactly as the two separate zips did before.
    preds = list(zip(result["annots_info"], result["predicted_types"]))
    pred_annots = {info.path: type_to_annot(ty) for info, ty in preds}
    m1 = apply_annotations(cst.parse_module(code), pred_annots)
    write_file(src, m1.code)
    checker_r = MypyChecker.check_project(src, cwd)
    pos_to_preds = {info.annot_range: str(ty) for info, ty in preds}
    return {
        "model_result": result,
        "module": m1,
        "checker_feedback": checker_r,
        "pos_to_preds": pos_to_preds,
    }


# Annotate the copied example file in place and type-check it, passing
# `inference_dir` as the checker's second argument.
aug_r = run_aug_model(inference_dir / "env_code_2.py", inference_dir)


In [33]:
from spot.utils import patch_code_with_extra

# Show the model's predictions, its raw decoded output and the checker's report.
print("---- predicted types ----")
print(aug_r["model_result"]["predicted_types"])
print("---- model output ----")
print(tokenizer.decode(aug_r["model_result"]["output_ids"], skip_special_tokens=False))
print("---- checker_feedback ----")
print(aug_r["checker_feedback"].output_str)

# Build the next model input from the annotated code, the predictions and the
# checker errors recorded for "env_code_2.py".
print("---- new input ----")
new_input = patch_code_with_extra(
    aug_r["module"].code,
    aug_r["pos_to_preds"],
    aug_r["checker_feedback"].error_dict["env_code_2.py"],
)
print(new_input)


---- model output ----
<pad><s><extra_id_0>int<extra_id_1>int<extra_id_2>int<extra_id_3>int<extra_id_4>int, y : int<extra_id_5>int<extra_id_6>Optional[int]<extra_id_7>int<extra_id_8>int<extra_id_9>Bar[int, int, int, float, float]</s>
---- checker_feedback ----
env_code_2.py:20:14: error: Incompatible types in assignment (expression has type "str", variable has type "int")  [assignment]
env_code_2.py:32:29: error: Argument 1 to "len" has incompatible type "int"; expected "Sized"  [arg-type]
env_code_2.py:35:6: error: "Bar" expects no type arguments, but 5 given  [type-arg]
Found 3 errors in 1 file (checked 1 source file)

---- new input ----
# Env example 2: some existing annotations

from typing import *


def fib(n: /* int */<extra_id_0>):
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fib(n - 1) + fib(n - 2)


def foo(bar: /* int */<extra_id_1>):
    return fib(bar)


class Bar:
    z: /* int */<extra_id_2> = /* error: Incompatible types in

In [34]:
def run_model_with_extra(code: str, num_beams=16):
    """Encode `code` as-is, generate with the model, and decode the first sequence.

    Relies on the notebook globals `tokenizer`, `model`, `device` and
    `max_target_length`.

    Returns:
        A dict with the keys "predicted_types" and "generation".
    """
    encoded = tokenizer.encode(code, return_tensors="pt").to(device)
    generated = model.generate(
        encoded, max_length=max_target_length, num_beams=num_beams
    )
    # Only the first returned sequence is used.
    best = generated[0]
    predicted = output_ids_as_types(best, tokenizer)
    text = tokenizer.decode(best)
    return {"predicted_types": predicted, "generation": text}


run_model_with_extra(new_input)


{'predicted_types': [int,
  int,
  int,
  int,
  int,
  int,
  None,
  int,
  int,
  Bar[int, int, int, float, float]],
 'generation': '<pad><s><extra_id_0>int<extra_id_1>int<extra_id_2>int<extra_id_3>int<extra_id_4>int<extra_id_5>int<extra_id_6>None<extra_id_7>int<extra_id_8>int<extra_id_9>Bar[int, int, int, float, float]</s>'}

In [5]:
# Step 1: Replace every type annotation to be predicted with a special sentinel token
print(tokenizer.decode(result["input_ids"]))


<s>
@dataclass
class GitRepo:
    author:<extra_id_0>
    name:<extra_id_1>
    url:<extra_id_2>
    stars:<extra_id_3>
    forks:<extra_id_4>

    def authorname(self):
        return self.author + "__" + self.name

    def repo_dir(self, repos_dir:<extra_id_5>) -><extra_id_6>:
        return repos_dir / "downloaded" / self.authorname()

    def download(self, repos_dir:<extra_id_7>, timeout=None) -><extra_id_8>:
        pass
</s>


In [28]:
# Step 2: Tokenize using Byte Pair Encoding (BPE)
print(tokenizer.convert_ids_to_tokens(result["input_ids"]))


['<s>', 'Ċ', '@', 'data', 'class', 'Ċ', 'class', 'ĠGit', 'Repo', ':', 'Ċ', 'ĠĠĠ', 'Ġauthor', ':', '<extra_id_0>', 'Ċ', 'ĠĠĠ', 'Ġname', ':', '<extra_id_1>', 'Ċ', 'ĠĠĠ', 'Ġurl', ':', '<extra_id_2>', 'Ċ', 'ĠĠĠ', 'Ġstars', ':', '<extra_id_3>', 'Ċ', 'ĠĠĠ', 'Ġfor', 'ks', ':', '<extra_id_4>', 'Ċ', 'Ċ', 'ĠĠĠ', 'Ġdef', 'Ġauthor', 'name', '(', 'self', '):', 'Ċ', 'ĠĠĠĠĠĠĠ', 'Ġreturn', 'Ġself', '.', 'author', 'Ġ+', 'Ġ"__', '"', 'Ġ+', 'Ġself', '.', 'name', 'Ċ', 'Ċ', 'ĠĠĠ', 'Ġdef', 'Ġrepo', '_', 'dir', '(', 'self', ',', 'Ġrepos', '_', 'dir', ':', '<extra_id_5>', ')', 'Ġ->', '<extra_id_6>', ':', 'Ċ', 'ĠĠĠĠĠĠĠ', 'Ġreturn', 'Ġrepos', '_', 'dir', 'Ġ/', 'Ġ"', 'down', 'loaded', '"', 'Ġ/', 'Ġself', '.', 'author', 'name', '()', 'Ċ', 'Ċ', 'ĠĠĠ', 'Ġdef', 'Ġdownload', '(', 'self', ',', 'Ġrepos', '_', 'dir', ':', '<extra_id_7>', ',', 'Ġtimeout', '=', 'None', ')', 'Ġ->', '<extra_id_8>', ':', 'Ċ', 'ĠĠĠĠĠĠĠ', 'Ġpass', 'Ċ', '</s>']


In [29]:
# Step 3: Let the model predict the sequence of types as BPE tokens
print(tokenizer.convert_ids_to_tokens(result["output_ids"]))


['<pad>', '<s>', '<extra_id_0>', 'str', '<extra_id_1>', 'str', '<extra_id_2>', 'str', '<extra_id_3>', 'List', '[', 'str', ']', 'Ġ+', 'ĠList', '[', 'str', ']', '<extra_id_4>', 'List', '[', 'str', ']', 'Ġ+', 'ĠList', '[', 'str', ']', 'Ġ+', 'ĠList', '[', 'str', ']', '<extra_id_5>', 'Path', '<extra_id_6>', 'Path', 'Ġ.', 'ĠPath', '<extra_id_7>', 'Path', 'Ġ.', 'ĠPath', '<extra_id_8>', 'Path', 'Ġ.', 'ĠPath', 'Ġ[', 'Ġstr', ']', '</s>']


In [30]:
# Step 4: Extract the predicted types
print(result["predicted_types"])


[str, str, str, Any, Any, Path, Path.Path, Path.Path, Path.Path[str]]
