In [13]:
# Notebook setup: enable autoreload so edits to imported modules are picked up
# live, then switch the working directory to the value returned by proj_root().
%load_ext autoreload
%autoreload 2

# NOTE(review): `os` is pulled from typet5.utils' namespace rather than imported
# directly; this works only as long as that module keeps exposing it.
from typet5.utils import proj_root, os
# Relative paths used by later cells (e.g. "caches/run_type4py", "mypy_temp")
# are resolved against this directory.
os.chdir(proj_root())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
from typet5.static_analysis import PythonProject
from typet5.utils import *
from typet5.model import ModelWrapper
from typet5.visualization import pretty_print_dict, assert_eq
from typet5.experiments.type4py import eval_type4py_on_projects
from typet5.function_dataset import data_project_from_dir


# Dataset to evaluate on; uncomment one of the alternatives to switch.
dataset_name = "ManyTypes4Py"
# dataset_name = "InferTypes4Py"
# dataset_name = "TinyEval"

# test_projects = [PythonProject.parse_from_root(proj_root(), ignore_dirs={".venv", "data"})]

# Each sub-directory of <dataset dir>/repos/test is loaded as one test project.
repos_dir = get_dataset_dir(dataset_name) / "repos" / "test"
test_repo_paths = [entry for entry in repos_dir.iterdir() if entry.is_dir()]
test_projects = pmap(data_project_from_dir, test_repo_paths, desc="Loading test projects")


def _eval_type4py_on_test_projects():
    """Run the Type4Py evaluation over `test_projects` with 4 workers.

    Handed to `cache.cached` below as the compute callback.
    NOTE(review): presumably only invoked when the pickle is missing --
    confirm against PickleCache.cached.
    """
    return eval_type4py_on_projects(test_projects, max_workers=4)


cache = PickleCache(Path("caches/run_type4py"))
# cache.clear()

# One cached result file per dataset name.
eval_r = cache.cached(f"{dataset_name}.pkl", _eval_type4py_on_test_projects)


Loading test projects: 100%|██████████| 3/3 [00:10<00:00,  3.42s/it]


In [15]:
# n_annots: summed over the signature of every element in every test project.
n_annots = sum(
    elem.get_signature().n_annots()
    for project in test_projects
    for elem in project.all_elems()
)
# n_labels: summed over every entry of every label map in the evaluation result.
n_labels = sum(
    entry.n_annotated()
    for label_map in eval_r.label_maps.values()
    for entry in label_map.values()
)
print(f"n_annots: {n_annots}, n_labels: {n_labels}")
print(f"Ratio: {n_labels / n_annots}")

n_annots: 4614, n_labels: 2659
Ratio: 0.5762895535327265


In [16]:
from typet5.static_analysis import SignatureErrorAnalysis, AccuracyMetric
from typet5.experiments.typet5 import accs_as_table_row


# Common type names are loaded from the saved model directory and passed to the
# default accuracy metrics.
common_names = ModelWrapper.load_common_type_names(
    get_model_dir() / "model-v7--TrainingConfig(drop_env_types=False)"
)
metrics = AccuracyMetric.default_metrics(common_type_names=common_names)
# acc_metric = AccuracyMetric(common_type_names=ubiq_names)


def _accuracies_under(metric):
    """Return the accuracies of `eval_r`'s predictions against its labels under `metric`.

    Signature mismatches between predictions and labels are not treated as
    errors (error_on_mismatched_signature=False).
    """
    analysis = SignatureErrorAnalysis(
        eval_r.pred_maps,
        eval_r.label_maps,
        metric,
        error_on_mismatched_signature=False,
    )
    return analysis.accuracies


# One accuracy report per metric, keyed by the metric's name.
accs = {metric.name: _accuracies_under(metric) for metric in metrics}

accs_as_table_row(accs)
pretty_print_dict(accs)

Accuracies on all types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
22.63 & 21.11 & 22.35 & 9.61 & 22.64
Accuracies on common types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
33.33 & 32.08 & 33.47 & 16.54 & 29.83
Accuracies on rare types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
0.12 & 0.25 & 0.14 & 0.98 & 0.17
full_acc:
   full_acc: 22.63% (count=2.5k)
   full_acc_by_cat:
      FuncArg: 19.89% (count=1.6k)
      FuncReturn: 33.12% (count=640)
      ClassAtribute: 12.50% (count=240)
      GlobalVar: 75.00% (count=4)
   full_acc_by_simple:
      complex: 8.81% (count=329)
      simple: 24.70% (count=2.2k)
   full_acc_label_size: 1.7436
   full_acc_pred_size: 1.2612
   full_acc_ignored_labels: 0
   n_skipped_types: 23
   n_missing_types: 113
full_acc_common:
   full_acc_common: 33.33% (count=1.7k)
   full_acc_common_by_cat:
      F

In [19]:
from typet5.experiments.utils import collect_project_type_errors, count_type_errors

pred_maps = eval_r.pred_maps

# One bin path per test project, passed through to collect_project_type_errors.
# NOTE(review): the InferTypes4Py entries are absolute paths on one developer's
# machine and the list length (3) must match len(test_projects).
if dataset_name == "InferTypes4Py":
    bin_paths = [
        None,
        Path("/home/jiayi/Projects/typilus/.venv/bin"),
        Path("/home/jiayi/Projects/type4py/.venv/bin"),
    ]
else:
    bin_paths = [None] * len(test_projects)


def _collect_errors(sig_maps, desc):
    """Run collect_project_type_errors across all test projects.

    `sig_maps` supplies one signature map per project (paired positionally with
    `test_projects`); `desc` is the progress-bar label. Every project uses the
    same "mypy_temp" path argument and its entry from `bin_paths`.
    """
    return pmap(
        collect_project_type_errors,
        test_projects,
        sig_maps,
        [Path("mypy_temp")] * len(test_projects),
        bin_paths,
        desc=desc,
    )


# Baseline: the predicted signatures with their types dropped.
background_errors = _collect_errors(
    [{path: sig.drop_types() for path, sig in pred.items()} for pred in pred_maps.values()],
    "Counting background type errors",
)
background_count = count_type_errors(seq_flatten(background_errors))
print("Background errors:", background_count)

# Errors produced when the label maps (user annotations) are applied.
user_errors = _collect_errors(
    list(eval_r.label_maps.values()),
    "Counting user annotation type errors",
)
user_error_count = count_type_errors(seq_flatten(user_errors))
print("User annotated errors:", user_error_count)

# Errors produced by the predictions, reported net of the baseline count.
all_errors = _collect_errors(list(pred_maps.values()), "Counting type errors")
error_count = count_type_errors(seq_flatten(all_errors)) - background_count
print("Error Count:", error_count)

Counting background type errors: 100%|██████████| 3/3 [00:10<00:00,  3.43s/it]

Background errors: 20



Counting user annotation type errors: 100%|██████████| 3/3 [00:10<00:00,  3.65s/it]

User annotated errors: 122



Counting type errors: 100%|██████████| 3/3 [00:11<00:00,  3.82s/it]

Error Count: 3495





In [20]:
# Total number of elements across all test projects (not used further in this
# cell). Counted lazily instead of building a list per project.
n_elems = sum(1 for project in test_projects for _ in project.all_elems())

# Keep `n_labels` an integer total, matching how the earlier counting cell
# defines it, instead of rebinding the same name to a list of per-entry counts.
n_labels = sum(
    entry.n_annotated()
    for label_map in eval_r.label_maps.values()
    for entry in label_map.values()
)

# Report NaN rather than raising ZeroDivisionError when there are no labels.
print(
    "Coherence Errors per label:",
    user_error_count / n_labels if n_labels else float("nan"),
)

Coherence Errors per label: 0.04588191049266641


In [None]:
# Debug dump of every predicted signature, grouped by project. Disabled by
# default; change the condition to True when inspecting small projects only.
if False:
    banner = "=" * 20
    for project_name, signatures in eval_r.pred_maps.items():
        print(banner, project_name, banner)
        for elem_path, signature in signatures.items():
            print("\t", elem_path, ":", str(signature))

	 goodreads/Book.__init__ : (title: bool, author: bool, original_publication_year: bool, str_distance: bool) -> None
	 goodreads/Book.__str__ : () -> str
	 goodreads/GoodreadsBook.__init__ : (title: str, author: str, original_publication_year: str, str_distance: str, num_ratings: int, node: bool) -> None
	 goodreads/GoodreadsBook.get_goodreads_id : () -> str
	 goodreads/search_for_book : (title: str) -> str
	 goodreads/suggest_book_from_results : (searched_title: List[str], root: str) -> str
	 goodreads/get_books_from_file : (fname: str) -> None
	 goodreads/get_obviously_correct_book : (relevant_books: bool) -> bool
	 goodreads/resolve_via_human : (query: dict, relevant_books: str) -> str
	 goodreads/save_chosen_books : (person: str, chosen_books: bool) -> None
	 goodreads/get_output_fname : (person: str) -> str
	 goodreads/confirm : (msg: str) -> bool
	 goodreads/GoodreadsResolutionCache.__init__ : (cache: bool, is_dirty: bool) -> None
	 goodreads/GoodreadsResolutionCache.load : () ->

: 