In [1]:
%load_ext autoreload
%autoreload 2

# `os` is pulled in through `spot.utils` here (presumably the standard `os`
# module re-exported by that package — TODO confirm).
from spot.utils import proj_root, os
# Make `proj_root()` the working directory so that the relative paths used by
# later cells (e.g. "caches/run_type4py", "mypy_temp") resolve from there.
os.chdir(proj_root())

In [2]:
from spot.static_analysis import PythonProject
from spot.utils import *
from spot.model import ModelWrapper
from spot.visualization import pretty_print_dict, assert_eq
from spot.experiments.type4py import eval_type4py_on_projects
from spot.function_dataset import data_project_from_dir


# Dataset whose test repositories are evaluated; switch datasets by
# uncommenting one of the alternatives below.
dataset_name = "ManyTypes4Py"
# dataset_name = "InferTypes4Py"
# dataset_name = "TinyEval"

# Alternative input: build a single project from this repository itself.
# test_projects = [PythonProject.from_root(proj_root(), ignore_dirs={".venv", "data"})]

# Every sub-directory of <dataset dir>/repos/test is loaded as one test project.
repos_dir = get_dataset_dir(dataset_name) / "repos" / "test"
test_repo_paths = [entry for entry in repos_dir.iterdir() if entry.is_dir()]
test_projects = pmap(
    data_project_from_dir,
    test_repo_paths,
    desc="Loading test projects",
)

# Evaluation results go through a pickle cache rooted at caches/run_type4py,
# keyed by one file name per dataset (presumably so that a re-run reuses the
# stored result instead of recomputing it — confirm against PickleCache).
cache = PickleCache(Path("caches/run_type4py"))
# cache.clear()

eval_r = cache.cached(
    f"{dataset_name}.pkl",
    lambda: eval_type4py_on_projects(test_projects, max_workers=4),
)


Loading test projects: 100%|██████████| 50/50 [00:25<00:00,  1.94it/s]


In [3]:
from spot.static_analysis import SignatureErrorAnalysis, AccuracyMetric
from spot.experiments.typet5 import accs_as_table_row


# The common type names are read from a saved model directory and handed to
# the default metric set.
common_names = ModelWrapper.load_common_type_names(
    get_model_dir() / "model-v7--TrainingConfig(drop_env_types=False)"
)
metrics = AccuracyMetric.default_metrics(common_type_names=common_names)

# One accuracy report per metric, keyed by the metric's name. Predictions and
# labels both come from the Type4Py evaluation result; signature mismatches
# are not treated as errors (error_on_mismatched_signature=False).
accs = {
    metric.name: SignatureErrorAnalysis(
        eval_r.pred_maps,
        eval_r.label_maps,
        metric,
        error_on_mismatched_signature=False,
    ).accuracies
    for metric in metrics
}

# Report the results twice: first via the table-row helper, then as a
# pretty-printed dictionary.
accs_as_table_row(accs)
pretty_print_dict(accs)

Accuracies on all types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
34.32 & 34.55 & 35.87 & 19.94 & 35.61
Accuracies on common types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
51.72 & 49.57 & 51.15 & 31.76 & 47.51
Accuracies on rare types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
0.56 & 12.62 & 13.46 & 3.60 & 14.15
full_acc:
   full_acc: 34.32% (count=15.2k)
   full_acc_by_cat:
      FuncArg: 28.62% (count=7.5k)
      FuncReturn: 45.84% (count=5.7k)
      ClassAtribute: 22.73% (count=1.8k)
      GlobalVar: 13.33% (count=105)
   full_acc_by_simple:
      complex: 14.42% (count=1.7k)
      simple: 36.92% (count=13.4k)
   full_acc_label_size: 1.4182
   full_acc_pred_size: 1.2303
   full_acc_ignored_labels: 0
   n_skipped_types: 271
   n_missing_types: 323
full_acc_common:
   full_acc_common: 51.72% (count=10.0k)
   full_acc_common_b

In [9]:
from spot.experiments.utils import count_project_type_errors, count_type_errors

pred_maps = eval_r.pred_maps

# NOTE(review): projects and predictions are paired purely by position below,
# which assumes `pred_maps` iterates in the same order as `test_projects`
# (both may come from a cached run) — confirm. The length check at least
# catches the case where the two collections have drifted apart.
project_preds = list(pred_maps.values())
assert len(project_preds) == len(test_projects), (
    f"Expected one prediction map per test project, got {len(project_preds)} "
    f"maps for {len(test_projects)} projects."
)


def _count_errors_per_project(preds_per_project, desc):
    """Run `count_project_type_errors` once per test project through `pmap`.

    Args:
        preds_per_project: one signature map per entry of `test_projects`,
            in the same order.
        desc: progress-bar label forwarded to `pmap`.

    Returns:
        Whatever `pmap` returns for these calls (one result per project).

    Every call receives the same `Path("mypy_temp")` and `None` as its third
    and fourth arguments.
    """
    n_projects = len(test_projects)
    return pmap(
        count_project_type_errors,
        test_projects,
        preds_per_project,
        [Path("mypy_temp")] * n_projects,
        [None] * n_projects,
        desc=desc,
    )


# Baseline run: the same check on signatures after `drop_types()`
# (presumably with the predicted annotations removed — confirm), so that
# errors unrelated to the predictions can be subtracted afterwards.
background_errors = _count_errors_per_project(
    [{k: v.drop_types() for k, v in pred.items()} for pred in project_preds],
    desc="Counting background type errors",
)

background_count = count_type_errors(seq_flatten(background_errors))
print("Background Errors:", background_count)

# Actual run with the predicted signatures.
all_errors = _count_errors_per_project(project_preds, desc="Counting type errors")

# Errors attributed to the predictions = total minus the baseline.
error_count = count_type_errors(seq_flatten(all_errors)) - background_count
print("Error Count:", error_count)

Counting type errors:  28%|██▊       | 14/50 [00:05<00:13,  2.69it/s]



Counting type errors: 100%|██████████| 50/50 [00:28<00:00,  1.73it/s]

Error Count: 51





In [12]:
# Total number of items yielded by `all_elems()` across all test projects.
sum(1 for project in test_projects for _ in project.all_elems())

16881

In [None]:
if False:
    # Disabled debugging aid: print every predicted signature, grouped by
    # project. Intended for inspecting small projects only.
    banner = "=" * 20
    for project, sig_map in eval_r.pred_maps.items():
        print(banner, project, banner)
        for elem_path, signature in sig_map.items():
            print("\t", elem_path, ":", str(signature))

	 goodreads/Book.__init__ : (title: bool, author: bool, original_publication_year: bool, str_distance: bool) -> None
	 goodreads/Book.__str__ : () -> str
	 goodreads/GoodreadsBook.__init__ : (title: str, author: str, original_publication_year: str, str_distance: str, num_ratings: int, node: bool) -> None
	 goodreads/GoodreadsBook.get_goodreads_id : () -> str
	 goodreads/search_for_book : (title: str) -> str
	 goodreads/suggest_book_from_results : (searched_title: List[str], root: str) -> str
	 goodreads/get_books_from_file : (fname: str) -> None
	 goodreads/get_obviously_correct_book : (relevant_books: bool) -> bool
	 goodreads/resolve_via_human : (query: dict, relevant_books: str) -> str
	 goodreads/save_chosen_books : (person: str, chosen_books: bool) -> None
	 goodreads/get_output_fname : (person: str) -> str
	 goodreads/confirm : (msg: str) -> bool
	 goodreads/GoodreadsResolutionCache.__init__ : (cache: bool, is_dirty: bool) -> None
	 goodreads/GoodreadsResolutionCache.load : () ->

: 