# Generate pCBR predictions on MIND

In [1]:
import subprocess
import wandb
import os
import sys
import shutil
import polars as pl
import pandas as pd

# Move into the repository root so the relative script paths used by the cells
# below resolve. Guarded so that re-running this cell, when the kernel is already
# inside the repository directory, does not raise FileNotFoundError.
if os.path.basename(os.getcwd()) != "Consilience-Drug-Repurposing":
    os.chdir("./Consilience-Drug-Repurposing")

# Make the probCBR package directory importable; skip the append when the entry
# is already present so re-running the cell does not add duplicates.
if "./path-based/probCBR/prob_cbr/" not in sys.path:
    sys.path.append("./path-based/probCBR/prob_cbr/")

## Prepare files for pCBR execution

Create required vocabulary files

In [None]:
# Run the preprocessing script with --create_vocab for the MIND dataset.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed step is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/data/preprocessing.py",
        "--dataset_name",
        "MIND",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
        "--create_vocab",
    ],
    check=True,
)

Calculate entity similarities

In [None]:
# Run the preprocessing script with --calculate_ent_similarity for the MIND dataset.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed step is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/data/preprocessing.py",
        "--dataset_name",
        "MIND",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
        "--calculate_ent_similarity",
    ],
    check=True,
)

### Calculate subgraph
* Note: this subgraph is _NOT_ the same as the subgraph generated previously in the [CBR predictions notebook](./4_Generate_CBR_Predictions.ipynb).
* Operation should be run in parallel or you'll wait a week for your subgraph.
* Choose the number of jobs you want, create multiple tmux sessions and have at it

In [None]:
# Repeat for all jobs; in this example 0 - 9.
total_jobs = 10  # value passed to --total_jobs
current_job = 0  # value passed to --current_job; change it for each run (0 - 9 here)

# Run the preprocessing script with --get_paths_parallel for one job index.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed job is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/data/preprocessing.py",
        "--dataset_name",
        "MIND",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
        "--get_paths_parallel",
        "--total_jobs",
        str(total_jobs),
        "--current_job",
        str(current_job),
    ],
    check=True,
)

Combine the created subgraph files into one

In [None]:
# Run the preprocessing script with --combine_paths for the MIND dataset.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed step is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/data/preprocessing.py",
        "--dataset_name",
        "MIND",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
        "--combine_paths",
    ],
    check=True,
)

### Create prior map
* The prior map calculates the probability of a path given a query relation
* As above, run this in parallel unless you want to wait 5 hours for it

In [None]:
# Repeat for all jobs; in this example 0 - 9.
total_jobs = 10  # value passed to --total_jobs
current_job = 0  # value passed to --current_job; change it for each run (0 - 9 here)

# Run the preprocessing script with --calculate_prior_map_parallel for one job index.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed job is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/data/preprocessing.py",
        "--dataset_name",
        "MIND",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
        "--calculate_prior_map_parallel",
        "--total_jobs",
        str(total_jobs),
        "--current_job",
        str(current_job),
    ],
    check=True,
)

Combine the prior maps

In [None]:
# Run the preprocessing script with --combine_prior_map for the MIND dataset.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed step is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/data/preprocessing.py",
        "--dataset_name",
        "MIND",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
        "--combine_prior_map",
    ],
    check=True,
)

### Create Precision map
* Calculates the ratio of how many times a path was successful when executed compared to how many times the path was executed.
* As with subgraph generation and prior map generation, this should be parallelized or it will take a couple of days

In [None]:
# Repeat for all jobs; in this example 0 - 9.
total_jobs = 10  # value passed to --total_jobs
current_job = 0  # value passed to --current_job; change it for each run (0 - 9 here)

# Run the preprocessing script with --calculate_precision_map_parallel for one job index.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed job is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/data/preprocessing.py",
        "--dataset_name",
        "MIND",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
        "--calculate_precision_map_parallel",
        "--total_jobs",
        str(total_jobs),
        "--current_job",
        str(current_job),
    ],
    check=True,
)

Combine the precision maps

In [None]:
# Run the preprocessing script with --combine_precision_map for the MIND dataset.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed step is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/data/preprocessing.py",
        "--dataset_name",
        "MIND",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
        "--combine_precision_map",
    ],
    check=True,
)

## Run pCBR model

In [None]:
# Run the pCBR test script on the MIND dataset, pointing it at the combined
# subgraph file via --subgraph_file_name.
# Each list element is handed to the script as exactly one argv token, so an
# option and its value ("--linkage", "0") must be separate elements.
# check=True raises CalledProcessError when the script exits with a nonzero
# status, so a failed run is not silently ignored.
subprocess.run(
    [
        "python",
        "path-based/probCBR/prob_cbr/pr_cbr_test.py",
        "--dataset_name",
        "MIND",
        "--subgraph_file_name",
        "paths_1000_pathLen_3_noLoops_invEdges_combined.pkl",
        "--k_adj",
        "10",
        "--max_path_len",
        "3",
        "--linkage",
        "0",
        "--prevent_loops",
        "--add_inv_edges",
        "--test",
    ],
    check=True,
)