In [1]:
import os

import papermill as pm

from kgtk.configure_kgtk_notebooks import ConfigureKGTK
from kgtk.functions import kgtk, kypher

In [3]:
# Project-level settings handed to ConfigureKGTK in the next cell.
project_name = 'create-wikidata-dwd'
input_path = '/data/amandeep'
output_path = '/data/amandeep'

# Forwarded to every papermill run below: the kernel that executes the child
# notebooks and the kgtk_path value each of them receives.
kernel_name = 'kgtk-env-ckg07'
kgtk_path = '/data/amandeep/Github/kgtk'

In [None]:
# Configure KGTK for this project. The file list is empty and no graph cache
# path is given.
# NOTE(review): the papermill cells further down read os.environ["TEMP"];
# presumably configure_kgtk() is what exports it — confirm.
ck = ConfigureKGTK([], kgtk_path=kgtk_path)
ck.configure_kgtk(
    input_graph_path=input_path,
    output_path=output_path,
    project_name=project_name,
    graph_cache_path=None,
)

## Run the Import Wikidata Notebook 

In [None]:
# Settings for the import-wikidata notebook run.
wikidata_project_name = 'import-wikidata'
wikidata_json_file = 'latest-all.json.bz2'

# Passed to the child notebook as input_path and output_path respectively;
# both currently point at the same directory.
json_file_path = '/data/amandeep/wikidata-20220519'
import_wikidata_path = '/data/amandeep/wikidata-20220519'

# Forwarded unchanged as the child notebook's sort_command parameter.
sort_command = "sort"

In [None]:
# Run import-wikidata.ipynb through papermill with the parameters defined
# above. The executed copy is written into the directory named by the TEMP
# environment variable (a KeyError is raised if TEMP is not set).
pm.execute_notebook(
    "import-wikidata.ipynb",
    # os.path.join inserts exactly one separator, even if TEMP already ends in "/".
    os.path.join(os.environ["TEMP"], "import-wikidata.out.ipynb"),
    kernel_name=kernel_name,
    parameters=dict(
        input_path=json_file_path,
        output_path=import_wikidata_path,
        project_name=wikidata_project_name,
        wikidata_json_file=wikidata_json_file,
        kgtk_path=kgtk_path,
        sort_command=sort_command,
    ),
)

## Run the Useful Files Notebook to compute `isa` and `p279star` files only

In [None]:
# Settings for the first Wikidata-Useful-Files run.
first_useful_files_project_name = "useful-files"

# Input is "<import_wikidata_path>/<wikidata_project_name>"; output goes to
# import_wikidata_path itself.
first_useful_files_input_path = "/".join((import_wikidata_path, wikidata_project_name))
first_useful_files_output_path = import_wikidata_path

# Comma-separated names forwarded as the child notebook's `files` and
# `files_for_cache` parameters.
first_useful_files = "claims,label_all,alias_all,description_all"
first_useful_files_for_cache = "claims"

In [None]:
# Run Wikidata-Useful-Files.ipynb through papermill. The executed copy is
# written into the directory named by the TEMP environment variable (a
# KeyError is raised if TEMP is not set).
pm.execute_notebook(
    "Wikidata-Useful-Files.ipynb",
    # os.path.join inserts exactly one separator, even if TEMP already ends in "/".
    os.path.join(os.environ["TEMP"], "Wikidata-Useful-Files.out.ipynb"),
    kernel_name=kernel_name,
    parameters=dict(
        input_path=first_useful_files_input_path,
        output_path=first_useful_files_output_path,
        project_name=first_useful_files_project_name,
        kgtk_path=kgtk_path,
        files=first_useful_files,
        files_for_cache=first_useful_files_for_cache,
        # Every optional computation flag is switched off for this pass.
        compute_pagerank=False,
        compute_degrees=False,
        debug=False,
        compute_isa_star=False,
        compute_p31p279_star=False,
    ),
)

In [None]:
!cp $import_wikidata_path/$first_useful_files_project_name/derived.isa.tsv.gz $import_wikidata_path
!cp $import_wikidata_path/$first_useful_files_project_name/derived.P279star.tsv.gz $import_wikidata_path

## Run Wikidata Subsets Notebook

In [None]:
# Settings for the Wikidata-Subsets run.
subset_project_name = 'wikidata-20220519-dwd-v5'

# The subsets notebook reads from the import directory and writes under /data/amandeep.
subset_input_path = import_wikidata_path
subset_output_path = '/data/amandeep'

# Comma-separated file names forwarded as the `files` parameter.
subset_files = 'claims,label_all,alias_all,description_all,item,qualifiers,datatypes,types,isa,p279star'

# Comma-separated class identifiers forwarded as `remove_classes` (classes to remove).
remove_classes = 'Q7318358,Q13442814'

# Comma-separated language codes forwarded as `languages`.
languages = 'en,ru,es,zh-cn,de,it,nl,pl,fr,pt,sv'

In [None]:
# Run Wikidata-Subsets.ipynb through papermill with the subset parameters
# defined above. The executed copy is written into the directory named by the
# TEMP environment variable (a KeyError is raised if TEMP is not set).
pm.execute_notebook(
    "Wikidata-Subsets.ipynb",
    # os.path.join inserts exactly one separator, even if TEMP already ends in "/".
    os.path.join(os.environ["TEMP"], "Wikidata-Subsets.out.ipynb"),
    kernel_name=kernel_name,
    parameters=dict(
        input_path=subset_input_path,
        output_path=subset_output_path,
        project_name=subset_project_name,
        kgtk_path=kgtk_path,
        files=subset_files,
        remove_classes=remove_classes,
        languages=languages,
    ),
)