# Preamble

Set up the input and output file paths used throughout the notebook

In [1]:
import os

# Directory holding the T2WML project inputs (spreadsheet, model and wikifier files).
model_path = "t2wml_project/"
# os.path.join is used instead of "{}/name".format(dir): the directory values
# already end in "/", so formatting produced paths with a doubled separator
# (e.g. "data//graph_implode.tsv").
data_file = os.path.join(model_path, "Ethiopia_Daily_Data_Multiple_Stations.xlsx")
model_file = os.path.join(model_path, "discharge_model.yaml")
wikifier_file = os.path.join(model_path, "wikify_region_output.csv")
# Sheet of the spreadsheet passed to T2WML.
sheet_name = "Baro"

# Directory receiving the intermediate KGTK files and the final CSV.
output_path = "data/"
dataset_original = os.path.join(output_path, "baro_example_kgtk.tsv")  # written by T2WML
dataset_id = os.path.join(output_path, "graph_add_id.tsv")  # written by `kgtk add-id`
dataset_implode = os.path.join(output_path, "graph_implode.tsv")  # written by `kgtk implode`
output_file = os.path.join(output_path, "Baro-Masha_Discharge_2015-10_to_2018-10.csv")

In [2]:
import pandas as pd

# Generate KGTK file from T2WML

Run the T2WML API to generate the KGTK graph file (this can also be done from the T2WML GUI via download kgtk)

In [3]:
from t2wml.knowledge_graph import KnowledgeGraph

# Build the knowledge graph from the spreadsheet sheet, the T2WML model file and
# the wikifier file configured in the first cell.
kg = KnowledgeGraph.generate_from_files(data_file, sheet_name, model_file, wikifier_file)
# Save the graph to `dataset_original`, requesting the "kgtk" output format.
kg.save_file(dataset_original, "kgtk")

# Process KGTK file

Run kgtk add-id to give the edges ids, then run kgtk implode to fill missing columns in the graph file

In [4]:
# Read the T2WML-generated graph ($dataset_original) and write a copy with ids
# added to $dataset_id, using the node1-label-num id style.
!kgtk add-id -i $dataset_original -o $dataset_id --id-style node1-label-num

In [5]:
# Implode the id-annotated graph ($dataset_id) into $dataset_implode, leaving out
# the language_suffix, si_units and units_node fields.
# NOTE: stderr is redirected to /dev/null, so any error or warning from this
# command is hidden; drop the redirect when debugging.
!kgtk implode -i $dataset_id -o $dataset_implode --without language_suffix si_units units_node 2>/dev/null

# Run Kypher

Generate the output table by running a Kypher (Cypher-style) query against the graph with kgtk query

In [6]:
# Query the imploded graph: match every edge r (n1 -> n_discharge) that itself has
# P585 (date), P2046 (area) and P625 (location) edges attached to it, and return
# one distinct row per match with the date (via kgtk_date_date), the discharge
# value, the area, and the latitude/longitude fields of the location.
# The result is written to $output_file, which the next cell reads back as
# tab-separated text.
!kgtk query -i $dataset_implode \
--match '(n1)-[r]->(n_discharge), \
    (r)-[r_date:P585]->(n_date), \
    (r)-[r_area:P2046]->(n_area), \
    (r)-[r_location:P625]->(n_location)' \
--return 'distinct kgtk_date_date(n_date) as date, \
    n_discharge as `discharge(cumecs)`, \
    n_area as `area(sq km)`, \
    n_location.`kgtk:latitude` as latitude, n_location.`kgtk:longitude` as longitude' \
-o $output_file

# Clean file and generate final output

Final output is data/Baro-Masha_Discharge_2015-10_to_2018-10.csv

In [7]:
# NOTE: this cell reads the tab-separated query result from `output_file` and then
# overwrites that same path with comma-separated data. Re-running it therefore
# requires re-running the `kgtk query` cell first.
csv_table = (
    pd.read_table(output_file, sep="\t")
    # The dates come back prefixed with a caret symbol. Strip only that marker:
    # an unconditional `.str[1:]` would chop the first digit off any value that
    # does not carry the prefix.
    .assign(date=lambda table: table["date"].str.lstrip("^"))
    # Dates are compared as strings here, not parsed into datetimes.
    .sort_values(by="date")
)
csv_table.to_csv(output_file, index=False)