In [1]:
import yaml
import pandas as pd

In [None]:
# Path to the YAML configuration file
config_file_path = 'demo_config.yml'  # Replace with actual path

# Function to read a YAML file and return a dictionary
def read_yaml_config(file_path):
    with open(file_path, 'r') as file:
        config_dict = yaml.safe_load(file)
    return config_dict

# Reading the configuration file
config_data = read_yaml_config(config_file_path)
config_data

In [3]:
df = pd.read_csv(config_data['path_to_dataset'])

In [4]:
df

Unnamed: 0,Datetime,Baseline (kWh),Flexible (kWh),Peak
0,12/17/2021 0:00,0.030926,-3.620000e-14,False
1,12/17/2021 0:01,0.031567,2.610000e-11,False
2,12/17/2021 0:02,0.031963,2.650000e-11,False
3,12/17/2021 0:03,0.032306,2.690000e-11,False
4,12/17/2021 0:04,0.032656,2.720000e-11,False
...,...,...,...,...
7195,12/21/2021 23:55,0.033474,1.021433e-02,False
7196,12/21/2021 23:56,0.033610,1.021549e-02,False
7197,12/21/2021 23:57,0.033553,1.021675e-02,False
7198,12/21/2021 23:58,0.033582,1.021811e-02,False


### Build an RDF graph from the config file

In [8]:
import rdflib
import yaml

def create_data_graph_from_config_v2(config_file_path):
    # Load the config file
    with open(config_file_path, 'r') as file:
        config = yaml.safe_load(file)

    # Initialize an RDF graph
    g = rdflib.Graph()

    # Namespace for your ontology
    efkpis = rdflib.Namespace("http://example.org/efkpis#")
    xsd = rdflib.Namespace("http://www.w3.org/2001/XMLSchema#")

    # Evaluation Window
    evaluation_window = efkpis.EvaluationWindow
    start_time = rdflib.Literal(config['evaluation_window']['start'], datatype=xsd.dateTime)
    end_time = rdflib.Literal(config['evaluation_window']['end'], datatype=xsd.dateTime)
    g.add((evaluation_window, efkpis.startTime, start_time))
    g.add((evaluation_window, efkpis.endTime, end_time))

    # Peak Timestamps
    for ts in config['peak_timestamps']['values']:
        timestamp = rdflib.Literal(ts, datatype=xsd.dateTime)
        if ts == config['peak_timestamps']['values'][0]:
            g.add((efkpis.HighLoadStartTimestamp, efkpis.hasValue, timestamp))
        else:
            g.add((efkpis.HighLoadEndTimestamp, efkpis.hasValue, timestamp))

    # Data Sources
    for data_source in config['data_sources']:
        data_source_node = rdflib.BNode()  # Unique identifier for each data source
        source_type = rdflib.Literal(data_source['source_type'])
        path_to_dataset = rdflib.Literal(data_source['path_to_dataset'])
        data_reading_method = rdflib.Literal(data_source['data_reading_method'])

        # Add general data source properties
        g.add((data_source_node, efkpis.dataSourceType, source_type))
        g.add((data_source_node, efkpis.pathToDataset, path_to_dataset))
        g.add((data_source_node, efkpis.dataReadingMethod, data_reading_method))

        # Add profiles specific to each data source
        for key, profile_data in data_source['pandas_parsing_specs'].items():
            profile_node = efkpis[key]  # e.g., efkpis:timestamps, efkpis:baseline_power_profile, etc.
            for prop, value in profile_data.items():
                literal_value = rdflib.Literal(value or "unknown")
                g.add((profile_node, efkpis[prop], literal_value))

    # Return the RDF graph
    return g

# Example usage
data_graph = create_data_graph_from_config_v2('demo_config_v2.yml')

# Serialize the graph to view its contents (optional)
# print(data_graph.serialize(format='turtle').decode("utf-8"))

data_graph.serialize(format='turtle')


'@prefix ns1: <http://example.org/efkpis#> .\n@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\nns1:EvaluationWindow ns1:endTime "2019-01-02T00:00:00+00:00"^^xsd:dateTime ;\n    ns1:startTime "2019-01-01T00:00:00+00:00"^^xsd:dateTime .\n\nns1:HighLoadEndTimestamp ns1:hasValue "2019-01-01T13:00:00+00:00"^^xsd:dateTime .\n\nns1:HighLoadStartTimestamp ns1:hasValue "2019-01-01T12:00:00+00:00"^^xsd:dateTime .\n\nns1:baseline_power_profile ns1:column_identifier "Baseline (kW)" ;\n    ns1:dtype "float" ;\n    ns1:quantity "power" ;\n    ns1:unit "kW" .\n\nns1:flexible_power_profile ns1:column_identifier "Flexible (kW)" ;\n    ns1:dtype "float" ;\n    ns1:quantity "power" ;\n    ns1:unit "kW" .\n\nns1:generic_quantity_profile ns1:column_identifier "Cost ($)" ;\n    ns1:dtype "float" ;\n    ns1:quantity "unknown" ;\n    ns1:unit "unknown" .\n\nns1:timestamps ns1:column_identifier "Datetime" ;\n    ns1:dtype "datetime" ;\n    ns1:quantity "time" ;\n    ns1:unit "unknown" .\n\n[] ns1:dataRead

### Query evaluation window parameters from the graph

In [15]:
# SPARQL query to get the evaluation window
query = """
PREFIX efkpis: <http://example.org/efkpis#>

SELECT ?startTime ?endTime
WHERE {
    efkpis:EvaluationWindow efkpis:startTime ?startTime ;
                           efkpis:endTime ?endTime .
}
"""

# Execute the query
qres = data_graph.query(query)

# Print results
for row in qres:
    print(f"Start Time: {row.startTime}, End Time: {row.endTime}")

Start Time: 2019-01-01T00:00:00+00:00, End Time: 2019-01-02T00:00:00+00:00


### Query high load start and end timestamp from the graph

In [14]:
# SPARQL query to get the high load timestamps
query = """
PREFIX efkpis: <http://example.org/efkpis#>

SELECT ?startTimestamp ?endTimestamp
WHERE {
    efkpis:HighLoadStartTimestamp efkpis:hasValue ?startTimestamp .
    efkpis:HighLoadEndTimestamp efkpis:hasValue ?endTimestamp .
}
"""

# Execute the query
qres = data_graph.query(query)

# Print results
for row in qres:
    print(f"High Load Start Timestamp: {row.startTimestamp}, High Load End Timestamp: {row.endTimestamp}")


High Load Start Timestamp: 2019-01-01T12:00:00+00:00, High Load End Timestamp: 2019-01-01T13:00:00+00:00


### Query load profile data specs

In [18]:
query = """
PREFIX ns1: <http://example.org/efkpis#>

SELECT ?profile ?columnIdentifier ?quantity ?dtype ?unit ?dataReadingMethod ?dataSourceType ?pathToDataset
WHERE {
    VALUES ?profile { ns1:baseline_power_profile ns1:flexible_power_profile ns1:generic_quantity_profile ns1:timestamps }
    ?dataSourceInfo ns1:dataReadingMethod ?dataReadingMethod ;
                    ns1:dataSourceType ?dataSourceType ;
                    ns1:pathToDataset ?pathToDataset .
    ?profile ns1:column_identifier ?columnIdentifier ;
             ns1:quantity ?quantity ;
             ns1:dtype ?dtype ;
             ns1:unit ?unit .
}
"""

# Execute the query on the RDF graph
qres = data_graph.query(query)

# Print results
for row in qres:
    print(f"Profile: {row.profile}, Column Identifier: {row.columnIdentifier}, "
          f"Quantity: {row.quantity}, Data Type: {row.dtype}, Unit: {row.unit}, "
          f"Data Reading Method: {row.dataReadingMethod}, Data Source Type: {row.dataSourceType}, "
          f"Path To Dataset: {row.pathToDataset}")

Profile: http://example.org/efkpis#baseline_power_profile, Column Identifier: Baseline (kW), Quantity: power, Data Type: float, Unit: kW, Data Reading Method: pandas, Data Source Type: csv, Path To Dataset: path/to/baseline_power_data.csv
Profile: http://example.org/efkpis#baseline_power_profile, Column Identifier: Baseline (kW), Quantity: power, Data Type: float, Unit: kW, Data Reading Method: pandas, Data Source Type: csv, Path To Dataset: path/to/flexible_power_data.csv
Profile: http://example.org/efkpis#baseline_power_profile, Column Identifier: Baseline (kW), Quantity: power, Data Type: float, Unit: kW, Data Reading Method: pandas, Data Source Type: csv, Path To Dataset: path/to/energy_cost_data.csv
Profile: http://example.org/efkpis#flexible_power_profile, Column Identifier: Flexible (kW), Quantity: power, Data Type: float, Unit: kW, Data Reading Method: pandas, Data Source Type: csv, Path To Dataset: path/to/baseline_power_data.csv
Profile: http://example.org/efkpis#flexible_pow