In [3]:

from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDFS, RDF, XSD
import json
from textwrap import dedent

# Path to the Turtle file holding the knowledge graph (adjust if yours differs).
TTL_PATH = r"D:\MA_Python_Agent\MSRGuard_Anpassung\KGs\Test_filled.ttl"

# Parse the Turtle file into a module-level graph that the later cells operate on.
g = Graph()
g.parse(TTL_PATH, format="turtle")
print(f"Graph geladen mit {len(g)} Tripeln.")

# Namespaces: AG is the base namespace, OP and DP are the same base with the
# 'op_' / 'dp_' local-name prefixes used for the properties referenced below.
AG = Namespace("http://www.semanticweb.org/AgentProgramParams/")
OP = Namespace("http://www.semanticweb.org/AgentProgramParams/op_")
DP = Namespace("http://www.semanticweb.org/AgentProgramParams/dp_")

# Register the prefixes on the graph.
g.bind("ag", AG)
g.bind("op", OP)
g.bind("dp", DP)

from rdflib import URIRef





Graph geladen mit 478 Tripeln.


In [23]:
def get_local_name(uri: str) -> str:
    """
    Return the local name of a URI.

    If the URI contains a '#', everything after the last '#' is returned.
    Otherwise trailing slashes are dropped and everything after the last
    remaining '/' is returned (the whole string when there is no '/').
    """
    _, hash_sep, fragment = uri.rpartition("#")
    if hash_sep:
        return fragment
    return uri.rstrip("/").rpartition("/")[2]


def build_kg_variables_from_info(info: dict) -> dict:
    """
    Turn the result of run_var_query_for_program(...) into per-role records.

    Returned structure:

    {
      "inputs":   [ { "uri": ..., "kg_name": ..., "role": "input"   }, ... ],
      "outputs":  [ { "uri": ..., "kg_name": ..., "role": "output"  }, ... ],
      "internals":[ { "uri": ..., "kg_name": ..., "role": "internal"}, ... ],
      "usedvars": [ { "uri": ..., "kg_name": ..., "role": "used"    }, ... ]
    }

    "kg_name" is the local name of the URI; a key missing from ``info``
    produces an empty list.
    """
    # (key in info / key in result, role label stored in each record)
    role_by_key = (
        ("inputs", "input"),
        ("outputs", "output"),
        ("internals", "internal"),
        ("usedvars", "used"),
    )

    return {
        key: [
            {"uri": var_uri, "kg_name": get_local_name(var_uri), "role": role}
            for var_uri in info.get(key, [])
        ]
        for key, role in role_by_key
    }


In [24]:
def get_all_program_uris(graph: Graph):
    """
    Return the URIs (as strings) of all individuals typed ag:class_Program,
    de-duplicated and sorted.
    """
    unique_uris = set()
    for subject in graph.subjects(RDF.type, AG.class_Program):
        unique_uris.add(str(subject))
    return sorted(unique_uris)

def clear_all_consistency_reports(graph: Graph):
    """Remove every dp:hasConsistencyReport triple of every program individual."""
    program_refs = [URIRef(uri) for uri in get_all_program_uris(graph)]
    for program_ref in program_refs:
        # None acts as a wildcard for the object position.
        graph.remove((program_ref, DP.hasConsistencyReport, None))
        
from rdflib import URIRef

def run_var_query_for_program(graph: Graph, program_uri: str):
    """
    Read inputs, outputs, internals, used variables, program code and
    programming language of one program directly from the graph (no SPARQL).

    Returns a dict with the keys "program_uri", "code",
    "programming_language", "inputs", "outputs", "internals" and "usedvars".
    The variable lists hold sorted, de-duplicated URI strings; "code" and
    "programming_language" are None when the graph has no such value.
    """
    prog = URIRef(program_uri)

    def sorted_objects(predicate):
        # Sorted, de-duplicated string form of all objects of (prog, predicate).
        return sorted({str(obj) for obj in graph.objects(prog, predicate)})

    def first_object_as_str(predicate):
        # String form of the first object found, or None if there is none.
        for obj in graph.objects(prog, predicate):
            return str(obj)
        return None

    # Only the first program code literal is used.
    code_str = first_object_as_str(DP.hasProgramCode)
    print("Ergebnisse für Programm:", program_uri, "Code gefunden:", code_str is not None)

    return {
        "program_uri": program_uri,
        "code": code_str,
        "programming_language": first_object_as_str(DP.hasProgrammingLanguage),
        "inputs": sorted_objects(OP.hasInputVariable),
        "outputs": sorted_objects(OP.hasOutputVariable),
        "internals": sorted_objects(OP.hasInternalVariable),
        "usedvars": sorted_objects(OP.usesVariable),
    }


In [25]:
import re

def extract_variables_from_python(program_code: str) -> dict:
    """
    Heuristically extract inputs, outputs and internal variables from the
    generated Python code of a PLC program.

    Heuristics:
      - inputs:    parameter names of every ``def`` signature that fits on a
                   single line. Type annotations, default values and leading
                   ``*`` / ``**`` are stripped; a return annotation
                   (``-> type``) is allowed. Names starting with 'V_' are
                   ignored.
      - outputs:   string keys (single or double quoted) inside
                   ``return {...}`` dict literals; keys starting with 'V_'
                   are ignored.
      - internals: names on the left of a plain ``=`` at the start of a line
                   that are neither inputs nor outputs; names starting with
                   'V_' are ignored. Comparisons (``==``) are not assignments.

    Args:
        program_code: Python source text; an empty value yields empty lists.

    Returns:
        dict with the keys "inputs", "outputs" and "internals", each a sorted
        list of names.
    """
    if not program_code:
        return {"inputs": [], "outputs": [], "internals": []}

    inputs = set()
    outputs = set()
    internals = set()

    lines = program_code.splitlines()

    # 1) Function signature -> input variables.
    #    The optional "-> annotation" part keeps annotated signatures matching.
    sig_pattern = re.compile(r'^\s*def\s+\w+\((.*?)\)\s*(?:->[^:]*)?:')
    for line in lines:
        m = sig_pattern.match(line)
        if not m:
            continue
        for part in m.group(1).split(','):
            # "name: type = default", "name=default", "*args" -> bare name
            name = part.split(':', 1)[0].split('=', 1)[0].strip().lstrip('*').strip()
            if not name.isidentifier():
                continue  # empty fragments and bare '*' / '/' markers
            if name.startswith('V_'):
                continue  # ignore function-block helper variables
            inputs.add(name)

    # 2) return {'Name': ...} -> output variables (either quote style).
    return_blocks = re.findall(r"return\s*\{([^}]*)\}", program_code, flags=re.DOTALL)
    key_pattern = re.compile(r"'([^']+)'\s*:" + r'|"([^"]+)"\s*:')
    for block in return_blocks:
        for key_match in key_pattern.finditer(block):
            key = key_match.group(1) or key_match.group(2)
            if key and not key.startswith('V_'):
                outputs.add(key)

    # 3) Assignments -> internal variables.
    #    The negative lookahead rejects '==' so comparisons are not counted.
    assign_pattern = re.compile(r'^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=(?!=)')
    for line in lines:
        if line.lstrip().startswith(("def ", "class ")):
            continue
        m = assign_pattern.match(line)
        if not m:
            continue
        name = m.group(1)
        if name.startswith('V_'):
            continue
        # Anything that is not already an input or output counts as internal.
        if name not in inputs and name not in outputs:
            internals.add(name)

    return {
        "inputs": sorted(inputs),
        "outputs": sorted(outputs),
        "internals": sorted(internals),
    }


In [26]:
def match_kg_var_to_code(
    kg_name: str,
    code_vars: list[str],
    program_name: str | None = None,
    debug: bool = False
) -> list[str]:
    """
    Versucht, eine KG Variable mit Variablen im Code zu matchen.

    Verhalten:
    - Wenn program_name=None: Programmnamen NICHT wegschneiden
    - Wenn program_name gesetzt: Programmnamen am Anfang entfernen
    """

    core = kg_name

    if debug:
        print(f"\n[match_kg_var_to_code] Original KG-Name: {kg_name}")

    # 1) Var_-Präfix entfernen
    if core.startswith("Var_"):
        core = core[4:]
        if debug:
            print(f"[match_kg_var_to_code] ohne 'Var_': {core}")

    # 2) Programmnamen nur dann entfernen, wenn er explizit übergeben wird
    if program_name:
        prefix1 = program_name + "__dot__"
        prefix2 = program_name + "_"
        if core.startswith(prefix1):
            core = core[len(prefix1):]
            if debug:
                print(f"[match_kg_var_to_code] ohne '{prefix1}': {core}")
        elif core.startswith(prefix2):
            core = core[len(prefix2):]
            if debug:
                print(f"[match_kg_var_to_code] ohne '{prefix2}': {core}")

    # 3) __dot__ in echte Punkte zurückverwandeln
    core = core.replace("__dot__", ".")
    if debug:
        print(f"[match_kg_var_to_code] nach __dot__ -> '.': {core}")

    candidates = [core]

    # Sonderfall GVL_XXX_YYY -> GVL_XXX.YYY (optional)
    if core.startswith("GVL_"):
        parts = core.split("_", 2)
        if len(parts) >= 3:
            alt = parts[0] + "_" + parts[1] + "." + parts[2]
            candidates.append(alt)
            if debug:
                print(f"[match_kg_var_to_code] zusätzlicher Kandidat: {alt}")

    if debug:
        print(f"[match_kg_var_to_code] Kandidaten: {candidates}")
        print(f"[match_kg_var_to_code] Pool-Größe: {len(code_vars)}")

    matching = []
    for cv in code_vars:
        for cand in candidates:
            if cv == cand or cand in cv:
                matching.append(cv)
                if debug:
                    print(f"[match_kg_var_to_code] Match gefunden: {cv} für {cand}")
                break

    if debug and not matching:
        print("[match_kg_var_to_code] Keine Matches gefunden.")

    return matching


In [27]:
from urllib.parse import unquote

def get_program_name_from_uri(program_uri: str) -> str:
    """
    Return the local program name without the 'Program_' prefix.

    The part after the last '/' is taken, a leading 'Program_' is removed and
    the result is percent-decoded,
    e.g. '.../Program_MBS_OperatingModes' -> 'MBS_OperatingModes'.
    """
    marker = "Program_"
    tail = program_uri.rpartition("/")[2]
    if tail.startswith(marker):
        tail = tail[len(marker):]
    return unquote(tail)

In [28]:
def st_candidates_from_kg(kg_name: str, program_name: str | None) -> list[str]:
    core = kg_name
    if core.startswith("Var_"):
        core = core[4:]
    if program_name:
        for pref in (program_name + "__dot__", program_name + "_"):
            if core.startswith(pref):
                core = core[len(pref):]
                break
    cand = core.replace("__dot__", ".")
    return [core, cand] if cand != core else [core]

def st_role_ok(role: str, read: bool, written: bool) -> tuple[bool, str]:
    """
    Check whether the observed read/write usage fits the role modelled in the KG.

    Args:
        role: "input", "output", "used"; any other value is treated as an
            internal variable.
        read: the variable is read somewhere in the code.
        written: the variable is written somewhere in the code.

    Returns:
        (ok, comment) where ``comment`` is the human-readable verdict.

        - input:    ok when read and not written
        - output:   ok when written and not read
        - used:     ok when read or written (any usage counts)
        - internal: ok when both read and written
    """
    if role == "input":
        ok = read and not written
        return ok, ("Input gefunden und nur gelesen." if ok else "Input nicht gefunden oder beschrieben.")
    if role == "output":
        ok = written and not read
        return ok, ("Output gefunden und nur geschrieben." if ok else "Output nicht gefunden oder auch gelesen.")
    if role == "used":
        # A "used" variable only has to occur; requiring read AND written (the
        # internal rule) would misreport variables that are only read or only
        # written and label them as internal variables.
        ok = read or written
        return ok, ("Used Variable im Code gelesen oder geschrieben." if ok else "Used Variable im Code nicht gefunden.")
    # internal
    ok = read and written
    return ok, ("Interne Variable gelesen und geschrieben." if ok else "Interne Variable nicht sowohl gelesen als auch geschrieben.")

In [29]:
def build_per_variable_report_local(program_info: dict,
                                    variables_from_code: dict,
                                    kg_variables: dict,
                                    debug: bool = False) -> dict:
    """
    Build a deterministic per-variable analysis report (same shape as the
    earlier LLM output).

    Args:
        program_info: dict with at least "program_uri"; "programming_language"
            selects the branch ("ST" or "FBD", case-insensitive).
        variables_from_code: for ST a dict with the sets "read" and "written";
            for FBD a dict with the lists "inputs", "outputs", "internals".
        kg_variables: dict with the lists "inputs", "outputs", "internals",
            "usedvars"; each entry has the keys "uri" and "kg_name".
        debug: print details of the FBD matching.

    Returns:
        {
          "program_uri": "...",
          "variables_from_code": {...},
          "per_variable_report": [ {...}, ... ]
        }
        For any language other than ST/FBD the report list is empty (instead
        of the function returning None).
    """
    program_uri = program_info["program_uri"]
    program_name = get_program_name_from_uri(program_uri)
    lang = (program_info.get("programming_language") or "").upper()

    # (key in kg_variables, role label written into the report)
    role_pairs = [
        ("inputs", "input"),
        ("outputs", "output"),
        ("internals", "internal"),
        ("usedvars", "used"),
    ]
    per_report = []

    if lang == "ST":
        read_set = variables_from_code["read"]
        write_set = variables_from_code["written"]

        for role_key, kg_role in role_pairs:
            for v in kg_variables.get(role_key, []):
                kg_name = v["kg_name"]
                cands = st_candidates_from_kg(kg_name, program_name)
                read_hit = any(c in read_set for c in cands)
                write_hit = any(c in write_set for c in cands)
                # Only the comment of the role check goes into the report;
                # "present_in_code" reflects whether any candidate occurs at all.
                _role_ok, comment = st_role_ok(kg_role, read_hit, write_hit)
                matching = [c for c in cands if c in read_set or c in write_set]

                per_report.append({
                    "kg_uri": v["uri"],
                    "kg_role": kg_role,
                    "kg_name": kg_name,
                    "matching_code_variables": matching,
                    "present_in_code": bool(matching),
                    "comment": comment,
                })

    elif lang == "FBD":
        inputs_from_code = variables_from_code["inputs"]
        outputs_from_code = variables_from_code["outputs"]
        internals_from_code = variables_from_code["internals"]

        if debug:
            print("\n" + "=" * 80)
            print(f"[build_per_variable_report_local] Programm: {program_name}")
            print(f"[build_per_variable_report_local] URI: {program_uri}")
            print(f"[build_per_variable_report_local] Inputs aus Code:    {inputs_from_code}")
            print(f"[build_per_variable_report_local] Outputs aus Code:   {outputs_from_code}")
            print(f"[build_per_variable_report_local] Internals aus Code: {internals_from_code}")
            print("=" * 80)

        for role_key, kg_role in role_pairs:
            kg_list = kg_variables.get(role_key, [])

            if debug:
                print(f"\n[build_per_variable_report_local] Rolle '{kg_role}' ({role_key})")
                print(f"[build_per_variable_report_local] Anzahl KG-Variablen: {len(kg_list)}")

            for v in kg_list:
                kg_uri = v["uri"]
                kg_name = v["kg_name"]

                # Choose the pool of code variables to match against
                if kg_role == "input":
                    pool = inputs_from_code
                    base = "Input"
                elif kg_role == "output":
                    pool = outputs_from_code
                    base = "Output"
                elif kg_role == "internal":
                    pool = internals_from_code
                    base = "Interne Variable"
                else:  # used
                    pool = inputs_from_code + outputs_from_code + internals_from_code
                    base = "Used Variable"

                if debug:
                    print(f"\n  - KG-Variable: {kg_name}")
                    print(f"    URI: {kg_uri}")
                    print(f"    Rolle: {kg_role}")
                    print(f"    Pool-Größe: {len(pool)}")

                # 1) First try with the program name kept in the variable name
                matches_keep = match_kg_var_to_code(kg_name, pool, program_name=None)

                # 2) Then with the program name stripped
                matches_strip = match_kg_var_to_code(kg_name, pool, program_name=program_name)

                # 3) Union of both results
                matching = sorted(set(matches_keep + matches_strip))
                present = bool(matching)

                if present:
                    comment = f"Name im KG und im Code konsistent ({base})."
                else:
                    comment = f"Nur im KG modelliert ({base}), im Code nicht gefunden."

                if debug:
                    print(f"    Matches: {matching}")
                    print(f"    present_in_code: {present}")
                    print(f"    Kommentar: {comment}")

                per_report.append(
                    {
                        "kg_uri": kg_uri,
                        "kg_role": kg_role,
                        "kg_name": kg_name,
                        "matching_code_variables": matching,
                        "present_in_code": present,
                        "comment": comment,
                    }
                )

    # Shared return: unsupported languages fall through with an empty report
    # so callers always receive a dict.
    return {
        "program_uri": program_uri,
        "variables_from_code": variables_from_code,
        "per_variable_report": per_report,
    }


In [30]:
def build_summary_text(program_uri: str, analysis: dict) -> str:
    """
    Render the per_variable_report of ``analysis`` as readable text: one line
    per variable, followed by an overall verdict line with the number of
    variables that do / do not have matching code variables.
    """
    entries = analysis.get("per_variable_report", [])

    def describe(entry: dict) -> str:
        # One report line: "<name> (<role>): <presence>. <comment>"
        if entry.get("present_in_code", False):
            presence = "im Code vorhanden"
        else:
            presence = "im Code NICHT vorhanden"
        name = entry.get("kg_name", "?")
        role = entry.get("kg_role", "?")
        remark = entry.get("comment", "")
        return f"{name} ({role}): {presence}. {remark}"

    total = len(entries)
    matched = len([e for e in entries if e.get("present_in_code", False)])
    unmatched = total - matched

    report_lines = [describe(e) for e in entries]
    report_lines.append(
        f"Gesamtfazit: {program_uri} hat {total} modellierte Variablen. "
        f"Davon {matched} mit passenden Code-Variablen und {unmatched} ohne Treffer im Code."
    )
    return "\n".join(report_lines)



In [31]:
import re

# Upper-case Structured Text keywords, operators and boolean literals.
# _tokens_no_keywords() compares token.upper() against this set, so the
# filtering is case-insensitive.
ST_KEYWORDS = {
    "IF","THEN","ELSE","ELSIF","END_IF","CASE","OF","END_CASE",
    "FOR","TO","BY","DO","END_FOR","WHILE","END_WHILE","REPEAT","UNTIL","END_REPEAT",
    "AND","OR","NOT","XOR","MOD","DIV","TRUE","FALSE","RETURN","EXIT"
}

def _strip_st_comments(code: str) -> str:
    code = re.sub(r"\(\*.*?\*\)", " ", code, flags=re.S)
    code = re.sub(r"//.*?$", " ", code, flags=re.M)
    return code

def _st_tokenize(code: str) -> list[str]:
    return re.findall(r"\b[A-Za-z_][\w.]*\b", code)

def _tokens_no_keywords(text: str) -> set[str]:
    """
    Return the set of tokens in ``text`` whose upper-cased form is not listed
    in ST_KEYWORDS.
    """
    kept = set()
    for token in _st_tokenize(text):
        if token.upper() in ST_KEYWORDS:
            continue
        kept.add(token)
    return kept

def extract_variables_from_st(code: str) -> dict:
    """
    Return the sets of variables that are written / read in ST code.

    - written: every identifier that stands directly left of a ':='
    - read:    every non-keyword token that occurs anywhere else (right-hand
               sides, conditions, call arguments, ...). Standing on the left
               of ':=' alone does not make a variable "read".

    Each ':=' is handled on its own, so an assignment that follows another one
    without a ';' in between (e.g. the body of ``FOR i := 1 TO 3 DO x := y;``)
    is still recognised as a write of ``x``.

    Comments are removed first. No VAR-block handling is performed here; the
    input is expected to be code without declarations.

    Returns:
        {"written": set[str], "read": set[str]}
    """
    clean = _strip_st_comments(code)

    # Identifier (optionally dotted) immediately followed by ':='.
    lhs_re = re.compile(r"([A-Za-z_][\w.]*)\s*:=")

    written = {m.group(1) for m in lhs_re.finditer(clean)}

    # Remove only the assignment targets; whatever remains is read context.
    remainder = lhs_re.sub(" ", clean)
    read = _tokens_no_keywords(remainder)

    return {"written": written, "read": read}


In [32]:
from rdflib import URIRef, Literal
from rdflib.namespace import XSD

def add_consistency_reports(graph: Graph, limit_programs: int | None = None, debug: bool = True):
    """
    Analyse every program in ``graph`` and attach a textual consistency report
    as dp:hasConsistencyReport literal (xsd:string).

    For each program the KG variables are compared with the variables
    extracted from its code (FBD -> generated Python, ST -> Structured Text).
    Programs without code or with another language are skipped.

    Args:
        graph: graph that is read from and written to.
        limit_programs: analyse only the first N programs (sorted by URI);
            None analyses all.
        debug: forwarded to build_per_variable_report_local; defaults to True,
            which is the value that used to be hard-coded.
    """
    program_uris = get_all_program_uris(graph)
    print(f"Gefundene Programme: {len(program_uris)}")

    for p in program_uris:
        # Query the graph that was passed in, not the notebook-level global.
        codes = list(graph.objects(URIRef(p), DP.hasProgramCode))
        print(p, "-> Code vorhanden:" if codes else "-> KEIN Code!")

    if limit_programs is not None:
        program_uris = program_uris[:limit_programs]
        print(f"Analysiere nur die ersten {len(program_uris)} Programme.")

    for i, p_uri in enumerate(program_uris, start=1):
        print(f"\n[{i}/{len(program_uris)}] Analysiere Programm: {p_uri}")
        info = run_var_query_for_program(graph, p_uri)
        lang = (info.get("programming_language") or "").upper()

        if not info.get("code"):
            print("  -> Übersprungen (kein Code gefunden).")
            continue

        # Structure the KG variables per role
        kg_variables = build_kg_variables_from_info(info)

        if lang == "FBD":
            variables_from_code = extract_variables_from_python(info["code"])
        elif lang == "ST":
            variables_from_code = extract_variables_from_st(info["code"])
        else:
            print(f"  -> Übersprungen (unbekannte Programmiersprache: {lang}).")
            continue

        # Build the local (deterministic) analysis report
        analysis = build_per_variable_report_local(info, variables_from_code, kg_variables, debug=debug)

        # Render the text report
        summary_text = build_summary_text(p_uri, analysis)

        # Attach the report via the dp:hasConsistencyReport property
        program_ref = URIRef(p_uri)
        lit = Literal(summary_text, datatype=XSD.string)
        graph.add((program_ref, DP.hasConsistencyReport, lit))

        print("  -> dp_hasConsistencyReport hinzugefügt.")

    print("\nFertig.")


In [33]:
# Remove the reports of earlier runs first, then add fresh ones.
clear_all_consistency_reports(g)
add_consistency_reports(g, limit_programs=60)  
# NOTE: this is the same path the graph was parsed from (TTL_PATH), so the
# input file is overwritten by the serialization below.
OUTPUT_TTL = r"D:\MA_Python_Agent\MSRGuard_Anpassung\KGs\Test_filled.ttl"
g.serialize(OUTPUT_TTL, format="turtle")
print("Gespeichert:", OUTPUT_TTL)

Gefundene Programme: 8
http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 -> Code vorhanden:
http://www.semanticweb.org/AgentProgramParams/Program_FB_Betriebsarten -> Code vorhanden:
http://www.semanticweb.org/AgentProgramParams/Program_FB_Diagnose_D2 -> Code vorhanden:
http://www.semanticweb.org/AgentProgramParams/Program_FB_InitFahrt_A6_A2 -> Code vorhanden:
http://www.semanticweb.org/AgentProgramParams/Program_FB_Methode1Job -> Code vorhanden:
http://www.semanticweb.org/AgentProgramParams/Program_FB_Notaus_D1 -> Code vorhanden:
http://www.semanticweb.org/AgentProgramParams/Program_FB_ProduktionMitStoerung_D3 -> Code vorhanden:
http://www.semanticweb.org/AgentProgramParams/Program_MAIN -> Code vorhanden:
Analysiere nur die ersten 8 Programme.

[1/8] Analysiere Programm: http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1
Ergebnisse für Programm: http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 Code gefunde

In [1]:
from kg_manager_optimized import run_analysis, DEFAULT_TTL_PATH

# Per the original note: identical behaviour as before, with an optional limit.
# NOTE(review): run_analysis lives in kg_manager_optimized and is not visible
# here — confirm that it really mirrors the cells above.
run_analysis(ttl_path=DEFAULT_TTL_PATH, limit_programs=60, debug=True)


Graph geladen mit 766 Tripeln.
[GVL 1/8] http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1
[GVL 2/8] http://www.semanticweb.org/AgentProgramParams/Program_FB_Betriebsarten
Skip http://www.semanticweb.org/AgentProgramParams/Program_FB_Betriebsarten: lang=FBD, code=True
[GVL 3/8] http://www.semanticweb.org/AgentProgramParams/Program_FB_Diagnose_D2
[GVL 4/8] http://www.semanticweb.org/AgentProgramParams/Program_FB_InitFahrt_A6_A2
[GVL 5/8] http://www.semanticweb.org/AgentProgramParams/Program_FB_Methode1Job
[GVL 6/8] http://www.semanticweb.org/AgentProgramParams/Program_FB_Notaus_D1
[GVL 7/8] http://www.semanticweb.org/AgentProgramParams/Program_FB_ProduktionMitStoerung_D3
[GVL 8/8] http://www.semanticweb.org/AgentProgramParams/Program_MAIN
[type-map] http://www.semanticweb.org/AgentProgramParams/Var_MAIN_fbAuto -> Program http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 (type=FB_Automatikbetrieb_F1)
[type-map] http://www.semanticweb.

WindowsPath('D:/MA_Python_Agent/MSRGuard_Anpassung/KGs/Test_filled.ttl')

In [6]:
# --- Helfer: ggf. weglassen, falls schon definiert ---
def get_local_name(uri: str) -> str:
    """
    Return the local name of a URI: take the part after the last '#', then
    the part after the last '/' of that.
    """
    after_hash = uri.rpartition("#")[2]
    return after_hash.rpartition("/")[2]

# Upper-case Structured Text keywords, operators and boolean literals;
# tokens are compared upper-cased, so the filter is case-insensitive.
ST_KEYWORDS = frozenset({
    "IF","THEN","ELSE","ELSIF","END_IF","CASE","OF","END_CASE",
    "FOR","TO","BY","DO","END_FOR","WHILE","END_WHILE","REPEAT","UNTIL","END_REPEAT",
    "AND","OR","NOT","XOR","MOD","DIV","TRUE","FALSE","RETURN","EXIT"
})

def _strip_st_comments(code: str) -> str:
    """Replace '(* ... *)' blocks and '// ...' line comments with a space each."""
    import re
    code = re.sub(r"\(\*.*?\*\)", " ", code, flags=re.S)
    code = re.sub(r"//.*?$", " ", code, flags=re.M)
    return code

def _st_tokenize(code: str) -> list[str]:
    """Return identifier-like tokens (letters/digits/underscore/dots) in order."""
    import re
    return re.findall(r"\b[A-Za-z_][\w.]*\b", code)

def _tokens_no_keywords(text: str) -> set[str]:
    """Return the set of tokens whose upper-cased form is not an ST keyword."""
    return {t for t in _st_tokenize(text) if t.upper() not in ST_KEYWORDS}

def extract_variables_from_st(code: str) -> dict:
    """
    Return the sets of variables that are written / read in ST code.

    - written: every identifier that stands directly left of a ':='
    - read:    every non-keyword token that occurs anywhere else

    Each ':=' is handled on its own, so an assignment that follows another one
    without a ';' in between (e.g. the body of ``FOR i := 1 TO 3 DO x := y;``)
    is still recognised as a write of ``x`` instead of being swallowed into
    the previous right-hand side.

    Comments are removed first; no VAR-block handling is performed here.

    Returns:
        {"written": set[str], "read": set[str]}
    """
    import re
    clean = _strip_st_comments(code)

    # Identifier (optionally dotted) immediately followed by ':='.
    lhs_re = re.compile(r"([A-Za-z_][\w.]*)\s*:=")

    written = {m.group(1) for m in lhs_re.finditer(clean)}

    # Remove only the assignment targets; whatever remains is read context.
    remainder = lhs_re.sub(" ", clean)
    read = _tokens_no_keywords(remainder)
    return {"written": written, "read": read}
# --- Ende Helfer ---

def get_all_program_uris(graph: Graph) -> list[str]:
    """Return the URIs (as strings) of all ag:class_Program individuals, sorted."""
    program_uris = [str(subject) for subject in graph.subjects(RDF.type, AG.class_Program)]
    program_uris.sort()
    return program_uris

def detect_global_vars(graph: Graph) -> dict:
    """
    Build the mapping  gvl_name -> { dotted_name: uri }.

    A variable is recorded when its name has no 'Var_' prefix and contains
    '__dot__'; the dotted name is the name with '__dot__' replaced by '.',
    and gvl_name is the part before the first '.'.

    Two sources are scanned, later entries overwriting earlier ones:
      1. all individuals typed ag:class_Variable (name from
         dp:hasVariableName, falling back to the URI's local name)
      2. all objects of the op: variable links (name from the local name)
    """
    gvl_map: dict[str, dict[str, str]] = {}

    def register(raw_name: str, uri_text: str) -> None:
        # Record the variable if its name looks like a global ("X__dot__Y").
        if raw_name.startswith("Var_") or "__dot__" not in raw_name:
            return
        dotted = raw_name.replace("__dot__", ".")
        group = dotted.split(".", 1)[0]
        gvl_map.setdefault(group, {})[dotted] = uri_text

    # 1) Every node typed as variable
    for var_node in graph.subjects(RDF.type, AG.class_Variable):
        uri_text = str(var_node)
        name_literal = next(graph.objects(var_node, DP.hasVariableName), None)
        if name_literal is None:
            raw_name = get_local_name(uri_text)
        else:
            raw_name = str(name_literal)
        register(raw_name, uri_text)

    # 2) Additionally everything reachable via the existing op: links
    link_predicates = [OP.hasInputVariable, OP.hasOutputVariable, OP.hasInternalVariable, OP.usesVariable]
    for predicate in link_predicates:
        for var_node in graph.objects(None, predicate):
            uri_text = str(var_node)
            register(get_local_name(uri_text), uri_text)

    return gvl_map

def get_program_info(graph: Graph, program_uri: str) -> dict:
    """
    Return {"program_uri", "code", "programming_language"} for one program.
    "code" and "programming_language" are the string form of the first
    dp:hasProgramCode / dp:hasProgrammingLanguage value, or None if absent.
    """
    prog = URIRef(program_uri)

    def first_as_text(predicate):
        # First object of (prog, predicate) as str, None when there is none.
        value = next(graph.objects(prog, predicate), None)
        return None if value is None else str(value)

    code_text = first_as_text(DP.hasProgramCode)
    lang_text = first_as_text(DP.hasProgrammingLanguage)
    return {"program_uri": program_uri, "code": code_text, "programming_language": lang_text}

def link_globals_for_st_program(graph: Graph, program_uri: str, globals_map: dict, debug: bool = False):
    """
    Link one ST program to every global variable that occurs in its code.

    For each global in ``globals_map`` (gvl_name -> {dotted_name: uri}) that
    is read or written by the program, op:usesVariable is added; in addition
    op:hasInputVariable when it is read and op:hasOutputVariable when it is
    written. Programs that are not ST or have no code are skipped.
    """
    info = get_program_info(graph, program_uri)
    code = info.get("code")
    lang = (info.get("programming_language") or "").upper()

    if not (lang == "ST" and code):
        if debug:
            print(f"Skip {program_uri}: lang={lang}, code={bool(code)}")
        return

    usage = extract_variables_from_st(code)
    read_set = usage["read"]
    write_set = usage["written"]

    # Flat lookup: dotted_name -> uri (later groups overwrite earlier ones)
    flat_lookup = {}
    for group in globals_map.values():
        flat_lookup.update(group)

    for dotted_name, uri in flat_lookup.items():
        is_read = dotted_name in read_set
        is_written = dotted_name in write_set
        if not is_read and not is_written:
            continue

        prog_ref = URIRef(program_uri)
        var_ref = URIRef(uri)

        graph.add((prog_ref, OP.usesVariable, var_ref))
        print(f"Adding usesVariable link: {program_uri} uses {uri}")

        if is_read:
            graph.add((prog_ref, OP.hasInputVariable, var_ref))
            print(f"Adding hasInputVariable link: {program_uri} hasInputVariable {uri}")

        if is_written:
            graph.add((prog_ref, OP.hasOutputVariable, var_ref))
            print(f"Adding hasOutputVariable link: {program_uri} hasOutputVariable {uri}")

        if debug:
            print(f"{program_uri} -> {dotted_name} (read={is_read}, written={is_written})")

def add_global_var_links(graph: Graph, limit_programs: int | None = None, debug: bool = False):
    """
    Detect the global variables of ``graph`` once and link them to every
    program (optionally only the first ``limit_programs`` programs).
    """
    globals_map = detect_global_vars(graph)
    all_programs = get_all_program_uris(graph)
    selected = all_programs if limit_programs is None else all_programs[:limit_programs]
    total = len(selected)

    for position, p_uri in enumerate(selected, 1):
        if debug:
            print(f"[{position}/{total}] {p_uri}")
        link_globals_for_st_program(graph, p_uri, globals_map, debug=debug)

# Example call in the notebook (requires the graph g from the first cell).
# The links are added to g in place and the result is serialized as Turtle.
add_global_var_links(g, limit_programs=None, debug=True)
OUTPUT_TTL = r"D:\MA_Python_Agent\MSRGuard_Anpassung\KGs\Test_filled.ttl"
g.serialize(OUTPUT_TTL, format="turtle")

[1/8] http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1
Adding usesVariable link: http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 uses http://www.semanticweb.org/AgentProgramParams/GVL__dot__Fehler
Adding hasOutputVariable link: http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 hasOutputVariable http://www.semanticweb.org/AgentProgramParams/GVL__dot__Fehler
http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 -> GVL.Fehler (read=False, written=True)
Adding usesVariable link: http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 uses http://www.semanticweb.org/AgentProgramParams/GVL__dot__Start
Adding hasInputVariable link: http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 hasInputVariable http://www.semanticweb.org/AgentProgramParams/GVL__dot__Start
http://www.semanticweb.org/AgentProgramParams/Program_FB_Automatikbetrieb_F1 -> GVL

<Graph identifier=Nb3e54142ce584fda9b95acf156c9a0e9 (<class 'rdflib.graph.Graph'>)>