In [1]:
# =========================================
# Zelle 0 — Setup: Pfade + Config/Skill-JSON laden
# =========================================
from pathlib import Path
import json

# All relative sidecar paths below are resolved against the current working directory.
BASE_DIR = Path.cwd()

# 1) Adjust these paths to your machine (on Windows use raw strings: r"...").
# NOTE: KG_OUT is identical to KG_IN, so the save cell overwrites the input file in place.
KG_IN  = r"D:\MA_Python_Agent\MSRGuard_Anpassung\KGs\Test2_filled.ttl"
KG_OUT = r"D:\MA_Python_Agent\MSRGuard_Anpassung\KGs\Test2_filled.ttl"

# JSON sidecars (ideally located next to this notebook, inside Dateien_SkillExtractor/).
CONFIG_JSON_PATH = BASE_DIR / "Dateien_SkillExtractor/skill_extractor_config.json"
SKILLS_JSON_PATH = BASE_DIR / "Dateien_SkillExtractor/skills_catalog.json"

# Switch: delete and rewrite all skills in the KG on every run?
REWRITE_SKILLS_EACH_RUN = True

# Switch: delete previously written LLM hypotheses on every run?
CLEAR_OLD_HYPOTHESES_EACH_RUN = True

# Both sidecars are read as UTF-8 JSON; a missing file raises here, before any KG work starts.
config = json.loads(CONFIG_JSON_PATH.read_text(encoding="utf-8"))
skills_catalog = json.loads(SKILLS_JSON_PATH.read_text(encoding="utf-8"))

print("Config geladen:", CONFIG_JSON_PATH)
print("Skills-Katalog geladen:", SKILLS_JSON_PATH)
# The catalog is expected to carry its entries under the top-level key "skills".
print("Skills im Katalog:", len(skills_catalog.get("skills", [])))

Config geladen: d:\MA_Python_Agent\Notebooks\Dateien_SkillExtractor\skill_extractor_config.json
Skills-Katalog geladen: d:\MA_Python_Agent\Notebooks\Dateien_SkillExtractor\skills_catalog.json
Skills im Katalog: 48


In [2]:
# =========================================
# Zelle 1 — KG laden + Namespaces
# =========================================
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, RDFS, XSD

# Parse the Turtle file into a fresh in-memory graph shared by all later cells.
g = Graph()
g.parse(KG_IN, format="turtle")

# Namespaces: AG is the base; DP and OP extend it with the "dp_" / "op_" name prefixes.
AG = Namespace("http://www.semanticweb.org/AgentProgramParams/")
DP = Namespace("http://www.semanticweb.org/AgentProgramParams/dp_")
OP = Namespace("http://www.semanticweb.org/AgentProgramParams/op_")

# Register the prefixes so the serialized Turtle uses ag:/dp:/op: short forms.
for _prefix, _namespace in (("ag", AG), ("dp", DP), ("op", OP)):
    g.bind(_prefix, _namespace)

print("KG geladen:", KG_IN)
print("Triples:", len(g))

def ag(local: str) -> URIRef:
    """Return the IRI obtained by appending `local` to the AG namespace."""
    iri = str(AG) + local
    return URIRef(iri)

def dp(local: str) -> URIRef:
    """Return the IRI obtained by appending `local` to the DP namespace (which already ends in "dp_")."""
    iri = str(DP) + local
    return URIRef(iri)

def op(local: str) -> URIRef:
    """Return the IRI obtained by appending `local` to the OP namespace (which already ends in "op_")."""
    iri = str(OP) + local
    return URIRef(iri)

KG geladen: D:\MA_Python_Agent\MSRGuard_Anpassung\KGs\Test2_filled.ttl
Triples: 21192


In [3]:
# =========================================
# Zelle 2 — Skills löschen (optional) + aus JSON neu schreiben
# =========================================
# Class IRI used to type skill individuals, and the datatype property holding their description.
SKILL_CLASS = ag("class_Skill")
P_HAS_SKILL_DESCRIPTION = dp("dp_hasSkillDescription")

def _is_skill_node(uri: URIRef) -> bool:
    """Return True if `uri` lies in the AG namespace and its last path segment starts with "Skill_"."""
    iri = str(uri)
    if not iri.startswith(str(AG)):
        return False
    # Everything after the final "/" is the local name of the node.
    _, _, last_segment = iri.rpartition("/")
    return last_segment.startswith("Skill_")

def delete_all_skills(graph: Graph) -> int:
    """Remove every skill node from `graph` and return how many nodes were removed.

    A node counts as a skill if it is typed as SKILL_CLASS or if its IRI
    matches `_is_skill_node` (covers nodes whose rdf:type triple is missing).
    """
    typed_nodes = set(graph.subjects(RDF.type, SKILL_CLASS))

    named_nodes = set()
    for subj in graph.subjects(None, None):
        if isinstance(subj, URIRef) and _is_skill_node(subj):
            named_nodes.add(subj)

    doomed = typed_nodes | named_nodes

    # Drop outgoing and incoming triples so no dangling edges point at a removed skill.
    for node in doomed:
        graph.remove((node, None, None))
        graph.remove((None, None, node))
    return len(doomed)

def write_skills_from_json(graph: Graph, skills_json: dict) -> int:
    """Add one skill individual per entry of `skills_json["skills"]` and return the entry count.

    Each entry must provide "id" (used as local name in the AG namespace);
    "label" and "description" are written only when non-empty.
    """
    written = 0
    for written, entry in enumerate(skills_json.get("skills", []), start=1):
        node = ag(entry["id"])
        label_text = entry.get("label", "")
        description = entry.get("description", "")

        graph.add((node, RDF.type, SKILL_CLASS))
        if label_text:
            graph.add((node, RDFS.label, Literal(label_text)))
        if description:
            graph.add((node, P_HAS_SKILL_DESCRIPTION, Literal(description)))
    return written

# Either leave the skills in the KG untouched, or replace them with the JSON catalog.
if not REWRITE_SKILLS_EACH_RUN:
    print("REWRITE_SKILLS_EACH_RUN=False -> Skills bleiben unverändert")
else:
    deleted = delete_all_skills(g)
    written = write_skills_from_json(g, skills_catalog)
    print(f"Skills neu geschrieben: gelöscht={deleted}, geschrieben={written}")

Skills neu geschrieben: gelöscht=49, geschrieben=48


In [7]:
# =========================================
# Zelle 3 — POUs + Code aus KG holen (robust)
# =========================================
import pandas as pd

# SPARQL query: matches any predicate whose IRI ends (case-insensitively) in "POUName" or
# "POUCode", so the exact property namespace does not need to be known. Rows where neither
# a name nor a code is bound are dropped by the final FILTER.
Q_POUS = """
SELECT DISTINCT ?pou ?pou_name ?code
WHERE {
  OPTIONAL {
    ?pou ?p_name ?pou_name .
    FILTER ( regex(str(?p_name), "POUName$", "i") )
  }
  OPTIONAL {
    ?pou ?p_code ?code .
    FILTER ( regex(str(?p_code), "POUCode$", "i") )
  }
  FILTER ( bound(?pou_name) || bound(?code) )
}
"""

# 1) Raw read (may produce duplicate rows when a POU has several name/code values).
#    Unbound variables are normalised to empty strings.
tmp = []
for r in g.query(Q_POUS):
    tmp.append({
        "pou_iri": str(r.pou),
        "pou_name": str(r.pou_name) if r.pou_name else "",
        "code": str(r.code) if r.code else ""
    })

# 2) Stable schema: the three columns exist even when the query returned no rows.
df_raw = pd.DataFrame(tmp, columns=["pou_iri", "pou_name", "code"])

# 3) Merge duplicates per POU: the first non-empty name and the first non-empty code win.
if len(df_raw):
    df_pous = (
        df_raw.groupby("pou_iri", as_index=False)
              .agg({
                  "pou_name": lambda s: next((x for x in s if x), ""),
                  "code":     lambda s: next((x for x in s if x), "")
              })
    )
else:
    # Nothing to aggregate; keep the empty frame with its fixed columns.
    df_pous = df_raw

print("POUs geladen:", len(df_pous))

# ---- Debug hint in case no POU was found at all ----
if len(df_pous) == 0:
    print("⚠️ Keine POUs gefunden. Dann heißen die Properties bei dir vermutlich nicht ...POUName/...POUCode.")
    print("    Tipp: Suche im KG nach Prädikaten mit 'POU' im Namen (siehe Debug-Zelle unten).")

# 4) Name filters from the config (the "pou_name" column always exists, see the frame construction).
pou_filters = config.get("pou_filters", {})
inc = pou_filters.get("include_contains", [])
exc_pref = pou_filters.get("exclude_prefixes", [])
exc_contains = pou_filters.get("exclude_contains", [])

# Tokens are plain substrings, not regular expressions: regex=False keeps characters such as
# "." or "(" literal and matches the literal semantics of the startswith() prefix filter.
# Every include token must be present (the filters are applied one after another).
for token in inc:
    df_pous = df_pous[df_pous["pou_name"].str.contains(token, na=False, regex=False)]
for pref in exc_pref:
    df_pous = df_pous[~df_pous["pou_name"].str.startswith(pref, na=False)]
for token in exc_contains:
    df_pous = df_pous[~df_pous["pou_name"].str.contains(token, na=False, regex=False)]

# Re-number rows so later positional progress output starts at 0.
df_pous = df_pous.reset_index(drop=True)
print("POUs nach Filter:", len(df_pous))

df_pous.head(5)


POUs geladen: 60
POUs nach Filter: 21


Unnamed: 0,pou_iri,pou_name,code
0,http://www.semanticweb.org/AgentProgramParams/...,HRL_CB_AS_HorizontalMoveSensors,// POU HRL_CB_AS_HorizontalMoveSensors body\nH...
1,http://www.semanticweb.org/AgentProgramParams/...,HRL_RGB_AS_HorizontalMoveEncoders,// POU HRL_RGB_AS_HorizontalMoveEncoders body\...
2,http://www.semanticweb.org/AgentProgramParams/...,HRL_RGB_AS_HorizontalMoveSensors,// POU HRL_RGB_AS_HorizontalMoveSensors body\n...
3,http://www.semanticweb.org/AgentProgramParams/...,HRL_RGB_AS_VerticalMoveEncoders,// POU HRL_RGB_AS_VerticalMoveEncoders body\nH...
4,http://www.semanticweb.org/AgentProgramParams/...,MBS_CB_AS_HorizontalMove,// POU MBS_CB_AS_HorizontalMove body\nMBS_CB_H...


In [8]:
# =========================================
# Zelle 4 — Benennungsschema aus POU-Namen parsen (module/submodule/base_action/qualifier)
# =========================================
import re
import pandas as pd

# Splits an action name into its base and an optional trailing qualifier
# ("Encoders", "Sensors" or "Encoder").
QUAL_RE = re.compile(r"^(?P<base>.*?)(?P<qual>Encoders|Sensors|Encoder)?$")

def parse_pou_name(pou_name: str) -> dict:
    """Decompose a POU name of the form MODULE[_SUBMODULE...]_AS_ACTION[Qualifier].

    Returns a dict with the keys "module", "submodule", "action_raw",
    "base_action" and "qualifier". Without an "AS" segment, the first
    underscore-separated token is the module and the rest is the action.
    Missing parts are returned as empty strings.
    """
    tokens = (pou_name or "").strip().split("_")
    module = tokens[0]  # str.split always yields at least one element

    if "AS" in tokens:
        marker = tokens.index("AS")
        submodule = "_".join(tokens[1:marker])
        action_raw = "_".join(tokens[marker + 1:])
    else:
        submodule = ""
        action_raw = "_".join(tokens[1:])

    found = QUAL_RE.match(action_raw)
    if found is None:
        # Pattern did not apply (e.g. embedded newline): keep the raw action unchanged.
        base_action, qualifier = action_raw, ""
    else:
        base_action = found.group("base")
        qualifier = found.group("qual") or ""

    return dict(
        module=module,
        submodule=submodule,
        action_raw=action_raw,
        base_action=base_action,
        qualifier=qualifier,
    )

# Columns produced by parse_pou_name; listed explicitly so the schema is stable even for 0 rows.
FEATURE_COLS = ["module", "submodule", "action_raw", "base_action", "qualifier"]

feat = pd.DataFrame(
    [parse_pou_name(name) for name in df_pous["pou_name"]],
    index=df_pous.index,
    columns=FEATURE_COLS,
)

# Drop feature columns left over from a previous execution of this cell before joining.
# Otherwise a re-run would duplicate them and row.get("module") would return a Series.
df_pous = pd.concat([df_pous.drop(columns=FEATURE_COLS, errors="ignore"), feat], axis=1)

df_pous[["pou_name","module","submodule","base_action","qualifier"]].head(10)

Unnamed: 0,pou_name,module,submodule,base_action,qualifier
0,HRL_CB_AS_HorizontalMoveSensors,HRL,CB,HorizontalMove,Sensors
1,HRL_RGB_AS_HorizontalMoveEncoders,HRL,RGB,HorizontalMove,Encoders
2,HRL_RGB_AS_HorizontalMoveSensors,HRL,RGB,HorizontalMove,Sensors
3,HRL_RGB_AS_VerticalMoveEncoders,HRL,RGB,VerticalMove,Encoders
4,MBS_CB_AS_HorizontalMove,MBS,CB,HorizontalMove,
5,MBS_DmPD_AS_HorizontalMove,MBS,DmPD,HorizontalMove,
6,MBS_DmPD_AS_RotationMove,MBS,DmPD,RotationMove,
7,MBS_MR01_AS_HardeningProcess,MBS,MR01,HardeningProcess,
8,MBS_MR01_AS_HorizontalMoveSensors,MBS,MR01,HorizontalMove,Sensors
9,MBS_MR01_AS_SecuringProcess,MBS,MR01,SecuringProcess,


In [9]:
# =========================================
# Zelle 5 — Skills aus KG lesen (robust: dp_ + ag-namespace Variationen)
# =========================================
import pandas as pd

# SPARQL query: every individual typed as ag:class_Skill, with optional label and description.
Q_SKILLS = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ag:  <http://www.semanticweb.org/AgentProgramParams/>
PREFIX dp:  <http://www.semanticweb.org/AgentProgramParams/dp_>

SELECT ?skill ?label ?desc
WHERE {
  ?skill rdf:type ag:class_Skill .
  OPTIONAL { ?skill rdfs:label ?label . }
  OPTIONAL { ?skill dp:dp_hasSkillDescription ?desc . }
}
"""

# Unbound optional variables are normalised to empty strings.
skill_rows = []
for r in g.query(Q_SKILLS):
    skill_rows.append({
        "skill_iri": str(r.skill),
        "label": str(r.label) if r.label else "",
        "desc": str(r.desc) if r.desc else ""
    })

# Fixed column list: without it an empty result yields a column-less frame and
# drop_duplicates(subset=["skill_iri"]) raises KeyError. Keeps one row per skill IRI.
df_skills = (
    pd.DataFrame(skill_rows, columns=["skill_iri", "label", "desc"])
      .drop_duplicates(subset=["skill_iri"])
      .reset_index(drop=True)
)
print("Skills im KG:", len(df_skills))
df_skills.head(8)

Skills im KG: 48


Unnamed: 0,skill_iri,label,desc
0,http://www.semanticweb.org/AgentProgramParams/...,HRL: RBG X verfahren,Verfährt den Regalbediengerät Schlitten entlan...
1,http://www.semanticweb.org/AgentProgramParams/...,HRL: RBG Y verfahren,Verfährt die Hubachse entlang der Y Achse zur ...
2,http://www.semanticweb.org/AgentProgramParams/...,HRL: Ausleger verfahren,Fährt den Ausleger des Regalbediengeräts ein o...
3,http://www.semanticweb.org/AgentProgramParams/...,HRL: Förderband antreiben,Startet oder stoppt das Förderband der Ein und...
4,http://www.semanticweb.org/AgentProgramParams/...,HRL: Sicherheitsprüfung,Prüft sicherheitsrelevante Sensorik oder Zustä...
5,http://www.semanticweb.org/AgentProgramParams/...,SG1: Horizontal verfahren,Verfährt den Vakuum Greifer Roboter entlang de...
6,http://www.semanticweb.org/AgentProgramParams/...,SG1: Vertikal verfahren,Verfährt den Vakuum Greifer Roboter entlang de...
7,http://www.semanticweb.org/AgentProgramParams/...,SG1: Rotieren,Rotiert den Greifer oder die Werkzeugaufnahme ...


In [10]:
# =========================================
# Zelle 6 — Prompt Context aus JSON (Abkürzungen etc.)
# =========================================
import json

# Both lookups default to an empty mapping when the config does not define them.
ABBREVIATIONS = config.get("abbreviations", {})
SKILL_VERBS   = config.get("skill_verbs", {})

# Render the glossary as pretty-printed JSON (non-ASCII characters kept as-is) for the LLM prompt.
_glossary_json = json.dumps(
    {"abbreviations": ABBREVIATIONS, "skill_verbs": SKILL_VERBS},
    ensure_ascii=False,
    indent=2,
)
GLOSSARY_TEXT = "Glossary / Prefix legend for identifiers:\n" + _glossary_json

# Preview only the first 900 characters.
print(GLOSSARY_TEXT[:900])

Glossary / Prefix legend for identifiers:
{
  "abbreviations": {
    "HRL": "Hochregallager (storage & retrieval / high-bay warehouse)",
    "RBG": "Regalbediengerät (warehouse gantry/robot)",
    "MBS": "Multi-Bearbeitungsstation (multi processing station)",
    "VSG": "Vakuum-Sauggreifer (vacuum suction gripper / pick-and-place)",
    "SST": "Sortierstrecke / Sortierstation (sorting station/line)",
    "CS": "Conveyor & Stanzmaschine (conveyor + punching station)",
    "CB": "Conveyor Belt (Förderband)",
    "PD": "Pushing Device (Schiebeeinheit/Transfer)",
    "MR01": "Bearbeitungsmodul 01 (im MBS-Kontext)",
    "MR02": "Bearbeitungsmodul 02 (im MBS-Kontext)",
    "AS": "Atomic Skill Marker in POU-Namen (…_AS_…)"
  },
  "skill_verbs": {
    "HorizontalMove": "horizontale Bewegung/Transport",
    "VerticalMove": "vertikale Bewegung",
    "RotationMove": "Rotation/Drehbewegung",
    "Su


In [11]:
# =========================================
# Zelle 7 — Candidate Picking (station-aware + action hints)
# =========================================
import re
import pandas as pd

# Identifier-like tokens: an ASCII letter or underscore followed by ASCII letters, digits, underscores.
token_re = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

def norm_tokens(text: str):
    """Return the set of lower-cased identifier tokens found in `text` (empty set for None or "")."""
    return {tok.lower() for tok in token_re.findall(text or "")}

def station_from_skill_label(label: str) -> str:
    """Return the text before the first ":" of `label`, stripped; "" if there is no colon or no label."""
    if not label:
        return ""
    head, colon, _ = label.partition(":")
    return head.strip() if colon else ""

# Config lookups: module name -> allowed skill station prefixes, base action -> extra hint words.
MODULE_TO_SKILL_STATIONS = config.get("module_to_skill_stations", {})
ACTION_HINTS = config.get("action_hints", {})

def build_skill_text(label: str, desc: str, base_action: str) -> str:
    """Return the lower-cased "label desc" text, followed by each configured hint for `base_action`.

    Hints are appended as given in the config (not lower-cased), each preceded by one space.
    """
    base_text = f"{label or ''} {desc or ''}".lower()
    hint_suffixes = [" " + hint for hint in ACTION_HINTS.get(base_action, [])]
    return "".join([base_text, *hint_suffixes])

def pick_candidates_for_row(row, df_skills: pd.DataFrame, top_n: int = 12):
    """Select up to `top_n` candidate skills for one POU row.

    Skills are first restricted to the stations configured for the row's module
    (falling back to all skills if that leaves none). Each remaining skill is then
    scored by the share of its text tokens that also occur in the POU code.
    Skills with a positive score are preferred; if none scores above zero, the
    first `top_n` of the ranked list are returned instead.

    Returns a DataFrame built from the selected skill rows.
    """
    module = row.get("module", "") or ""
    base_action = row.get("base_action", "") or ""
    code_toks = norm_tokens(row.get("code", "") or "")

    # Station-aware pre-filter; an empty restriction falls back to the full catalog.
    pool = df_skills
    allowed_stations = set(MODULE_TO_SKILL_STATIONS.get(module, []))
    if allowed_stations:
        stations = df_skills["label"].fillna("").apply(station_from_skill_label)
        restricted = df_skills[stations.isin(allowed_stations)]
        if len(restricted):
            pool = restricted

    def _overlap(skill) -> float:
        """Fraction of the skill's tokens (label + desc + hints) that appear in the code."""
        skill_text = build_skill_text(skill.get("label", "") or "", skill.get("desc", "") or "", base_action)
        skill_toks = norm_tokens(skill_text)
        return len(code_toks & skill_toks) / max(1, len(skill_toks))

    # Stable descending sort: skills with equal scores keep their catalog order.
    ranked = sorted(
        ((_overlap(skill), skill) for _, skill in pool.iterrows()),
        key=lambda pair: pair[0],
        reverse=True,
    )

    picked = [skill for score, skill in ranked if score > 0][:top_n]
    if not picked:
        picked = [skill for _, skill in ranked[:top_n]]
    return pd.DataFrame(picked)

# Quick test: candidate preview for the first POU.
# The preview must be the cell's last top-level expression; nested inside an `if`,
# Jupyter does not display it. candidate_top_n is cast to int like in the main loop.
if len(df_pous) and len(df_skills):
    cand = pick_candidates_for_row(
        df_pous.iloc[0].to_dict(),
        df_skills,
        top_n=int(config["llm"]["candidate_top_n"]),
    )
else:
    cand = pd.DataFrame(columns=["label", "desc"])

# reindex (instead of [[...]]) also tolerates an empty, column-less candidate frame.
cand.reindex(columns=["label", "desc"]).head(8)

In [12]:
# =========================================
# Zelle 8 — Groq Setup + LLM Inferenz (nur JSON)
# =========================================
from groq import Groq
import json
import time

import os

# The API key is taken from the GROQ_API_KEY environment variable when set; otherwise it is
# read from the local key file below. This keeps the notebook usable on machines where the
# user-specific path does not exist, without changing behaviour where it does.
api_key_path = r"C:\Users\Alexander Verkhov\Desktop\APIKey_Groq.txt"
groq_api_key = os.environ.get("GROQ_API_KEY", "").strip()
if not groq_api_key:
    with open(api_key_path, "r", encoding="utf-8") as f:
        groq_api_key = f.read().strip()

# Fail here with a clear message instead of on every single request in the inference loop.
if not groq_api_key:
    raise ValueError(f"Groq API key is empty (checked GROQ_API_KEY and {api_key_path})")

client = Groq(api_key=groq_api_key)

# Model identifier comes from the config so it can be changed without editing the notebook.
MODEL_NAME = config["llm"]["model"]

# Task description sent to the LLM (in German). It asks the model to select implemented skills
# only from the supplied candidate list and to answer with a single JSON object following the
# schema embedded at the end of the text. The text is stripped of surrounding whitespace.
TASK_DEFINITION = """
Du bekommst IEC 61131-3 Structured Text Code eines POU/Programms aus einer SPS (fischertechnik I4.0 Demonstrator/Simulator Kontext).
Außerdem bekommst du einen Katalog an Skills (Name + Kurzbeschreibung).
Deine Aufgabe: schätze, welche Skills durch diesen POU implementiert werden.

Wichtig:
- Wähle NUR Skills aus der übergebenen Kandidatenliste (allowed_skill_iris).
- Nutze das Glossar, um Prefixe in POU-Namen/Variablen zu verstehen (z.B. HRL, MBS, VSG, SST, AS).
- Wenn du dir nicht sicher bist oder nichts passt, gib matches als [] zurück.
- Gib AUSSCHLIESSLICH ein JSON-Objekt zurück (kein Markdown, kein Fließtext).

JSON Schema:
{
  "pou_name": "...",
  "matches": [
    {
      "skill_iri": "<muss exakt einer aus allowed_skill_iris sein>",
      "skill_label": "<optional>",
      "confidence": 0.0,
      "reason_short": "...",
      "evidence": ["1-6 kurze Codezeilen wörtlich"]
    }
  ],
  "notes": ""
}
""".strip()

# Keys that must be present in the model's JSON answer.
EXPECTED_KEYS_REQUIRED = ["pou_name", "matches"]   # "notes" is optional!

def _extract_json(text: str):
    if not text:
        raise ValueError("Empty response")
    text = text.strip()
    return json.loads(text)

def infer_skill_implementations(pou_row: dict, candidates_df, model: str):
    """Ask the LLM which candidate skills the given POU implements.

    Args:
        pou_row: dict read via the keys "pou_name", "module", "submodule",
            "base_action" and "code" (missing keys are rendered as empty strings).
        candidates_df: DataFrame with a "skill_iri" column and optional "label" / "desc".
        model: model identifier passed to the chat completion call.

    Returns:
        The parsed response dict with "pou_name", "matches" and "notes"; "matches"
        contains only dict entries whose "skill_iri" is one of the candidate IRIs.

    Raises:
        ValueError: if the response is empty, is not valid JSON, is not a JSON object,
            or lacks one of EXPECTED_KEYS_REQUIRED.
    """
    cand_list = []
    allowed_skill_iris = []

    for _, r in candidates_df.iterrows():
        cand_list.append({
            "skill_iri": r["skill_iri"],
            "label": r.get("label",""),
            "desc": r.get("desc","")
        })
        allowed_skill_iris.append(r["skill_iri"])

    prompt = f"""
{TASK_DEFINITION}

{GLOSSARY_TEXT}

POU-Name: {pou_row.get("pou_name","")}
module/submodule/base_action: {pou_row.get("module","")} / {pou_row.get("submodule","")} / {pou_row.get("base_action","")}

allowed_skill_iris:
{json.dumps(allowed_skill_iris, ensure_ascii=False)}

Skill-Kandidaten (nur diese wählen):
{json.dumps(cand_list, ensure_ascii=False, indent=2)}

Code:
{pou_row.get("code","")}
""".strip()

    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "Du bist ein präziser Industrie-4.0 Skill-Analyst. Antworte nur mit JSON."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
        # JSON mode (Groq)
        response_format={"type": "json_object"},
    )

    txt = resp.choices[0].message.content
    data = _extract_json(txt)

    # The schema requires an object; a bare list or string would break the key handling below.
    if not isinstance(data, dict):
        raise ValueError(f"Expected a JSON object, got {type(data).__name__}")

    # Minimal sanity checks
    missing = [k for k in EXPECTED_KEYS_REQUIRED if k not in data]
    if missing:
        raise ValueError(f"Missing keys in JSON: {missing}")

    if "notes" not in data:
        data["notes"] = ""

    # Tolerate a single match object returned without the surrounding list;
    # any other non-list value is treated as "no matches".
    raw_matches = data.get("matches") or []
    if isinstance(raw_matches, dict):
        raw_matches = [raw_matches]
    elif not isinstance(raw_matches, list):
        raw_matches = []

    # Hard filter: keep only well-formed entries that reference an allowed skill IRI.
    allowed = set(allowed_skill_iris)
    filtered = []
    for m in raw_matches:
        if not isinstance(m, dict):
            continue
        skill_iri = m.get("skill_iri")
        if isinstance(skill_iri, str) and skill_iri in allowed:
            filtered.append(m)
    data["matches"] = filtered

    return data

In [13]:
# =========================================
# Zelle 9 — Ergebnisse in KG schreiben + Loop (mit Cleanup)
# =========================================
from rdflib import Literal, URIRef
from rdflib.namespace import XSD
from datetime import datetime, timezone
import hashlib
import pandas as pd
import time

# Vocabulary for LLM-generated hypotheses. All IRIs are built with ag(), i.e. directly in the
# AG namespace with the "op_" / "dp_" prefix as part of the local name.
# NOTE(review): P_HAS_SKILL_DESCRIPTION is built with dp("dp_hasSkillDescription"), which yields
# ".../dp_dp_hasSkillDescription"; the properties here yield ".../dp_hasConfidence" etc.
# Confirm which form the ontology actually declares.

# Class of a single "POU implements skill" hypothesis node.
CLASS_HYP = ag("class_SkillImplementationHypothesis")

# Properties attached to a hypothesis node.
P_ABOUT_POU   = ag("op_hasHypothesisAboutPOU")
P_ABOUT_SKILL = ag("op_hasHypothesisAboutSkill")
P_CONF        = ag("dp_hasConfidence")
P_REASON      = ag("dp_hasRationale")
P_EVID        = ag("dp_hasEvidenceSnippet")
P_MODEL       = ag("dp_inferredByModel")
P_TIME        = ag("dp_inferredAt")

# Direct shortcut edge from a POU to a skill.
P_IMPL        = ag("op_implementsSkill")

def hyp_uri(pou_iri: str, skill_iri: str, model: str) -> URIRef:
    """Return a deterministic hypothesis IRI for the (POU, skill, model) triple.

    The local name is "SkillImplHyp_" plus the first 12 hex digits of the SHA-1
    of "pou|skill|model", so the same combination always maps to the same node.
    """
    fingerprint = f"{pou_iri}|{skill_iri}|{model}"
    digest = hashlib.sha1(fingerprint.encode("utf-8")).hexdigest()
    short_id = digest[:12]
    return URIRef(str(AG) + f"SkillImplHyp_{short_id}")

def cleanup_old_hypotheses(graph: Graph) -> int:
    """Remove all hypothesis nodes and all op_implementsSkill edges; return the node count.

    A node is treated as a hypothesis if it is typed as CLASS_HYP or is the subject
    of P_ABOUT_POU, P_ABOUT_SKILL or P_MODEL (covers nodes with a missing type).
    """
    hypothesis_patterns = (
        (RDF.type, CLASS_HYP),
        (P_ABOUT_POU, None),
        (P_ABOUT_SKILL, None),
        (P_MODEL, None),
    )
    old_claims = set()
    for predicate, obj in hypothesis_patterns:
        old_claims.update(graph.subjects(predicate, obj))

    # Remove outgoing and incoming triples of every hypothesis node.
    for claim in old_claims:
        graph.remove((claim, None, None))
        graph.remove((None, None, claim))

    # The direct POU -> skill shortcut edges are derived from hypotheses; drop them as well.
    graph.remove((None, P_IMPL, None))
    return len(old_claims)

def write_mapping_to_kg(pou_iri: str, mapping_json: dict, model: str):
    """Write the LLM matches for one POU into the global graph `g`.

    For every match at or above config["llm"]["confidence_threshold"] a hypothesis
    node (see hyp_uri) is (re)written with POU, skill, confidence, rationale, model,
    timestamp and up to six evidence snippets, plus a direct P_IMPL edge POU -> skill.

    Matches whose confidence is missing/None count as 0.0; matches whose confidence
    cannot be converted to a finite number are skipped instead of aborting the write.
    """
    pou_uri = URIRef(pou_iri)
    now = datetime.now(timezone.utc).isoformat()
    # Loop-invariant: parse the threshold once instead of once per match.
    threshold = float(config["llm"]["confidence_threshold"])

    for m in (mapping_json.get("matches") or []):
        try:
            conf = float(m.get("confidence") or 0.0)
        except (TypeError, ValueError):
            continue

        # conf != conf is True only for NaN, which has no valid xsd:decimal form.
        if conf != conf or conf < threshold:
            continue

        skill_uri = URIRef(m["skill_iri"])
        claim = hyp_uri(pou_iri, m["skill_iri"], model)

        # Same (POU, skill, model) -> same node: clear earlier content before rewriting.
        g.remove((claim, None, None))

        g.add((claim, RDF.type, CLASS_HYP))
        g.add((claim, P_ABOUT_POU, pou_uri))
        g.add((claim, P_ABOUT_SKILL, skill_uri))
        g.add((claim, P_CONF, Literal(conf, datatype=XSD.decimal)))
        g.add((claim, P_REASON, Literal(m.get("reason_short", ""))))
        g.add((claim, P_MODEL, Literal(model)))
        g.add((claim, P_TIME, Literal(now, datatype=XSD.dateTime)))

        # A single string must stay one snippet; slicing it would yield single characters.
        evidence = m.get("evidence") or []
        if isinstance(evidence, str):
            evidence = [evidence]
        elif not isinstance(evidence, (list, tuple)):
            evidence = []
        for ev in evidence[:6]:
            g.add((claim, P_EVID, Literal(ev)))

        g.add((pou_uri, P_IMPL, skill_uri))

# Cleanup: remove hypotheses written by earlier runs before adding new ones.
if CLEAR_OLD_HYPOTHESES_EACH_RUN:
    n = cleanup_old_hypotheses(g)
    print("Alte Hypothesen gelöscht:", n)

# Loop settings from the config.
MAX_POUS = int(config["llm"]["max_pous"])
TOP_N = int(config["llm"]["candidate_top_n"])
SLEEP_S = float(config["llm"]["sleep_s"])

rows_out = []
for i, row in df_pous.head(MAX_POUS).iterrows():
    try:
        candidates_df = pick_candidates_for_row(row.to_dict(), df_skills, top_n=TOP_N)
        out = infer_skill_implementations(row.to_dict(), candidates_df, model=MODEL_NAME)

        write_mapping_to_kg(row["pou_iri"], out, model=MODEL_NAME)

        # Collect all returned matches (also those below the KG confidence threshold) for the overview.
        for m in out.get("matches", []):
            try:
                conf = float(m.get("confidence") or 0.0)
            except (TypeError, ValueError):
                # An unusable confidence must not discard a POU whose KG write already happened.
                conf = float("nan")
            rows_out.append({
                "pou": row["pou_name"],
                "skill": m.get("skill_label", ""),
                "conf": conf
            })

        print(f"[{i+1}/{min(MAX_POUS,len(df_pous))}] {row['pou_name']} -> matches={len(out.get('matches',[]))}")
        # Pause between requests; skipped when the request failed.
        time.sleep(SLEEP_S)

    except Exception as e:
        # Best effort: report the failing POU and continue with the next one.
        print(f"[{i+1}] ERROR bei {row['pou_name']}: {e}")

# Fixed columns: with no collected matches, sort_values("conf") would otherwise raise KeyError.
df_results = pd.DataFrame(rows_out, columns=["pou", "skill", "conf"]).sort_values("conf", ascending=False)
df_results.head(30)

Alte Hypothesen gelöscht: 26
[1/21] HRL_CB_AS_HorizontalMoveSensors -> matches=2
[2/21] HRL_RGB_AS_HorizontalMoveEncoders -> matches=3
[3/21] HRL_RGB_AS_HorizontalMoveSensors -> matches=2
[4/21] HRL_RGB_AS_VerticalMoveEncoders -> matches=2
[5/21] MBS_CB_AS_HorizontalMove -> matches=1
[6/21] MBS_DmPD_AS_HorizontalMove -> matches=1
[7/21] MBS_DmPD_AS_RotationMove -> matches=1
[8/21] MBS_MR01_AS_HardeningProcess -> matches=2
[9/21] MBS_MR01_AS_HorizontalMoveSensors -> matches=1
[10/21] MBS_MR01_AS_SecuringProcess -> matches=4
[11/21] MBS_MR02_AS_CuttingProcess -> matches=1
[12/21] MBS_VSG_AS_HorizontalMove -> matches=1
[13/21] MBS_VSG_AS_SuctionProcess -> matches=2
[14/21] MBS_VSG_AS_VerticalMove -> matches=1
[15/21] SST_CB_AS_HorizontalMove -> matches=2
[16/21] SST_CS_AS_ColorDetection -> matches=1
[17/21] SST_PD_AS_HorizontalMove -> matches=3
[18/21] VSG_AS_CompressorControl -> matches=2
[19/21] VSG_AS_HorizontalMoveEncoder -> matches=1
[20/21] VSG_AS_RotationMoveEncoder -> matches=3
[2

Unnamed: 0,pou,skill,conf
20,MBS_VSG_AS_HorizontalMove,MBS: Greifer horizontal verfahren,0.9
19,MBS_MR02_AS_CuttingProcess,MBS: Schneiden,0.9
10,MBS_DmPD_AS_HorizontalMove,MBS: Greifer horizontal verfahren,0.9
12,MBS_MR01_AS_HardeningProcess,MBS: Härten,0.9
11,MBS_DmPD_AS_RotationMove,MBS: Drehteller rotieren,0.9
36,VSG_AS_VerticalMoveEncoder,SG1: Vertikal verfahren,0.9
32,VSG_AS_HorizontalMoveEncoder,SG1: Horizontal verfahren,0.9
33,VSG_AS_RotationMoveEncoder,SG1: Rotieren,0.9
34,VSG_AS_RotationMoveEncoder,SG2: Rotieren,0.8
0,HRL_CB_AS_HorizontalMoveSensors,HRL: Förderband antreiben,0.8


In [None]:
# =========================================
# Zelle 10 — KG speichern + optional Skills wieder exportieren
# =========================================
from pathlib import Path
import json

# Persist the in-memory graph as Turtle at KG_OUT.
g.serialize(destination=KG_OUT, format="turtle")
print("KG gespeichert:", KG_OUT)
print("Triples:", len(g))

# Optional: export the skills from the KG back into skills_catalog.json
# (for setups where the KG is the source of truth).
EXPORT_SKILLS_FROM_KG = False

if EXPORT_SKILLS_FROM_KG:
    Q_SKILLS_EXPORT = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX ag:  <http://www.semanticweb.org/AgentProgramParams/>
    PREFIX dp:  <http://www.semanticweb.org/AgentProgramParams/dp_>

    SELECT ?skill ?label ?desc
    WHERE {
      ?skill rdf:type ag:class_Skill .
      OPTIONAL { ?skill rdfs:label ?label . }
      OPTIONAL { ?skill dp:dp_hasSkillDescription ?desc . }
    }
    """
    out_sk = []
    for r in g.query(Q_SKILLS_EXPORT):
        label_text = str(r.label) if r.label else ""
        # Station prefix = text before the first ":" of the label, if the label has one.
        station_prefix = label_text.split(":")[0].strip() if ":" in label_text else ""
        out_sk.append(dict(
            id=str(r.skill).split("/")[-1],
            label=label_text,
            description=str(r.desc) if r.desc else "",
            station_prefix=station_prefix,
            aliases=[],
        ))

    # datetime / timezone are the names imported by the hypothesis-writing cell above.
    payload = dict(
        schema_version="1.0",
        generated_at=datetime.now(timezone.utc).isoformat(),
        notes="Export aus KG",
        skills=sorted(out_sk, key=lambda x: x["id"]),
    )
    SKILLS_JSON_PATH.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    print("Skills exportiert nach:", SKILLS_JSON_PATH)