<a href="https://colab.research.google.com/github/vidyeah004/almosttttttheree/blob/main/aircraft_maintenance_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Aircraft Maintenance Troubleshooting Bot (ML-powered)
- TF-IDF + NearestNeighbors for retrieval from maintenance logs
- LogisticRegression classifier for category prediction
- Simple keyword knowledge base, consulted before log retrieval
"""

import random
import os
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

# Starter data: built-in maintenance log records used when no CSV file is
# found (see load_logs_from_csv). Each record carries the free-text
# "symptom", "diagnosis" and "fix" fields that are indexed for retrieval,
# a "category" label used to train the classifier, and a numeric "id".
SAMPLE_LOGS = [
    {
        "id": 1,
        "symptom": "Engine vibration at cruise and unusual noise from gearbox area",
        "diagnosis": "Gearbox bearing wear and slight misalignment",
        "fix": "Replaced gearbox bearings, re-aligned gearbox, test-run OK",
        "category": "powerplant"
    },
    {
        "id": 2,
        "symptom": "Left landing light not turning on when switched",
        "diagnosis": "Faulty bulb connector and intermittent wiring contact",
        "fix": "Replaced connector, re-crimped wiring, verified continuity",
        "category": "electrical"
    },
    {
        "id": 3,
        "symptom": "Hydraulic pressure drop during gear extension",
        "diagnosis": "Hydraulic leak in actuator line, worn seal",
        "fix": "Replaced actuator seal, pressure test passed",
        "category": "hydraulic"
    },
    {
        "id": 4,
        "symptom": "Autopilot disengages intermittently during turbulence",
        "diagnosis": "Loose connector on the autopilot controller",
        "fix": "Secured connector, updated harness clamp, functional test OK",
        "category": "avionics"
    },
    {
        "id": 5,
        "symptom": "Cabin altitude warning lights flicker",
        "diagnosis": "Faulty cabin pressure sensor and loose mounting",
        "fix": "Replaced sensor and tightened mount, verified pressure recorders",
        "category": "environmental"
    }
]
# Knowledge base: canned troubleshooting advice keyed by a lowercase phrase.
# MaintenanceBot.answer lowercases the user's query and returns the advice of
# the first key that occurs in it as a substring; this lookup runs BEFORE
# log retrieval and category prediction, so keys must stay lowercase.
KNOWLEDGE_BASE = {
    "oil leak": "Common causes: seal failure, loose fittings, cracked hoses. Check sump and lines for visual leaks, perform pressure test.",
    "noisy gearbox": "Possible bearing wear or lack of lubrication. Inspect oil level, check bearing play, review maintenance history.",
    "landing light": "Check bulb, fuse, switch, and wiring harness. Verify power at connector with multimeter.",
    "hydraulic leak": "Inspect hoses, fittings, and actuators. Use dye/UV for small leaks; pressure test to localize.",
    "autopilot disengage": "Check servo connections, warnings or transient bus power interruptions, and autopilot fault logs."
}
# Utility: load or create dataframe
def load_logs_from_csv(path="logs.csv"):
    """Load maintenance logs from a CSV file, or fall back to the sample logs.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A DataFrame with at least the ``symptom``, ``diagnosis`` and ``fix``
        columns. Missing values in those three text columns are replaced by
        empty strings. Other columns (for example ``category``) keep their
        missing values, so unlabeled rows stay distinguishable from labeled
        ones. When ``path`` does not exist, a DataFrame built from
        ``SAMPLE_LOGS`` is returned instead.

    Raises:
        ValueError: If the CSV exists but lacks one of the required columns.
    """
    if not os.path.exists(path):
        # No CSV available: use SAMPLE_LOGS as fallback.
        return pd.DataFrame(SAMPLE_LOGS)

    df = pd.read_csv(path)

    # Ensure the columns the bot indexes are present, and say which are not.
    required = ["symptom", "diagnosis", "fix"]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise ValueError(
            f"CSV must contain columns: {required}; missing: {missing}"
        )

    # Only blank out the text columns. Filling every column would turn a
    # missing category into "" and make it look like a real label.
    df[required] = df[required].fillna("")
    return df
# Build retrieval + classifier
class MaintenanceBot:
    """Troubleshooting assistant built from historical maintenance logs.

    The bot combines three sources, tried in this order by :meth:`answer`:
    a keyword lookup in ``KNOWLEDGE_BASE``, TF-IDF nearest-neighbour
    retrieval over the logs, and a logistic-regression classifier that
    predicts a fault category from the symptom text.
    """

    # Minimum cosine similarity the best retrieved log must reach before the
    # retrieved history is presented as the answer.
    SIMILARITY_THRESHOLD = 0.35

    def __init__(self, logs_df: pd.DataFrame):
        """Index the logs and, when labels are available, train the classifier.

        Args:
            logs_df: Maintenance logs with ``symptom``, ``diagnosis`` and
                ``fix`` columns. A ``category`` column enables category
                prediction; an ``id`` column is echoed in retrieval results.

        Raises:
            ValueError: If ``logs_df`` is empty.
        """
        if logs_df.empty:
            raise ValueError("logs_df must contain at least one maintenance log")

        self.logs = logs_df.reset_index(drop=True).copy()
        # Create a single 'text' field for searching.
        self.logs["text"] = (self.logs["symptom"].astype(str) + " | " +
                             self.logs["diagnosis"].astype(str) + " | " +
                             self.logs["fix"].astype(str))
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
        self.nn = None
        self.classifier = None
        self.label_encoder = None
        self._train_retrieval()
        # Try to train the category classifier if any category is present.
        if "category" in self.logs.columns and self.logs["category"].notnull().any():
            self._train_classifier()

    def _train_retrieval(self):
        """Fit the TF-IDF vectorizer and the cosine nearest-neighbour index."""
        texts = self.logs["text"].tolist()
        self.tfidf = self.vectorizer.fit_transform(texts)
        # NearestNeighbors for similarity retrieval over the TF-IDF vectors.
        self.nn = NearestNeighbors(n_neighbors=5, metric="cosine")
        self.nn.fit(self.tfidf)
        print("[INFO] Retrieval model trained on", len(texts), "logs.")

    def _train_classifier(self):
        """Train a symptom -> category classifier on the labeled logs.

        Rows whose category is missing or blank are ignored. Training is
        skipped (``self.classifier`` stays ``None``) when fewer than two
        distinct categories are available, because logistic regression
        cannot be fitted on a single class.
        """
        categories = self.logs["category"]
        has_label = categories.notna() & (categories.astype(str).str.strip() != "")
        labeled = self.logs[has_label]
        if labeled["category"].astype(str).nunique() < 2:
            print("[INFO] Classifier skipped: need at least two labeled categories.")
            return

        X = self.vectorizer.transform(labeled["symptom"].astype(str))
        self.label_encoder = LabelEncoder()
        y_enc = self.label_encoder.fit_transform(labeled["category"].astype(str))

        acc = self._holdout_accuracy(X, y_enc)

        # The deployed model is fitted on every labeled row. Holding rows
        # back would make a rarely seen category impossible to predict.
        clf = LogisticRegression(max_iter=1000)
        clf.fit(X, y_enc)
        self.classifier = clf

        if acc is None:
            acc_text = "n/a (too few samples per category for a hold-out split)"
        else:
            acc_text = f"{acc:.2f}"
        print(f"[INFO] Classifier trained on {len(labeled)} labeled logs. Test accuracy ≈ {acc_text}")

    def _holdout_accuracy(self, X, y, test_size=0.2):
        """Estimate accuracy on a stratified hold-out split.

        Returns:
            The accuracy as a float, or ``None`` when the data cannot be
            split so that every category appears in both the training and
            the test part.
        """
        counts = np.bincount(y)
        n_classes = len(counts)
        n_test = int(np.ceil(test_size * len(y)))
        n_train = len(y) - n_test
        if counts.min() < 2 or n_test < n_classes or n_train < n_classes:
            return None
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42, stratify=y
        )
        probe = LogisticRegression(max_iter=1000)
        probe.fit(X_train, y_train)
        return float(probe.score(X_test, y_test))

    def retrieve_similar(self, query: str, k=3):
        """Return up to ``k`` logs most similar to ``query``.

        Args:
            query: Free-text symptom or fault description.
            k: Maximum number of matches; capped at the number of logs.

        Returns:
            A list of dicts, best match first, with keys ``id``, ``symptom``,
            ``diagnosis``, ``fix``, ``category`` (``None`` when the log has
            no category) and ``score`` (cosine similarity). The list is empty
            when ``k`` is not positive.
        """
        if k <= 0:
            return []
        q_tfidf = self.vectorizer.transform([query])
        n_neighbors = min(k, len(self.logs))
        dists, idxs = self.nn.kneighbors(q_tfidf, n_neighbors=n_neighbors)
        results = []
        for dist, idx in zip(dists[0], idxs[0]):
            row = self.logs.iloc[idx]
            category = row.get("category", None)
            # Report a missing or blank category uniformly as None.
            if pd.isna(category) or not str(category).strip():
                category = None
            results.append({
                "id": row.get("id", idx),
                "symptom": row["symptom"],
                "diagnosis": row["diagnosis"],
                "fix": row["fix"],
                "category": category,
                "score": float(1 - dist),  # cosine distance -> similarity
            })
        return results

    def predict_category(self, query: str):
        """Predict the fault category for ``query``.

        Returns:
            The predicted category label, or ``None`` when no classifier was
            trained or the query shares no vocabulary with the logs.
        """
        if self.classifier is None:
            return None
        qtf = self.vectorizer.transform([query])
        # An all-zero vector carries no evidence; decline to predict so the
        # caller can fall back to asking for more detail.
        if qtf.nnz == 0:
            return None
        pred = self.classifier.predict(qtf)[0]
        return self.label_encoder.inverse_transform([pred])[0]

    def answer(self, user_query: str):
        """Answer a query from the knowledge base, past logs or the classifier.

        Returns:
            A dict whose ``type`` is one of ``"kb"``, ``"retrieval"``,
            ``"prediction"`` or ``"fallback"`` and whose ``text`` holds the
            reply. ``"retrieval"`` results also carry ``category`` and
            ``matches``; ``"prediction"`` results also carry ``category``.
        """
        # 1) Try knowledge base keyword.
        low = user_query.lower()
        for key, val in KNOWLEDGE_BASE.items():
            if key in low:
                return {"type": "kb", "text": val}

        # 2) Retrieve similar logs; use them when the top match is close enough.
        sims = self.retrieve_similar(user_query, k=3)
        if sims and sims[0]["score"] > self.SIMILARITY_THRESHOLD:
            text = self._format_retrieval_answer(sims)
            category = sims[0].get("category", None)
            return {"type": "retrieval", "text": text, "category": category, "matches": sims}

        # 3) If a classifier exists, give the predicted category + suggestions.
        cat = self.predict_category(user_query)
        if cat:
            suggestion = f"Predicted fault category: {cat}. Recommended checks: inspect related components for {cat} issues."
            return {"type": "prediction", "text": suggestion, "category": cat}

        # 4) Fallback generic help.
        return {"type": "fallback", "text": "I couldn't find a close match. Please give more details (systems affected, flight phase, fault codes, repeated or intermittent?)."}

    def _format_retrieval_answer(self, sims):
        """Render retrieved matches as a numbered, human-readable text block."""
        lines = [
            f"Match {rank} (sim={s['score']:.2f}):\nSymptom: {s['symptom']}\nDiagnosis: {s['diagnosis']}\nFix: {s['fix']}\n"
            for rank, s in enumerate(sims, start=1)
        ]
        lines.append("Tip: Use these as references — always follow the aircraft maintenance manual (AMM) and log the job.")
        return "\n".join(lines)

# ---------------------------
# Interactive CLI
# ---------------------------
def run_cli(bot: MaintenanceBot):
    """Run an interactive prompt that forwards each input line to ``bot``.

    Blank lines are ignored. Typing ``exit``, ``quit`` or ``bye`` (any case)
    ends the session, as does end-of-input or a keyboard interrupt, so the
    loop also terminates cleanly when stdin is closed or piped.

    Args:
        bot: The assistant whose ``answer`` method produces each reply.
    """
    print("\n=== Aircraft Maintenance Troubleshooting Bot ===")
    print("Type your symptom or fault description. Type 'exit' or 'quit' to stop.\n")
    while True:
        try:
            q = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C / exhausted stdin: leave without a traceback.
            print("\nBot: Safe flights. Bye.")
            break
        if not q:
            continue
        if q.lower() in ("exit", "quit", "bye"):
            print("Bot: Safe flights. Bye.")
            break
        ans = bot.answer(q)
        if ans["type"] == "kb":
            print("Bot [KB]:", ans["text"])
        elif ans["type"] == "retrieval":
            # Retrieval text is multi-line, so the header gets its own line.
            print("Bot [History Matches]:")
            print(ans["text"])
        elif ans["type"] == "prediction":
            print("Bot [Prediction]:", ans["text"])
        else:
            print("Bot:", ans["text"])

# ---------------------------
# Main
# ---------------------------
# Script entry point: build the bot from logs.csv in the working directory
# (or from the built-in sample logs when that file is absent) and start the
# interactive prompt.
if __name__ == "__main__":
    # load logs if present, else use sample logs
    df_logs = load_logs_from_csv("logs.csv")
    # Constructing the bot trains the retrieval index and, if the logs carry
    # categories, the classifier; progress is printed as [INFO] lines.
    bot = MaintenanceBot(df_logs)
    run_cli(bot)


[INFO] Retrieval model trained on 5 logs.
[INFO] Classifier trained on 5 labeled logs. Test accuracy ≈ 0.00

=== Aircraft Maintenance Troubleshooting Bot ===
Type your symptom or fault description. Type 'exit' or 'quit' to stop.

You: Oil leak near engine pylon after shutdown.
Bot [KB]: Common causes: seal failure, loose fittings, cracked hoses. Check sump and lines for visual leaks, perform pressure test.
Bot [History Matches]:
Match 1 (sim=0.37):
Diagnosis: Faulty cabin pressure sensor and loose mounting
Fix: Replaced sensor and tightened mount, verified pressure recorders

Match 2 (sim=0.07):
Symptom: Autopilot disengages intermittently during turbulence
Diagnosis: Loose connector on the autopilot controller
Fix: Secured connector, updated harness clamp, functional test OK

Match 3 (sim=0.00):
Symptom: Left landing light not turning on when switched
Diagnosis: Faulty bulb connector and intermittent wiring contact
Fix: Replaced connector, re-crimped wiring, verified continuity

Tip: 