# 📊 Predictive Defect Risk Analysis

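Train a model to predict defect-prone modules from historical code metrics. This notebook uses a randomly generated dataset as a stand-in for real project history, so the reported scores are expected to be close to chance level.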

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [None]:
# Simulated dataset: random code metrics paired with random defect labels.
# The labels are drawn independently of the features, so there is no real
# signal to learn here; replace this cell with real project history to get
# meaningful predictions.
import numpy as np

DATA_SEED = 42   # fixed seed so every run generates the same dataset
N_MODULES = 100  # number of simulated modules (rows)

# A dedicated Generator keeps the data reproducible without touching the
# global NumPy random state used elsewhere.
rng = np.random.default_rng(DATA_SEED)
data = pd.DataFrame({
    'lines_of_code': rng.integers(100, 1000, N_MODULES),      # 100..999
    'cyclomatic_complexity': rng.integers(1, 15, N_MODULES),  # 1..14
    'defect': rng.integers(0, 2, N_MODULES),                  # 0 or 1
})

In [None]:
# Split & Train
X = data[['lines_of_code', 'cyclomatic_complexity']]  # feature columns
y = data['defect']                                     # binary target (0/1)

# One fixed seed for both the split and the forest, so the printed report is
# the same on every run given the same data.
MODEL_SEED = 42

# Hold out 30% of the rows for evaluation. Stratifying on y keeps the defect
# ratio the same in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=MODEL_SEED
)

clf = RandomForestClassifier(random_state=MODEL_SEED)
clf.fit(X_train, y_train)

# Evaluate on the held-out rows only.
preds = clf.predict(X_test)
print(classification_report(y_test, preds))

In [None]:
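# Optional: Explainability with SHAP
# !pip install shap
# import shap
# explainer = shap.Explainer(clf, X_test)
# shap_values = explainer(X_test)
# For a classifier the explanation carries one set of values per class, and
# beeswarm accepts only a single output, so select the "defect" class (index 1).
# shap.plots.beeswarm(shap_values[:, :, 1])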