# Test 3: Complete ML Pipeline

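**Proof:** End-to-end ML workflow running on the Carbon Compute macOS + GPU platform. Note: the scikit-learn Random Forest in this test trains on all CPU cores (`n_jobs=-1`), not on the GPU.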

Demonstrates: Synthetic data generation → Train/test split → Training → Evaluation → Visualization

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import time

# Opening banner: the notebook title framed by two 60-character rules,
# followed by one blank line. A single print call emits all four lines.
print(
    "=" * 60,
    "Complete ML Pipeline - Carbon Compute macOS + GPU",
    "=" * 60,
    "",
    sep="\n",
)

# Step 1: generate a synthetic classification dataset.
# X holds the feature matrix and y the class labels; the fixed random_state
# makes the generated data identical on every run.
print("Step 1: Generating dataset...")
X, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42
)

# Wrap the arrays in a DataFrame for inspection. The column names are derived
# from the actual width of X rather than a second hard-coded 20, so changing
# n_features above can never leave the names out of sync with the data.
feature_names = [f'feature_{i}' for i in range(X.shape[1])]
df = pd.DataFrame(X, columns=feature_names)
df['target'] = y

# df.shape includes the extra 'target' column; the feature count does not.
print(f"  ✅ Dataset created: {df.shape}")
print(f"  Features: {len(feature_names)}")
print(f"  Samples: {len(df)}")
print()

# Step 2: hold out 20% of the samples as a test set. The fixed seed keeps the
# partition the same from run to run.
print("Step 2: Splitting data...")
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report how many rows landed on each side of the split.
print(f"  ✅ Train: {len(X_train)} samples")
print(f"  ✅ Test: {len(X_test)} samples")
print()

# Step 3: fit a Random Forest on the training split and time the fit.
print("Step 3: Training Random Forest...")

model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
    n_jobs=-1  # Use all CPUs
)

# Time only the fit itself. perf_counter() is a monotonic, high-resolution
# clock intended for measuring intervals; time.time() is wall-clock time and
# can jump (NTP sync, manual clock changes), which would corrupt the
# reported duration.
start_time = time.perf_counter()
model.fit(X_train, y_train)
train_time = time.perf_counter() - start_time

print(f"  ✅ Training complete: {train_time:.2f}s")
print()

# Step 4: score the fitted model on the held-out test split.
print("Step 4: Evaluating model...")
y_pred = model.predict(X_test)

# Compute both metrics up front, then print them together.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report_text = classification_report(y_test, y_pred)

print(f"  ✅ Accuracy: {accuracy:.4f}")
print()
print("Classification Report:", report_text, "", sep="\n")

# Step 5: plot the ten features the fitted forest ranks as most important.
print("Step 5: Visualizing results...")

# One row per feature, ordered from most to least important.
importance_table = pd.DataFrame({
    'feature': feature_names,
    'importance': model.feature_importances_
})
feature_importance = importance_table.sort_values(by='importance', ascending=False)
top_features = feature_importance.head(10)

# Draw on an explicit Figure/Axes pair instead of pyplot's implicit
# "current axes" state.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=top_features, x='importance', y='feature', ax=ax)
ax.set_title('Top 10 Feature Importance')
ax.set_xlabel('Importance')
fig.tight_layout()
plt.show()

print("  ✅ Visualization complete")
print()

# Closing summary: repeat the headline numbers (fit duration and test
# accuracy) between two 60-character rules, emitted by a single print call.
print(
    "=" * 60,
    "✅ COMPLETE ML PIPELINE SUCCESSFUL",
    f"Training time: {train_time:.2f}s",
    f"Accuracy: {accuracy:.4f}",
    "Platform: macOS + krunkit GPU",
    "=" * 60,
    sep="\n",
)