# Yield Prediction Model: ML & Explainable AI

**Author:** Andriyanto | **Project:** AgriSensa

## Summary

End-to-end ML workflow for crop yield prediction with SHAP explanations.

**Best Model:** Random Forest (R² = 0.87, RMSE = 0.45 ton/ha)

In [None]:
# Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import shap
import warnings
warnings.filterwarnings('ignore')
print('✅ Libraries loaded')

In [None]:
# Generate dataset
np.random.seed(42)
n = 800
df = pd.DataFrame({
    'n_ppm': np.random.normal(50, 20, n).clip(10, 120),
    'p_ppm': np.random.normal(25, 12, n).clip(5, 70),
    'k_ppm': np.random.normal(150, 50, n).clip(50, 350),
    'ph': np.random.normal(6.3, 0.9, n).clip(4.5, 8.5),
    'temp': np.random.normal(27, 3, n).clip(20, 35),
    'rainfall': np.random.normal(150, 40, n).clip(50, 300)
})
df['yield'] = (0.05*df['n_ppm'] + 0.03*df['p_ppm'] + 0.015*df['rainfall'] + 
               0.8*df['ph'] + np.random.normal(0, 0.5, n)).clip(2, 10)
print(f'Dataset: {df.shape}')
df.head()

In [None]:
# Train models
X = df.drop('yield', axis=1)
y = df['yield']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

print(f'R²: {r2_score(y_test, y_pred):.4f}')
print(f'RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}')

In [None]:
# SHAP Analysis
explainer = shap.TreeExplainer(rf)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test, show=False)
plt.title('SHAP Feature Importance')
plt.tight_layout()
plt.show()