# Namma Mann - Soil Model Training (XGBoost → ONNX)

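Train an XGBoost regressor on synthetic soil data, then export it to ONNX for FastAPI inference.
Run in **Google Colab** for GPU support (optional: the training cell uses XGBoost's default settings and does not request a GPU).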

In [None]:
!pip install xgboost onnx skl2onnx numpy pandas -q

In [None]:
import numpy as np
import xgboost as xgb
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Fixed seed so the synthetic dataset (and therefore the trained model) is
# reproducible between runs.
np.random.seed(42)
n_samples = 1000

# Synthetic soil data, one row per sample. Columns: pH, moisture, N, P, K.
# Each column is drawn uniformly between its own low/high bound.
X = np.random.uniform(
    low=[3.5, 0, 0, 0, 0],
    high=[10, 100, 500, 200, 500],
    size=(n_samples, 5),
)

# Health score in [0, 1]: a 0.5 baseline plus 0.1 for each satisfied
# criterion (pH 6-7.5, N>=25, P>=15, K>=40), plus Gaussian noise (sigma 0.05),
# clipped to the valid range. Moisture (column 1) does not affect the score.
#
# The pH band is tested with explicit bounds. The previous form,
# abs(pH - 6.75) < 1.5, used the full width of the band as its half-width and
# therefore rewarded pH 5.25-8.25 instead of the documented 6-7.5.
ph_ok = (X[:, 0] >= 6.0) & (X[:, 0] <= 7.5)
y = np.clip(
    0.5 + 0.1 * ph_ok.astype(float)
    + 0.1 * (X[:, 2] >= 25) + 0.1 * (X[:, 3] >= 15) + 0.1 * (X[:, 4] >= 40)
    + 0.05 * np.random.randn(n_samples),
    0, 1
)
print("X shape:", X.shape, "y shape:", y.shape)

In [None]:
# Fit a gradient-boosted tree regressor on the feature matrix X and the
# health-score targets y. random_state is pinned for repeatable training.
xgb_params = {"n_estimators": 100, "max_depth": 4, "random_state": 42}
model = xgb.XGBRegressor(**xgb_params)
model.fit(X, y)
print("XGBoost trained.")

In [None]:
# Describe the ONNX graph input: a float tensor named "float_input" with a
# dynamic batch dimension (None) and one column per soil feature.
n_features = 5
input_tensor = FloatTensorType([None, n_features])
initial_type = [("float_input", input_tensor)]

# NOTE(review): skl2onnx does not appear to ship a converter for
# xgb.XGBRegressor; its documentation registers one from onnxmltools via
# update_registered_converter before calling convert_sklearn. Confirm this
# call succeeds in the target environment.
onnx_model = convert_sklearn(
    model,
    initial_types=initial_type,
    target_opset=12,
)

# Write the serialized protobuf next to the notebook.
with open("soil_model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())
print("Exported soil_model.onnx")

In [None]:
# Verify: run the exported model through ONNX Runtime and compare its output
# with the in-memory XGBoost model on the same rows.
import onnxruntime as ort

sess = ort.InferenceSession("soil_model.onnx", providers=["CPUExecutionProvider"])
# The graph input was declared as a float tensor, so cast from float64.
sample = X[:3].astype(np.float32)
pred = sess.run(None, {sess.get_inputs()[0].name: sample})
print("Sample predictions:", pred[0])

# Printing alone verifies nothing: report how far the ONNX output deviates
# from the original model so a faulty export is visible. Both arrays are
# flattened so their shapes line up before subtracting.
expected = model.predict(sample)
max_diff = float(np.max(np.abs(np.ravel(pred[0]) - np.ravel(expected))))
print("Max |ONNX - XGBoost| difference:", max_diff)