In [1]:
# ================================
# River Nutrient Level Prediction
# ================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import pickle

# -------------------------------
# Configuration
# -------------------------------
# Everything a reader might want to tune lives here instead of being
# scattered through the script as literals. Values are unchanged.
DATA_PATH = "River Sites Nutrient Levels.csv"
MODEL_PATH = "nutrient_model.pkl"
ENCODER_PATH = "region_encoder.pkl"
TARGET_COL = "Dissolved_inorganic_nitrogen"
REGION_COL = "Region"
TEST_SIZE = 0.2
SEED = 42

# -------------------------------
# Load Dataset
# -------------------------------
# Keep the frame exactly as read from disk so it can be inspected later
# without re-reading the file.
df_raw = pd.read_csv(DATA_PATH)

# Drop the stray "Unnamed: 5" column when it is present (errors="ignore"
# makes this a no-op otherwise). drop() returns a new frame, so df_raw is
# left untouched.
df = df_raw.drop(columns=["Unnamed: 5"], errors="ignore")

# Fail fast, naming every required column that is absent, rather than
# surfacing a bare KeyError for only the first one further down.
missing_cols = [col for col in (REGION_COL, TARGET_COL) if col not in df.columns]
if missing_cols:
    raise KeyError(f"{DATA_PATH!r} is missing required column(s): {missing_cols}")

# A row without a target value cannot be used for supervised training, and
# the regressor rejects NaN in y; drop such rows explicitly and report how
# many were removed.
n_missing_target = int(df[TARGET_COL].isna().sum())
if n_missing_target:
    print(f"Dropping {n_missing_target} row(s) with missing {TARGET_COL}")
    df = df.dropna(subset=[TARGET_COL])

# -------------------------------
# Encode Region column
# -------------------------------
# Replace the Region values with integer codes. The fitted encoder is
# pickled below so the same mapping can be re-applied later.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# values rather than input features (OrdinalEncoder / OneHotEncoder are the
# feature-side tools). It is kept here because whatever loads
# region_encoder.pkl presumably expects a LabelEncoder - confirm before
# switching.
le = LabelEncoder()
df[REGION_COL] = le.fit_transform(df[REGION_COL])

# -------------------------------
# Features & Target
# -------------------------------
# Every remaining column other than the target is used as a feature.
X = df.drop(TARGET_COL, axis=1)
y = df[TARGET_COL]

# -------------------------------
# Train-Test Split
# -------------------------------
# Fixed seed so the held-out rows are the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# -------------------------------
# Train Model
# -------------------------------
# Default hyper-parameters; only the seed is pinned for reproducibility.
model = RandomForestRegressor(random_state=SEED)
model.fit(X_train, y_train)

# -------------------------------
# Evaluate
# -------------------------------
# R^2 on the held-out split only.
y_pred = model.predict(X_test)
print("R2 Score:", r2_score(y_test, y_pred))

# -------------------------------
# Save Model & Encoder
# -------------------------------
# The saved model is the one fitted on the training split above.
# NOTE: pickle files should only ever be loaded from trusted sources.
with open(MODEL_PATH, "wb") as f:
    pickle.dump(model, f)

with open(ENCODER_PATH, "wb") as f:
    pickle.dump(le, f)

print("✅ Model and encoder saved")


R2 Score: 0.36153921130696787
✅ Model and encoder saved
