In [None]:
import json
import warnings
from datetime import datetime

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
warnings.filterwarnings("ignore")

In [None]:
# =============== 2. Load & Explore Dataset ===================
# Location of the training CSV. The default points at the Colab working
# directory; change it here when running the notebook elsewhere.
DATA_PATH = "/content/crop_recommendation_dataset.csv"

# Model inputs (in the order the model and the saved scaler expect them)
# and the column holding the crop label.
features = ['nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']
target_col = 'crop'

df = pd.read_csv(DATA_PATH)

# Fail fast with a readable message instead of an opaque KeyError when the
# CSV does not have the expected schema.
missing_cols = [col for col in features + [target_col] if col not in df.columns]
if missing_cols:
    raise ValueError(
        f"{DATA_PATH} is missing expected column(s): {missing_cols}; "
        f"found columns: {list(df.columns)}"
    )

X = df[features]
y = df[target_col]

In [None]:
# =============== 3. Preprocessing ============================
# Encode crop names as integer class ids; label_encoder.classes_ maps the ids
# back to names for reports and predictions.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split BEFORE scaling so the scaler's statistics come from training rows
# only. Fitting the scaler on the full dataset would leak information about
# the test rows into both the evaluation and the scaler saved for deployment.
# The split itself (rows, seed, stratification) is the same as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix on the same (train-fitted) scale; kept because later
# cells reference X_scaled.
X_scaled = scaler.transform(X)


In [None]:
# =============== 4. Model Training ============================
# Gradient-boosted tree classifier trained on the scaled training split.
# - random_state pins the fit so a Restart & Run All reproduces the same model
#   (same seed as the train/test split).
# - use_label_encoder is no longer passed: the flag is deprecated in recent
#   XGBoost releases, and the labels are already integer-encoded by the
#   LabelEncoder in the preprocessing cell.
model = XGBClassifier(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    objective='multi:softprob',
    eval_metric='mlogloss',
    random_state=42,
)
model.fit(X_train, y_train)

In [None]:
# =============== 5. Evaluation ================================
# Held-out accuracy of the model fitted in the training cell.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# 5-fold cross-validation on the raw features with the scaler inside the
# pipeline, so each fold's scaler is fitted on that fold's training rows only.
# Scoring the pre-scaled matrix instead would reuse scaling statistics that
# were computed from rows landing in the validation folds.
# cross_val_score clones the pipeline for every fold, so the already-fitted
# `model` above is left untouched.
cv_pipeline = make_pipeline(StandardScaler(), model)
cv_scores = cross_val_score(cv_pipeline, X, y_encoded, cv=5)


In [None]:
# Report hold-out accuracy, the cross-validation mean and spread, and the
# per-class precision/recall table labelled with the original crop names.
cv_mean, cv_std = cv_scores.mean(), cv_scores.std()
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Cross-validation Score: {cv_mean:.4f} ± {cv_std:.4f}")

report_text = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
print("\nClassification Report:\n", report_text)


Test Accuracy: 0.0545
Cross-validation Score: 0.0477 ± 0.0097

Classification Report:
               precision    recall  f1-score   support

       apple       0.00      0.00      0.00        20
      banana       0.10      0.10      0.10        20
   blackgram       0.11      0.10      0.10        20
    chickpea       0.00      0.00      0.00        20
     coconut       0.05      0.05      0.05        20
      coffee       0.06      0.05      0.05        20
      cotton       0.06      0.05      0.05        20
      grapes       0.07      0.10      0.08        20
        jute       0.00      0.00      0.00        20
 kidneybeans       0.03      0.05      0.04        20
      lentil       0.07      0.05      0.06        20
       maize       0.00      0.00      0.00        20
       mango       0.06      0.10      0.08        20
   mothbeans       0.11      0.15      0.13        20
    mungbean       0.07      0.05      0.06        20
   muskmelon       0.09      0.10      0.09     

In [None]:
# =============== 6. Feature Importance ========================
# Pair every feature name with the fitted model's importance score and list
# them from most to least important.
importances = model.feature_importances_
importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)
print("\nFeature Importance:\n", importance_df)



Feature Importance:
        Feature  Importance
4     humidity    0.157471
5           ph    0.146329
6     rainfall    0.141970
1   phosphorus    0.141238
2    potassium    0.140154
0     nitrogen    0.138275
3  temperature    0.134562


In [None]:
# =============== 7. Save Model and Metadata ===================
# Persist everything inference needs: the classifier, the label encoder that
# maps class ids back to crop names, the feature scaler, and the feature order.
for artifact, filename in (
    (model, 'xgboost_crop_model.pkl'),
    (label_encoder, 'label_encoder.pkl'),
    (scaler, 'feature_scaler.pkl'),
):
    joblib.dump(artifact, filename)

# Kept as the cell's final expression so the written path is still displayed
# as the cell output.
joblib.dump(features, 'feature_names.pkl')

['feature_names.pkl']

In [None]:
# Summary written next to the saved artifacts: version tag, headline metrics
# (rounded to 4 decimals), creation time (local clock), feature order and the
# class names in encoder order.
metadata = dict(
    model_version='1.0',
    accuracy=round(float(accuracy), 4),
    cross_val_mean=round(float(cv_scores.mean()), 4),
    created_at=f"{datetime.now():%Y-%m-%d %H:%M:%S}",
    features=features,
    classes=list(label_encoder.classes_),
)

with open('model_metadata.json', 'w') as meta_file:
    meta_file.write(json.dumps(metadata))

In [None]:
# =============== 8. Prediction Example ========================
def predict_crop(input_list, top_k=3):
    """Return the most probable crops for one set of measurements.

    The sample is scaled with the notebook's fitted ``scaler`` and scored with
    ``model.predict_proba``; class ids are mapped back to crop names through
    ``label_encoder.classes_``.

    Args:
        input_list: Feature values for a single sample, one per entry of the
            module-level ``features`` list and in the same order.
        top_k: Number of crops to return, most probable first. Defaults to 3.
            A value larger than the number of classes returns every class.

    Returns:
        A list of ``(crop_name, probability_percent)`` tuples sorted by
        descending probability.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        # probs[-0:] is the whole array, so top_k=0 would silently return
        # every class instead of none; reject it explicitly.
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    # A one-row frame with named columns, matching how the scaler was fitted.
    input_df = pd.DataFrame([input_list], columns=features)
    input_scaled = scaler.transform(input_df)
    probs = model.predict_proba(input_scaled)[0]

    # argsort is ascending: take the last top_k indices and reverse them so
    # the most probable class comes first.
    top_indices = np.argsort(probs)[-top_k:][::-1]
    return [(label_encoder.classes_[i], probs[i] * 100) for i in top_indices]

# Demo: rank the recommendations for one hand-picked sample. The values are
# given in the same order as the `features` list.
print("\n🔍 Prediction Example")
example_input = [90, 42, 43, 20.8, 82.0, 6.5, 202.9]
recommendations = predict_crop(example_input)
for rank, (crop, confidence) in enumerate(recommendations, start=1):
    line = f"{rank}. {crop.upper()} ({confidence:.2f}% confidence)"
    print(line)



🔍 Prediction Example
1. MANGO (47.96% confidence)
2. GRAPES (8.40% confidence)
3. COFFEE (7.42% confidence)


In [None]:
# =============== 9. REST API Server (Flask) ===================
# Reference implementation of the serving app. It is kept as an inert string
# so that running this cell does not start a web server; copy the text into
# its own file next to the saved .pkl artifacts to run it.
"""
from flask import Flask, request, jsonify
import joblib
import numpy as np
import pandas as pd

app = Flask(__name__)

# joblib.load unpickles its input: only load artifacts produced by the
# training notebook, never files from an untrusted source.
model = joblib.load('xgboost_crop_model.pkl')
scaler = joblib.load('feature_scaler.pkl')
label_encoder = joblib.load('label_encoder.pkl')
features = joblib.load('feature_names.pkl')

@app.route('/predict', methods=['POST'])
def predict():
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising, so the client gets a JSON error rather than an HTML error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    missing = [feature for feature in features if feature not in data]
    if missing:
        return jsonify({"error": f"Missing features: {missing}"}), 400

    try:
        input_vals = [float(data[feature]) for feature in features]
    except (TypeError, ValueError):
        return jsonify({"error": "All feature values must be numeric"}), 400

    input_df = pd.DataFrame([input_vals], columns=features)
    scaled = scaler.transform(input_df)
    probs = model.predict_proba(scaled)[0]
    top_indices = np.argsort(probs)[-3:][::-1]
    # Cast to built-in str/float: NumPy scalars such as float32 are not JSON
    # serializable, so jsonify would fail on the raw probabilities.
    recommendations = [
        {"crop": str(label_encoder.classes_[i]), "confidence": round(float(probs[i]) * 100, 2)}
        for i in top_indices
    ]
    return jsonify({"recommendations": recommendations})

if __name__ == '__main__':
    # debug=True enables the interactive Werkzeug debugger, which allows
    # arbitrary code execution; keep it off unless debugging locally.
    app.run(debug=False)
"""

# =============== 10. Final Success Message ====================
# Only announce deployment readiness when the held-out accuracy clearly beats
# random guessing. With k classes a uniform guess scores about 1/k; requiring
# twice that is a deliberately loose heuristic floor, not a quality bar.
chance_accuracy = 1.0 / len(label_encoder.classes_)
if accuracy > 2 * chance_accuracy:
    print("\n✅ All steps completed. Model is trained and ready for deployment!")
else:
    print(
        f"\n⚠️ All steps completed, but test accuracy ({accuracy:.4f}) is close to "
        f"chance level ({chance_accuracy:.4f}). Model is NOT ready for deployment - "
        "check the dataset and features before serving predictions."
    )



✅ All steps completed. Model is trained and ready for deployment!
