In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [4]:
# Load the dataset
data = pd.read_csv('crop_recommendation_dataset_100000.csv')

# Display the first few rows of the dataset
print(data.head())

  Previous Crop Soil Type  Moisture Level  Nitrogen (N)  Phosphorus (P)  \
0       Tobacco    Clayey              28           310             101   
1          Rice    Saline              35           221              63   
2        Barley     Sandy              26           259              60   
3     Groundnut    Clayey              28           140              34   
4       Millets     Sandy              24           379             108   

   Potassium (K) Recommended Crop  
0            354          Millets  
1            680          Sorghum  
2            710          Millets  
3            414          Millets  
4            382          Millets  


In [5]:
# Create a consistent mapping for crops
crop_mapping = {
    'Wheat': 1,
    'Rice': 2,
    'Millets': 3,
    'Cotton': 4,
    'Groundnut': 5,
    'Maize': 6,
    'Sorghum': 7,
    'Barley': 8
}

# Check for unmapped crops
unmapped_crops = set(data['Recommended Crop']) - set(crop_mapping.keys())
if unmapped_crops:
    print("Unmapped crops found:", unmapped_crops)

# Map the recommended crops to integers
data['Recommended Crop'] = data['Recommended Crop'].map(crop_mapping)

# Drop rows with NaN values in 'Recommended Crop'
data = data.dropna(subset=['Recommended Crop'])

# Encoding categorical variables manually
previous_crop_mapping = {
    'Groundnut': 1,
    'Millets': 2,
    'Wheat': 3,
    'Maize': 4,
    'Cotton': 5,
    'Sorghum': 6,
    'Barley': 7,
    'Rice': 8
}

soil_type_mapping = {
    'Loamy': 1,
    'Clayey': 2,
    'Sandy': 3,
    'Saline': 4
}

Unmapped crops found: {'Soybean'}


In [6]:
data['Previous Crop'] = data['Previous Crop'].map(previous_crop_mapping)
data['Soil Type'] = data['Soil Type'].map(soil_type_mapping)

# Drop rows with NaN values after mapping
data = data.dropna()

In [7]:
# Define features and target variable
X = data.drop('Recommended Crop', axis=1)
y = data['Recommended Crop']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

In [14]:
def model_details():
  accuracy = accuracy_score(y_test, y_pred)
  confusion = confusion_matrix(y_test, y_pred)
  report = classification_report(y_test, y_pred)

  print(f"Accuracy: {accuracy * 100:.2f}%")
  print("Confusion Matrix:")
  print(confusion)
  print("Classification Report:")
  print(report)

In [15]:
# Evaluate the model
model_details()

# Save the model and mappings for future use
joblib.dump(model, 'crop_recommendation_model.pkl')

Accuracy: 97.20%
Confusion Matrix:
[[ 757    0    0    0    0]
 [   0  792    0    0    0]
 [   0    0 7965    0    0]
 [   0    0    0 2059    0]
 [   0    0    0    0  778]]
Classification Report:
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00       757
         2.0       1.00      1.00      1.00       792
         3.0       1.00      1.00      1.00      7965
         4.0       1.00      1.00      1.00      2059
         7.0       1.00      1.00      1.00       778

    accuracy                           1.00     12351
   macro avg       1.00      1.00      1.00     12351
weighted avg       1.00      1.00      1.00     12351



['crop_recommendation_model.pkl']