In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Load and preprocess data
df = pd.read_csv("Crop_recommendation.csv")
le = LabelEncoder()
df["label"] = le.fit_transform(df["label"])
X = df.drop("label", axis=1).values
y = df["label"].values

# Train-test split and scaling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert data to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train_scaled)
y_train_tensor = torch.LongTensor(y_train)
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_test_tensor = torch.LongTensor(y_test)

# PyTorch Neural Network
class CropNN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(CropNN, self).__init__()
        self.layer1 = nn.Linear(input_size, 64)
        self.layer2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, num_classes)
        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)
        x = self.relu(self.layer2(x))
        return self.output(x)

# Initialize model, loss, and optimizer
input_size = X_train_scaled.shape[1]
num_classes = len(le.classes_)
model = CropNN(input_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train neural network
batch_size = 32
dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

for epoch in range(100):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Evaluate neural network
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predictions = torch.max(outputs, 1)
    accuracy_nn = accuracy_score(y_test_tensor.numpy(), predictions.numpy())

# Traditional ML models
def train_evaluate(model, model_name):
    model.fit(X_train_scaled, y_train)
    pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, pred)
    print(f"{model_name:15} Accuracy: {acc:.4f}")
    return acc

# Train and compare all models
results = {
    "Neural Network": accuracy_nn,
    "Random Forest": train_evaluate(RandomForestClassifier(n_estimators=100), "Random Forest"),
    "XGBoost": train_evaluate(XGBClassifier(), "XGBoost"),
    "SVM": train_evaluate(SVC(kernel='rbf'), "SVM")
}

# Print final comparison
print("\n=== Final Accuracy Comparison ===")
for name, acc in results.items():
    print(f"{name:15} {acc:.4f}")

Random Forest   Accuracy: 0.9932
XGBoost         Accuracy: 0.9864
SVM             Accuracy: 0.9682

=== Final Accuracy Comparison ===
Neural Network  0.9568
Random Forest   0.9932
XGBoost         0.9864
SVM             0.9682


In [3]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load and preprocess data
df = pd.read_csv("Crop_recommendation.csv")
le = LabelEncoder()
df["label"] = le.fit_transform(df["label"])

# Split features and labels
X = df.drop("label", axis=1)
y = df["label"]

# Train final model on full dataset
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Initialize and train Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_scaled, y)

# Save model and preprocessing objects
joblib.dump(rf_model, 'crop_rf_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(le, 'label_encoder.pkl')

# ----------------------------------
# To make predictions (in a separate file):
# ----------------------------------
# Load saved artifacts
model = joblib.load('crop_rf_model.pkl')
scaler = joblib.load('scaler.pkl')
le = joblib.load('label_encoder.pkl')

# Sample input (N, P, K, temperature, humidity, ph, rainfall)
new_data = [[90, 42, 43, 20.88, 82.00, 6.50, 202.94]]  # Replace with your values

# Preprocess and predict
scaled_data = scaler.transform(new_data)
prediction = model.predict(scaled_data)

# Decode label
predicted_crop = le.inverse_transform(prediction)[0]
print(f"Recommended crop: {predicted_crop}")

Recommended crop: rice




In [8]:
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,20
1,85,58,41,21.770462,80.319644,7.038096,226.655537,20
2,60,55,44,23.004459,82.320763,7.840207,263.964248,20
3,74,35,40,26.491096,80.158363,6.980401,242.864034,20
4,78,42,42,20.130175,81.604873,7.628473,262.717340,20
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,5
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,5
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,5
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,5


In [7]:
# Sample input (N, P, K, temperature, humidity, ph, rainfall)
new_data = [[10, 82, 83, .88, 82.00, 9.50, 102.94]]  # Replace with your values

# Preprocess and predict
scaled_data = scaler.transform(new_data)
prediction = model.predict(scaled_data)

# Decode label
predicted_crop = le.inverse_transform(prediction)[0]
print(f"the Recommended crop is: {predicted_crop}")

the Recommended crop is: chickpea


