In [1]:
import pandas as pd

# Load the dataset
file_path = '/Users/vinuthnarajeswari/Desktop/soildataset.csv'  # Replace with your file path
data = pd.read_csv(file_path)

# Explore the dataset
print("Dataset Info:")
print(data.info())
print("\nFirst 5 rows of the dataset:")
print(data.head())


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 620 entries, 0 to 619
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   N       620 non-null    int64  
 1   P       620 non-null    int64  
 2   K       620 non-null    int64  
 3   ph      620 non-null    float64
 4   EC      620 non-null    float64
 5   S       620 non-null    float64
 6   Cu      620 non-null    float64
 7   Fe      620 non-null    float64
 8   Mn      620 non-null    float64
 9   Zn      620 non-null    float64
 10  B       620 non-null    float64
 11  label   620 non-null    object 
dtypes: float64(8), int64(3), object(1)
memory usage: 58.2+ KB
None

First 5 rows of the dataset:
     N   P    K   ph    EC     S     Cu      Fe     Mn     Zn      B  \
0  143  69  217  5.9  0.58  0.23  10.20  116.35  59.96  54.85  21.29   
1  170  36  216  5.9  0.15  0.28  15.69  114.20  56.87  31.28  28.62   
2  158  66  219  6.8  0.34  0.20  15.29   65.87  

In [2]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Encode the target variable (Crop)
label_encoder = LabelEncoder()
if 'label' in data.columns:
    data['label'] = label_encoder.fit_transform(data['label'])

# Handle missing values
data = data.dropna()

# Separate features and target variable
X = data.drop(columns=['label'])  # Replace 'Crop' with the actual target column name
y = data['label']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [3]:
from sklearn.ensemble import RandomForestClassifier

# Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)



In [6]:
with open('crop_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [7]:
from sklearn.metrics import classification_report, accuracy_score

# Evaluate the model
y_pred = model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.96      0.96        24
           1       0.94      0.94      0.94        18
           2       1.00      1.00      1.00        21
           3       1.00      0.95      0.98        22
           4       1.00      1.00      1.00        23
           5       0.94      1.00      0.97        16

    accuracy                           0.98       124
   macro avg       0.97      0.98      0.97       124
weighted avg       0.98      0.98      0.98       124

Accuracy: 0.9758064516129032


In [8]:
import pickle

# Save the model, scaler, and label encoder
model_file = 'crop_recommendation_model.pkl'
scaler_file = 'scaler.pkl'
label_encoder_file = 'label_encoder.pkl'

with open(model_file, 'wb') as f:
    pickle.dump(model, f)

with open(scaler_file, 'wb') as f:
    pickle.dump(scaler, f)

with open(label_encoder_file, 'wb') as f:
    pickle.dump(label_encoder, f)

print("Model, scaler, and label encoder saved successfully.")


Model, scaler, and label encoder saved successfully.


In [12]:
def recommend_crop(soil_features):
    # Load saved objects
    with open('crop_recommendation_model.pkl', 'rb') as f:
        loaded_model = pickle.load(f)
    with open('scaler.pkl', 'rb') as f:
        loaded_scaler = pickle.load(f)
    with open('label_encoder.pkl', 'rb') as f:
        loaded_label_encoder = pickle.load(f)

    # Validate feature length
    expected_features = loaded_scaler.n_features_in_
    if len(soil_features) != expected_features:
        raise ValueError(f"Expected {expected_features} features, but got {len(soil_features)}.")

    # Scale and predict
    scaled_features = loaded_scaler.transform([soil_features])
    predicted_class = loaded_model.predict(scaled_features)
    crop_name = loaded_label_encoder.inverse_transform(predicted_class)

    return crop_name[0]


# Example usage
# Replace these with actual feature values based on your dataset
example_soil = [6.5, 25, 0.5, 200, 50, 0.1, 10, 2, 40, 15, 1]  # Example for 11 features
print("Recommended Crop:", recommend_crop(example_soil))


Recommended Crop: potato




In [13]:
import pickle
from sklearn.ensemble import RandomForestClassifier

# Example RandomForest model
model = RandomForestClassifier()

# Fit the model (replace this with your actual model fitting code)
# model.fit(X_train, y_train)

# Save the model using pickle
with open('crop_model.pkl', 'wb') as f:
    pickle.dump(model, f)


In [14]:
import pickle

# Load the model from the .pkl file
with open('crop_model.pkl', 'rb') as f:
    crop_model = pickle.load(f)
