In [1]:
# Import Python Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
from sklearn.compose import ColumnTransformer

# Read the fertilizer dataset from the Kaggle input directory
filtered_dataset = pd.read_csv('/kaggle/input/maharashtra-fertilizer-new-data-set/filtered_dataset.csv')

# Fertilizer classes that are excluded from modelling
classes_to_remove = [
    'Potassium Nitrate',
    'Ammonium Nitrate',
    'Ammonium Sulphate',
    'Calcium Ammonium Nitrate',
    'Ammonium Chloride',
    'Superphosphate',
    'White Potash',
]

# Keep only the rows whose Fertilizer is NOT one of the excluded classes
is_excluded = filtered_dataset['Fertilizer'].isin(classes_to_remove)
filtered_dataset = filtered_dataset[~is_excluded]

# Separate the feature columns (X) from the target column (y)
X = filtered_dataset.drop(columns=['Fertilizer'])
y = filtered_dataset['Fertilizer']

In [2]:
# Work on the two label columns directly
fertilizer_column = filtered_dataset['Fertilizer']
crop_column = filtered_dataset['Crop']

# How many distinct fertilizers remain after filtering
unique_fertilizers = fertilizer_column.nunique()
print(f"Number of unique fertilizers: {unique_fertilizers}")

# The distinct fertilizer names, printed one per line
unique_fertilizers = fertilizer_column.unique()
print("Unique Fertilizer Names:")
for fertilizer in unique_fertilizers:
    print(fertilizer)

# How many distinct crops the dataset covers
unique_crops = crop_column.nunique()
print(f"Number of unique crops: {unique_crops}")

# Row counts per crop and per fertilizer (most frequent first)
records_per_crop = crop_column.value_counts()
records_per_fertilizer = fertilizer_column.value_counts()

print("Number of records per crop:")
print(records_per_crop)

print("Number of records per fertilizer:")
print(records_per_fertilizer)

Number of unique fertilizers: 15
Unique Fertilizer Names:
Urea
DAP
MOP
10:26:26 NPK
SSP
Magnesium Sulphate
13:32:26 NPK
12:32:16 NPK
50:26:26 NPK
19:19:19 NPK
Chilated Micronutrient
20:20:20 NPK
Ferrous Sulphate
10:10:10 NPK
Hydrated Lime
Number of unique crops: 13
Number of records per crop:
Sugarcane    1010
Wheat         859
Cotton        650
Jowar         394
Maize         350
Tomato        321
Rice          309
Groundnut     177
Grapes        125
Urad           99
Potato         70
Soybean        45
Turmeric       36
Name: Crop, dtype: int64
Number of records per fertilizer:
Urea                      1294
DAP                        594
19:19:19 NPK               593
MOP                        472
SSP                        351
Magnesium Sulphate         320
10:26:26 NPK               156
50:26:26 NPK               124
20:20:20 NPK               118
Chilated Micronutrient     108
12:32:16 NPK               106
Ferrous Sulphate            68
13:32:26 NPK                66
10:10:10 N

In [3]:
# Show the filtered DataFrame as the cell's output
filtered_dataset

Unnamed: 0,Soil_color,Nitrogen,Phosphorus,Potassium,pH,Rainfall,Temperature,Crop,Fertilizer
0,Black,75.000000,50.000000,100.000000,6.500000,1000.000000,20.000000,Sugarcane,Urea
1,Black,80.000000,50.000000,100.000000,6.500000,1000.000000,20.000000,Sugarcane,Urea
2,Black,85.000000,50.000000,100.000000,6.500000,1000.000000,20.000000,Sugarcane,Urea
3,Black,90.000000,50.000000,100.000000,6.500000,1000.000000,20.000000,Sugarcane,Urea
4,Black,95.000000,50.000000,100.000000,6.500000,1000.000000,20.000000,Sugarcane,Urea
...,...,...,...,...,...,...,...,...,...
5065,Brown,205.602221,216.767919,225.916889,5.644855,844.328067,24.478102,Tomato,20:20:20 NPK
5067,Dark Brown,241.135365,203.460871,210.360539,6.188576,702.622815,22.120115,Tomato,20:20:20 NPK
5070,Dark Brown,216.192043,249.685739,225.454121,6.524119,626.639875,22.100307,Tomato,19:19:19 NPK
5071,Dark Brown,205.059803,202.770239,218.023601,5.661101,866.264184,21.527625,Tomato,19:19:19 NPK


In [4]:
# Count missing (null) values in each column
filtered_dataset.isnull().sum()

Soil_color     0
Nitrogen       0
Phosphorus     0
Potassium      0
pH             0
Rainfall       0
Temperature    0
Crop           0
Fertilizer     0
dtype: int64

In [5]:
# Dimensions of the filtered dataset as (rows, columns)
filtered_dataset.shape

(4445, 9)

In [6]:
# Structural summary: index range, per-column dtype and non-null count, memory usage
filtered_dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4445 entries, 0 to 5072
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Soil_color   4445 non-null   object 
 1   Nitrogen     4445 non-null   float64
 2   Phosphorus   4445 non-null   float64
 3   Potassium    4445 non-null   float64
 4   pH           4445 non-null   float64
 5   Rainfall     4445 non-null   float64
 6   Temperature  4445 non-null   float64
 7   Crop         4445 non-null   object 
 8   Fertilizer   4445 non-null   object 
dtypes: float64(6), object(3)
memory usage: 347.3+ KB


In [7]:
# Number of rows that are exact duplicates of an earlier row
filtered_dataset.duplicated().sum()

0

In [8]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns
filtered_dataset.describe()

Unnamed: 0,Nitrogen,Phosphorus,Potassium,pH,Rainfall,Temperature
count,4445.0,4445.0,4445.0,4445.0,4445.0,4445.0
mean,112.014499,68.722457,79.060552,6.700638,824.504443,25.456417
std,48.674286,46.326394,53.929184,0.634422,244.39537,5.78016
min,20.0,15.0,20.0,5.201647,300.0,10.0
25%,80.0,45.0,45.0,6.0,600.0,20.0
50%,115.0,60.0,60.0,6.5,800.0,25.0
75%,135.0,73.845867,110.0,7.0,1000.0,30.0
max,249.78014,249.789794,249.993622,8.5,1700.0,40.0


In [9]:
# Count how many distinct fertilizers appear for each crop
fertilizer_mapping = filtered_dataset.groupby('Crop')['Fertilizer'].nunique()

# Keep only the crops that are associated with more than one fertilizer
crops_with_multiple_fertilizers = fertilizer_mapping.loc[fertilizer_mapping.gt(1)]

# Report whether the crop alone determines the fertilizer
if not crops_with_multiple_fertilizers.empty:
    print("Some crops have multiple fertilizers:")
    print(crops_with_multiple_fertilizers)
else:
    print("Each crop has exactly one fertilizer. No need for a separate fertilizer recommendation model.")


Some crops have multiple fertilizers:
Crop
Cotton       4
Grapes       3
Groundnut    2
Jowar        3
Maize        3
Rice         2
Soybean      3
Sugarcane    3
Tomato       3
Turmeric     2
Urad         3
Wheat        2
Name: Fertilizer, dtype: int64


# Fertilizer Implementation

**Fertilizer Implementation**

In [10]:
# Train-test split. This is done before any preprocessing so that the encoder
# is fitted on the training rows only and never sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Identify categorical columns (they are only encoded when both are present)
categorical_cols = ['Soil_color', 'Crop'] if 'Soil_color' in X.columns and 'Crop' in X.columns else []

if not categorical_cols:
    print("Categorical columns not found - using raw features")

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` keyword to `sparse_output`
# and 1.4 removed the old spelling. Try the current name first and fall back
# to the legacy one so the cell runs on both old and new installations.
try:
    one_hot_encoder = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(drop='first', sparse=False)

# Always build the transformer, even when there is nothing to encode, so that
# `preprocessor` is defined for the later cells that transform new samples and
# persist it. With an empty column list the 'cat' step is skipped and every
# column is passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', one_hot_encoder, categorical_cols)
    ],
    remainder='passthrough'  # keep numerical columns as-is
)

# Fit on the training split only, then apply the fitted transform to the test split
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Wrap the transformed arrays in DataFrames labelled with the output feature names
feature_names = preprocessor.get_feature_names_out()
X_train = pd.DataFrame(X_train, columns=feature_names)
X_test = pd.DataFrame(X_test, columns=feature_names)





In [11]:
# Train a random forest classifier with 100 trees on the preprocessed training split.
# random_state is fixed so that repeated runs build the same forest.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [12]:
# Predict fertilizers for the held-out split and print the per-class
# precision, recall, F1-score and support
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


                        precision    recall  f1-score   support

          10:10:10 NPK       1.00      1.00      1.00        15
          10:26:26 NPK       0.69      0.96      0.81        28
          12:32:16 NPK       0.90      0.95      0.93        20
          13:32:26 NPK       1.00      1.00      1.00        17
          19:19:19 NPK       0.82      0.81      0.82       117
          20:20:20 NPK       0.30      0.40      0.34        20
          50:26:26 NPK       0.90      1.00      0.95        19
Chilated Micronutrient       0.82      1.00      0.90        23
                   DAP       1.00      0.93      0.97       119
      Ferrous Sulphate       1.00      1.00      1.00        14
         Hydrated Lime       1.00      1.00      1.00         6
                   MOP       1.00      1.00      1.00        92
    Magnesium Sulphate       0.79      0.77      0.78        70
                   SSP       1.00      0.82      0.90        61
                  Urea       0.97      

In [13]:
# Mean accuracy of the fitted model on the data it was trained on and on the
# held-out split; a large gap between the two indicates overfitting
training_accuracy = model.score(X_train, y_train)
testing_accuracy = model.score(X_test, y_test)

# Report both figures as percentages with two decimals
print(f"Training Accuracy: {training_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {testing_accuracy * 100:.2f}%")


Training Accuracy: 99.94%
Testing Accuracy: 90.66%


In [14]:
import numpy as np

# One hand-written observation to run through the fitted preprocessing and model.
# The keys are the raw feature column names of the training data.
# NOTE(review): Rainfall=100 is below the minimum (300) reported by describe()
# for the training data - confirm this value is intended.
sample_data = pd.DataFrame([{
    'Nitrogen': 80,
    'Phosphorus': 50,
    'Potassium': 40,
    'pH': 6.5,
    'Rainfall': 100,
    'Temperature': 25,
    'Soil_color': 'Black',
    'Crop': 'Wheat',
}])

# Encode the sample with the preprocessor that was fitted on the training split
sample_data_transformed = preprocessor.transform(sample_data)

# Label the encoded row with the training feature names; the reindex guarantees
# the column set and order match what the model was fitted on
training_columns = X_train.columns
sample_data_prepared = (
    pd.DataFrame(sample_data_transformed, columns=training_columns)
    .reindex(columns=training_columns, fill_value=0)
)

# Predict the fertilizer for the single sample and show it
predicted_fertilizer = model.predict(sample_data_prepared)

print(f"Predicted Fertilizer: {predicted_fertilizer[0]}")


Predicted Fertilizer: Urea


In [15]:
import joblib

# Persist the trained model to /kaggle/working/ with joblib (compression level 3)
joblib.dump(model, '/kaggle/working/rfFertilizer_model.joblib', compress=3)
print("Model saved successfully using joblib!")

# Persist the fitted preprocessor as well, so new inputs can be encoded
# exactly as the training data was before calling the saved model
joblib.dump(preprocessor, '/kaggle/working/fertilizer_preprocessor.joblib')
print("Preprocessor saved successfully using joblib!")


Model saved successfully using joblib!
Preprocessor saved successfully using joblib!


In [16]:
# Record the installed package versions in fertilizer_requirements.txt (written to the current working directory)
!pip freeze > fertilizer_requirements.txt