In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [47]:
# Read every raw CSV in one pass. The five names on the left are bound in the
# same order as the paths listed below.
crop_yield, rainfall, crop_price, crop_data, crop_df = map(
    pd.read_csv,
    (
        "data/Crop_Yield.csv",     # -> crop_yield
        "data/Rainfall.csv",       # -> rainfall
        "data/Crop Prices.csv",    # -> crop_price
        "data/Crop Nutrient.csv",  # -> crop_data
        "data/crop_data.csv",      # -> crop_df
    ),
)

In [3]:
# Distinct crop labels in the yield data, in order of first appearance
crop_yield.loc[:, "crop"].unique()

array(['Maize', 'Onion', 'Potato', 'Rice', 'Sugarcane', 'Wheat', 'Bajra',
       'Groundnut', 'Barley', 'Sunflower'], dtype=object)

### Preprocess Crop Price Dataset

In [4]:
# Normalise the header: lowercase every column name and swap spaces for underscores
crop_price.columns = [label.lower().replace(" ", "_") for label in crop_price.columns]

In [5]:
# 'quantity' is discarded before modelling. Per the original author it holds
# only the unit label "Quintal" (not verifiable from this notebook alone).
# The membership guard makes the cell safe to re-run once the column is gone.
if "quantity" in crop_price.columns:
    crop_price = crop_price.drop("quantity", axis="columns")
    print("✅ Dropped 'quantity' column as it was not useful.")

✅ Dropped 'quantity' column as it was not useful.


In [6]:
# Parse the day-month-year strings and keep only the calendar year.
# Values that do not match "%d-%m-%Y" are coerced to NaT, whose year is NaN.
# NOTE(review): re-running this cell once "year" already holds plain years will
# presumably coerce every value to NaN, because they no longer match the format.
crop_price["year"] = pd.to_datetime(
    crop_price["year"], format="%d-%m-%Y", errors="coerce"
).dt.year

print(crop_price[["year"]].head())  # Check the output


   year
0  2018
1  2018
2  2018
3  2019
4  2019


In [7]:
# Force "year" to a numeric dtype; anything unparseable becomes NaN
# (to_numeric does not by itself guarantee an integer dtype).
crop_price["year"] = pd.to_numeric(
    crop_price["year"],
    errors="coerce",
)

# Price columns that get rescaled by the scaling cell that follows.
# NOTE(review): the "?" presumably stands in for a currency symbol lost when the
# CSV was exported — these names must match the loaded header exactly.
num_cols_price = [
    "min_price_(?/quintal)",
    "max_price_(?/quintal)",
    "modal_price_(?/quintal)",
]


In [8]:
# Min-max scale the three price columns (MinMaxScaler's default range is [0, 1]).
# NOTE(review): the scaler is fitted on the full frame before any train/test
# split, and the list includes the modal price that is later used as the
# regression target — so model outputs are in scaled units, not rupees.
scaler_price = MinMaxScaler().fit(crop_price[num_cols_price])
crop_price[num_cols_price] = scaler_price.transform(crop_price[num_cols_price])

print("✅ Crop Price dataset processed successfully!")

✅ Crop Price dataset processed successfully!


In [9]:
# Preview the first five processed price rows
crop_price.head(n=5)

Unnamed: 0,state,district,market_(mandi),commodity,min_price_(?/quintal),max_price_(?/quintal),modal_price_(?/quintal),year
0,Andhra Pradesh,Guntur,Guntur Mandi,Rice,0.009423,0.008784,0.009011,2018
1,Andhra Pradesh,Guntur,Guntur Mandi,Chilli,0.069494,0.067037,0.06806,2018
2,Andhra Pradesh,Nellore,Nellore Mandi,Cotton,0.025324,0.021526,0.023132,2018
3,Andhra Pradesh,Guntur,Guntur Mandi,Rice,0.010012,0.009239,0.009525,2019
4,Andhra Pradesh,Guntur,Guntur Mandi,Chilli,0.075383,0.071588,0.073195,2019


### Preprocess Rainfall Dataset

In [10]:
# Normalise the header: lowercase every column name and swap spaces for underscores
rainfall.columns = [label.lower().replace(" ", "_") for label in rainfall.columns]


In [11]:
# Numeric columns only — text columns are left untouched.
# (This name is reassigned to an explicit list in the next cell.)
num_cols_rainfall = rainfall.select_dtypes(include="number").columns

# Replace gaps in each numeric column with that column's own median
rainfall[num_cols_rainfall] = rainfall[num_cols_rainfall].fillna(
    rainfall[num_cols_rainfall].median()
)

print("✅ Missing values handled successfully!")


✅ Missing values handled successfully!


In [12]:
# Columns to rescale: the twelve monthly totals plus the annual total.
# This replaces the "all numeric columns" selection made in the previous cell.
num_cols_rainfall = [
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
    "annual_rainfall",
]


In [13]:
# Min-max scale the monthly and annual rainfall columns (default range [0, 1]).
# NOTE(review): fitted on the full frame before the train/test split, and
# "annual_rainfall" — the later regression target — is scaled as well.
scaler_rainfall = MinMaxScaler().fit(rainfall[num_cols_rainfall])
rainfall[num_cols_rainfall] = scaler_rainfall.transform(rainfall[num_cols_rainfall])

print("✅ Rainfall dataset processed successfully!")

✅ Rainfall dataset processed successfully!


In [14]:
# Preview the first five processed rainfall rows
rainfall.head(n=5)

Unnamed: 0,state,year,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec,annual_rainfall,jf,mam,jjas,ond
0,Andaman & Nicobar Islands,1901,0.08429,0.215861,0.048217,0.003865,0.452507,0.32128,0.15452,0.289018,0.272117,0.40968,0.860225,0.054413,0.528155,136.3,560.3,1696.3,980.3
1,Andaman & Nicobar Islands,1902,0.0,0.396035,0.020145,0.0,0.381739,0.333458,0.096877,0.452781,0.545135,0.207951,0.553244,0.259919,0.551685,159.8,458.3,2185.9,716.7
2,Andaman & Nicobar Islands,1903,0.021758,0.356877,0.0,0.00168,0.201181,0.297919,0.308278,0.196263,0.277355,0.191079,0.43828,0.364372,0.461827,156.7,236.1,1874.0,690.6
3,Andaman & Nicobar Islands,1904,0.016104,0.036431,0.0,0.340111,0.260568,0.307363,0.21246,0.096179,0.671332,0.234314,0.475728,0.064939,0.48132,24.1,506.9,1977.6,571.0
4,Andaman & Nicobar Islands,1905,0.002227,0.0,0.005449,0.045202,0.239175,0.39037,0.156044,0.198546,0.242982,0.274913,0.039143,0.558219,0.399502,1.3,309.7,1624.9,630.8


### Preprocess Crop Yield Dataset

In [15]:
# Normalise the header: lowercase every column name and swap spaces for underscores
crop_yield.columns = [label.lower().replace(" ", "_") for label in crop_yield.columns]

# Numeric columns that get rescaled by the scaling cell that follows
num_cols_yield = [
    "area",
    "production",
    "annual_rainfall",
    "yield",
]

In [16]:
# Min-max scale the numeric yield columns (default range [0, 1]).
# NOTE(review): fitted on the full frame before the train/test split, and
# "yield" — the later regression target — is scaled as well.
scaler_yield = MinMaxScaler().fit(crop_yield[num_cols_yield])
crop_yield[num_cols_yield] = scaler_yield.transform(crop_yield[num_cols_yield])

print("✅ Crop Yield dataset processed successfully!")

✅ Crop Yield dataset processed successfully!


### Preprocess Crop Nutrient Dataset

In [17]:
# Inspect the nutrient dataset's column labels as loaded from the CSV
crop_data.columns

Index(['N', 'P', 'K', 'Temperature', 'Humidity', 'pH', 'Rainfall', 'Crop'], dtype='object')

In [18]:
# Normalise the header: lowercase every column name and swap spaces for underscores
crop_data.columns = [label.lower().replace(" ", "_") for label in crop_data.columns]

# Soil-nutrient and climate measurements to rescale (everything except "crop")
num_cols_crop = [
    'n', 'p', 'k',
    'temperature', 'humidity', 'ph', 'rainfall',
]


In [19]:
# Min-max scale the nutrient/climate measurements (default range [0, 1]).
# NOTE(review): in the visible cells this scaled frame is not what the crop
# classifier trains on — that cell re-reads the CSV into a separate `df`.
scaler_crop = MinMaxScaler().fit(crop_data[num_cols_crop])
crop_data[num_cols_crop] = scaler_crop.transform(crop_data[num_cols_crop])

print("✅ Crop Data dataset processed successfully!")

✅ Crop Data dataset processed successfully!


In [20]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Initialize Label Encoders
# NOTE: fit_transform() refits an encoder from scratch, so reusing one instance
# for several columns leaves it holding only the classes_ of the column fitted last.
label_enc = LabelEncoder()

In [21]:
# Integer-encode every categorical column, keeping ONE fitted encoder per
# (dataset, column) pair in `label_encoders`.
#
# Previously a single LabelEncoder was refitted for each column in turn, so
# every mapping except the last was lost: the integer codes could not be decoded
# with inverse_transform, nor reproduced for new inputs at prediction time.
# The encoded values written into the frames are unchanged by this rewrite.
#
# Codes are assigned independently per column and per dataset, so e.g. a state's
# code in crop_yield need not equal its code in rainfall or crop_price.
label_encoders = {}

for frame_name, frame, categorical_cols in (
    # 🚜 Crop Yield Dataset
    ("crop_yield", crop_yield, ("crop", "season", "state")),
    # 🌧️ Rainfall Dataset
    ("rainfall", rainfall, ("state",)),
    # 💰 Crop Prices Dataset
    ("crop_price", crop_price, ("state", "district", "market_(mandi)", "commodity")),
):
    for column in categorical_cols:
        column_encoder = LabelEncoder()
        frame[column] = column_encoder.fit_transform(frame[column])
        label_encoders[(frame_name, column)] = column_encoder

# Backward compatibility: this cell used to leave `label_enc` fitted on the
# crop_price "commodity" column (the last column encoded); keep that end state.
label_enc = label_encoders[("crop_price", "commodity")]



In [22]:
# Show the price frame's column labels after renaming and dropping 'quantity'
print(crop_price.columns)


Index(['state', 'district', 'market_(mandi)', 'commodity',
       'min_price_(?/quintal)', 'max_price_(?/quintal)',
       'modal_price_(?/quintal)', 'year'],
      dtype='object')


In [23]:
# Make sure "year" is numeric in all three modelling frames; values that
# cannot be parsed become NaN rather than raising.
for yearly_frame in (crop_price, rainfall, crop_yield):
    yearly_frame["year"] = pd.to_numeric(yearly_frame["year"], errors="coerce")


In [24]:
from sklearn.model_selection import train_test_split

def _features_and_target(frame, target):
    """Return (features, target_series): every column except `target`, and `target` itself."""
    return frame.drop(columns=[target]), frame[target]


# All three tasks use the same 80/20 split with a fixed seed (random_state=42).
# NOTE(review): the feature sets keep columns closely tied to each target
# (area/production for yield, monthly totals for annual rainfall, min/max price
# for modal price) — confirm this is intended and not target leakage.

# 🚜 Crop Yield Dataset
X_yield, y_yield = _features_and_target(crop_yield, "yield")
X_train_yield, X_test_yield, y_train_yield, y_test_yield = train_test_split(
    X_yield, y_yield, test_size=0.2, random_state=42
)

# 🌧️ Rainfall Dataset
X_rainfall, y_rainfall = _features_and_target(rainfall, "annual_rainfall")
X_train_rainfall, X_test_rainfall, y_train_rainfall, y_test_rainfall = train_test_split(
    X_rainfall, y_rainfall, test_size=0.2, random_state=42
)

# 💰 Crop Prices Dataset
X_price, y_price = _features_and_target(crop_price, "modal_price_(?/quintal)")
X_train_price, X_test_price, y_train_price, y_test_price = train_test_split(
    X_price, y_price, test_size=0.2, random_state=42
)

print("✅ Training & Testing sets created successfully!")


✅ Training & Testing sets created successfully!


In [25]:
from sklearn.ensemble import RandomForestRegressor

# One 100-tree random forest per regression task, each seeded identically and
# fitted on its own training split (fit() returns the fitted estimator).
model_yield = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_yield, y_train_yield
)
model_rainfall = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_rainfall, y_train_rainfall
)
model_price = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_price, y_train_price
)

print("✅ Models trained successfully!")


✅ Models trained successfully!


In [26]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Function to evaluate model
def evaluate_model(model, X_test, y_test, dataset_name):
    """Print and return regression metrics for a fitted model on held-out data.

    Parameters
    ----------
    model : fitted estimator exposing ``predict(X)``.
    X_test : feature matrix passed to ``model.predict``.
    y_test : true target values aligned with ``X_test``.
    dataset_name : str
        Label used in the printed report header.

    Returns
    -------
    dict
        ``{"mae": ..., "mse": ..., "r2": ...}`` so callers can compare or log
        the scores instead of re-reading printed text. (Earlier versions
        returned ``None``; callers that ignore the return value are unaffected.)

    Notes
    -----
    The errors are in the units of ``y_test`` as passed in. If the target was
    rescaled beforehand, MAE and MSE are in scaled units.
    """
    y_pred = model.predict(X_test)

    metrics = {
        "mae": mean_absolute_error(y_test, y_pred),
        "mse": mean_squared_error(y_test, y_pred),
        "r2": r2_score(y_test, y_pred),
    }

    print(f"📌 {dataset_name} Model Performance:")
    print(f"🔹 Mean Absolute Error (MAE): {metrics['mae']:.4f}")
    print(f"🔹 Mean Squared Error (MSE): {metrics['mse']:.4f}")
    print(f"🔹 R² Score: {metrics['r2']:.4f}")
    print("-" * 50)

    return metrics

# Score each fitted regressor on its own held-out split, in the same order
# as before: yield, rainfall, price.
for fitted_model, X_holdout, y_holdout, report_label in (
    (model_yield, X_test_yield, y_test_yield, "Crop Yield"),
    (model_rainfall, X_test_rainfall, y_test_rainfall, "Rainfall"),
    (model_price, X_test_price, y_test_price, "Crop Price"),
):
    evaluate_model(fitted_model, X_holdout, y_holdout, report_label)

print("✅ Model evaluation completed!")


📌 Crop Yield Model Performance:
🔹 Mean Absolute Error (MAE): 0.0014
🔹 Mean Squared Error (MSE): 0.0000
🔹 R² Score: 0.9272
--------------------------------------------------
📌 Rainfall Model Performance:
🔹 Mean Absolute Error (MAE): 0.0086
🔹 Mean Squared Error (MSE): 0.0007
🔹 R² Score: 0.9730
--------------------------------------------------
📌 Crop Price Model Performance:
🔹 Mean Absolute Error (MAE): 0.0016
🔹 Mean Squared Error (MSE): 0.0001
🔹 R² Score: 0.9963
--------------------------------------------------
✅ Model evaluation completed!


In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [28]:
# Read the nutrient CSV into a fresh frame for the crop classifier. This is the
# same file loaded earlier as `crop_data`, but `df` keeps the original column
# names and unscaled values.
file_path = "data/Crop Nutrient.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [29]:
# Inspect the column labels of the freshly loaded nutrient frame
df.columns

Index(['N', 'P', 'K', 'Temperature', 'Humidity', 'pH', 'Rainfall', 'Crop'], dtype='object')

In [30]:
# Distinct crop labels (the classification targets), in order of first appearance
df.loc[:, "Crop"].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [31]:
# Map crop names to integer class ids. `label_encoder.classes_` keeps the names
# so predictions can be translated back later.
# NOTE(review): re-running this cell without reloading `df` would refit the
# encoder on the integer codes, so classes_ would no longer hold crop names.
label_encoder = LabelEncoder().fit(df["Crop"])
df["Crop"] = label_encoder.transform(df["Crop"])

In [32]:
# Target is the encoded crop id; the features are every remaining column
y = df["Crop"]
X = df.drop("Crop", axis="columns")

In [33]:
# 80/20 hold-out split, stratified on the crop label so both parts keep the
# same class proportions; seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [34]:
# Gradient-boosted crop classifier: construct and fit in one step
# (fit() returns the fitted estimator).
xgb_model = XGBClassifier(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
).fit(X_train, y_train)

# Predicted class ids for the held-out rows
y_pred_xgb = xgb_model.predict(X_test)

# Share of held-out rows classified correctly, reported as a percentage
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"📌 **XGBoost Classifier Accuracy:** {accuracy_xgb * 100:.2f}%")

📌 **XGBoost Classifier Accuracy:** 99.55%


In [35]:
# Per-class precision / recall / F1 on the held-out rows, with rows labelled by
# crop name via the target encoder's classes_.
print(
    "\n🔹 **Classification Report:**",
    classification_report(y_test, y_pred_xgb, target_names=label_encoder.classes_),
    sep="\n",
)



🔹 **Classification Report:**
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.95      1.00      0.98        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      0.95      0.97        20
       maize       1.00      1.00      1.00        20
       mango       1.00      1.00      1.00        20
   mothbeans       0.95      1.00      0.98        20
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        20
      orange       1.00      1.00      1.00        

In [50]:
import os
import pickle
from sklearn.preprocessing import LabelEncoder

# Define file paths with "predict" added
model_paths = {
    "crop_yield": "models/crop_yield_predict.pkl",
    "rainfall": "models/rainfall_predict.pkl",
    "crop_price": "models/crop_price_predict.pkl",
    "best_crop": "models/best_crop_predict.pkl",
    "encoders": "models/encoders_predict.pkl"
}

# Create encoders
state_encoder = LabelEncoder()
crop_encoder = LabelEncoder()
soil_encoder = LabelEncoder()

# Fit encoders using the categorical columns from datasets
# NOTE(review): these are fitted on `crop_df`, which none of the saved models
# was trained on in the visible cells, so the integer codes they produce need
# not match the codes the models saw during training — confirm before relying
# on them for inference.
state_encoder.fit(crop_df["STATE"])
crop_encoder.fit(crop_df["CROP"])
soil_encoder.fit(crop_df["SOIL_TYPE"])

# Bundle of encoders to persist.
# "best_crop_label_encoder" is the encoder that produced the classifier's
# target ids; without it the saved classifier's predictions cannot be mapped
# back to crop names. The three original keys are unchanged.
encoders = {
    "state_encoder": state_encoder,
    "crop_encoder": crop_encoder,
    "soil_encoder": soil_encoder,
    "best_crop_label_encoder": label_encoder,
}

# Create the output directory first: open(..., "wb") does not create missing
# parent directories and would raise FileNotFoundError on a fresh checkout.
for target_dir in {os.path.dirname(path) for path in model_paths.values()}:
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

# Save models and encoders: one pickle per entry, keyed like `model_paths`
artifacts = {
    "crop_yield": model_yield,
    "rainfall": model_rainfall,
    "crop_price": model_price,
    "best_crop": xgb_model,
    "encoders": encoders,
}

for artifact_key, artifact in artifacts.items():
    with open(model_paths[artifact_key], "wb") as file:
        pickle.dump(artifact, file)

print("✅ All models and encoders saved successfully with 'predict' in filenames!")


✅ All models and encoders saved successfully with 'predict' in filenames!


In [None]:
import pickle
import numpy as np

# Define file paths
model_paths = {
    "crop_yield": "models/crop_yield_predict.pkl",
    "rainfall": "models/rainfall_predict.pkl",
    "crop_price": "models/crop_price_predict.pkl",
    "best_crop": "models/best_crop_predict.pkl",
    "encoders": "models/encoders_predict.pkl"
}

# Load models
# NOTE: pickle.load can execute arbitrary code embedded in the file — only load
# artifacts that were produced by this notebook or another trusted source.
with open(model_paths["crop_yield"], "rb") as file:
    model_yield = pickle.load(file)

with open(model_paths["rainfall"], "rb") as file:
    model_rainfall = pickle.load(file)

with open(model_paths["crop_price"], "rb") as file:
    model_price = pickle.load(file)

with open(model_paths["best_crop"], "rb") as file:
    model_crop = pickle.load(file)

# Load encoders
with open(model_paths["encoders"], "rb") as file:
    encoders = pickle.load(file)

state_encoder = encoders["state_encoder"]
crop_encoder = encoders["crop_encoder"]
soil_encoder = encoders["soil_encoder"]

# Take input
state = input("Enter State: ")
crop = input("Enter Crop: ")
soil = input("Enter Soil Type: ")
# Stored as `rainfall_mm` so the `rainfall` DataFrame built by the
# preprocessing cells is not overwritten by a float; overwriting it broke any
# later re-run of the rainfall cells in the same kernel.
rainfall_mm = float(input("Enter Rainfall (mm): "))

# Encode categorical values (transform raises ValueError for a label the
# encoder was not fitted on)
state_encoded = state_encoder.transform([state])[0]
crop_encoded = crop_encoder.transform([crop])[0]
soil_encoded = soil_encoder.transform([soil])[0]

# Make Predictions
# NOTE(review): the feature vectors below do not match what the models were
# trained on in this notebook, so these calls are expected to fail on feature
# count until the inputs are redesigned:
#   - model_crop was fitted on N, P, K, Temperature, Humidity, pH, Rainfall (7 columns);
#   - model_yield on every crop_yield column except "yield";
#   - model_price on every crop_price column except the modal price;
#   - model_rainfall on every rainfall column except "annual_rainfall".
# NOTE(review): the regression targets were min-max scaled before training, so
# the raw predictions are in scaled units rather than kg/ha, rupees or mm.
best_crop = model_crop.predict([[state_encoded, soil_encoded, rainfall_mm]])[0]
expected_yield = model_yield.predict([[state_encoded, crop_encoded, soil_encoded, rainfall_mm]])[0]
predicted_price = model_price.predict([[state_encoded, crop_encoded]])[0]
predicted_rainfall = model_rainfall.predict([[state_encoded]])[0]

# Decode best crop
# NOTE(review): `crop_encoder` was fitted on a different column than the
# classifier's target labels, so the decoded name may not be the crop the
# classifier actually predicted — confirm which encoder should be used.
best_crop_decoded = crop_encoder.inverse_transform([int(best_crop)])[0]

# Output results
print("\n🌾 **Prediction Results** 🌾")
print(f"✅ **Best Crop Recommendation:** {best_crop_decoded}")
print(f"🌱 **Expected Yield:** {expected_yield:.2f} kg/ha")
print(f"💰 **Market Price:** ₹{predicted_price:.2f}")
print(f"☔ **Predicted Rainfall:** {predicted_rainfall:.2f} mm")
