In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
import xgboost as xgb
import warnings
warnings.filterwarnings('ignore')

# Fix NumPy's global RNG state so runs that draw from np.random are repeatable
np.random.seed(seed=42)


In [12]:
# Location of the crop recommendation CSV, relative to the notebook's working directory.
# Kept in one place so the load call and the error hint can never disagree.
DATA_PATH = 'datasets/Crop_recommendation.csv'

# Start from an empty frame so `df` is always defined, even when loading fails.
df = pd.DataFrame()
try:
    df = pd.read_csv(DATA_PATH)
    print(f"Dataset loaded successfully. Shape: {df.shape}")
except FileNotFoundError:
    # Report the path that was actually tried so the hint matches the code.
    print(f"ERROR ==== Dataset not found at '{DATA_PATH}'. Please download from Kaggle and place it at that path")
    print("URL: https://www.kaggle.com/datasets/madhuraatmarambhagat/crop-recommendation-dataset")

Dataset loaded successfully. Shape: (2200, 8)


In [13]:
# Separate the predictors from the crop label column
y = df['label']
X = df.drop(columns='label')

# Turn the string crop names into integer class ids (kept in label_encoder for decoding later)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print(f"\nFeatures shape: {X.shape}")
print(f"Target classes: {len(np.unique(y_encoded))}")
print(f"Class names: {label_encoder.classes_}")

# Two stratified splits give 60% train / 20% validation / 20% test:
#   1) hold out 20% of all rows as the test set
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=42,
)
#   2) take 25% of the remaining 80% (0.25 * 0.8 = 0.2 of the total) as validation
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.25,
    stratify=y_temp,
    random_state=42,
)

print("\nData split:")
print(f"Training set: {len(X_train)} samples")
print(f"Validation set: {len(X_val)} samples")
print(f"Test set: {len(X_test)} samples")



Features shape: (2200, 7)
Target classes: 22
Class names: ['apple' 'banana' 'blackgram' 'chickpea' 'coconut' 'coffee' 'cotton'
 'grapes' 'jute' 'kidneybeans' 'lentil' 'maize' 'mango' 'mothbeans'
 'mungbean' 'muskmelon' 'orange' 'papaya' 'pigeonpeas' 'pomegranate'
 'rice' 'watermelon']

Data split:
Training set: 1320 samples
Validation set: 440 samples
Test set: 440 samples


In [14]:
# Standardise the features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to the validation and test splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled, X_test_scaled = scaler.transform(X_val), scaler.transform(X_test)

# Candidate classifiers, keyed by the display name used in the printed report
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'XGBoost': xgb.XGBClassifier(eval_metric='mlogloss', random_state=42),
    'SVM': SVC(probability=True, random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
}

# results: per-model metric dict; model_objects: the fitted estimators themselves
results = {}
model_objects = {}

print("\nTraining and evaluating models...")
print("=" * 50)

# Fit every candidate and score it on all three splits
for name, model in models.items():
    print(f"\nTraining {name}...")

    # The three models named below are fed the standardised features;
    # the others (Random Forest, XGBoost) see the raw columns.
    if name not in ('Logistic Regression', 'SVM', 'K-Nearest Neighbors'):
        X_train_use, X_val_use, X_test_use = X_train, X_val, X_test
    else:
        X_train_use, X_val_use, X_test_use = X_train_scaled, X_val_scaled, X_test_scaled

    model.fit(X_train_use, y_train)

    # Predictions for each split
    y_train_pred = model.predict(X_train_use)
    y_val_pred = model.predict(X_val_use)
    y_test_pred = model.predict(X_test_use)

    # Accuracy per split
    train_accuracy = accuracy_score(y_train, y_train_pred)
    val_accuracy = accuracy_score(y_val, y_val_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)

    # Support-weighted F1 per split
    train_f1 = f1_score(y_train, y_train_pred, average='weighted')
    val_f1 = f1_score(y_val, y_val_pred, average='weighted')
    test_f1 = f1_score(y_test, y_test_pred, average='weighted')

    # Record the metrics and keep the fitted estimator for later cells
    results[name] = dict(
        train_accuracy=train_accuracy,
        val_accuracy=val_accuracy,
        test_accuracy=test_accuracy,
        train_f1=train_f1,
        val_f1=val_f1,
        test_f1=test_f1,
    )
    model_objects[name] = model

    print(f"Train Accuracy: {train_accuracy:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")
    print(f"Test F1-Score: {test_f1:.4f}")



Training and evaluating models...

Training Logistic Regression...
Train Accuracy: 0.9758
Validation Accuracy: 0.9727
Test Accuracy: 0.9705
Test F1-Score: 0.9703

Training Random Forest...
Train Accuracy: 1.0000
Validation Accuracy: 0.9909
Test Accuracy: 0.9977
Test F1-Score: 0.9977

Training XGBoost...
Train Accuracy: 1.0000
Validation Accuracy: 0.9909
Test Accuracy: 0.9886
Test F1-Score: 0.9885

Training SVM...
Train Accuracy: 0.9833
Validation Accuracy: 0.9659
Test Accuracy: 0.9841
Test F1-Score: 0.9840

Training K-Nearest Neighbors...
Train Accuracy: 0.9788
Validation Accuracy: 0.9591
Test Accuracy: 0.9705
Test F1-Score: 0.9704


## Accessing Ollama

In [15]:
import requests
import json

In [None]:
# Available models "llama2", "llama3.2:latest", "mistral", "gemma", "gpt4all"
OLLAMA_MODEL_TO_USE = "llama3.2:latest"  # Default model

# Generate endpoint of the local Ollama server
OLLAMA_GENERATE_URL = 'http://localhost:11434/api/generate'

def query_ollama(prompt, model=OLLAMA_MODEL_TO_USE, timeout=120):
    """Send a prompt to the local Ollama server and return the generated text.

    The function never raises: every failure is reported as a string starting
    with "Error", so callers can display the result directly.

    Args:
        prompt: Text sent as the "prompt" field of the request.
        model: Ollama model name; defaults to OLLAMA_MODEL_TO_USE as it was
            when this function was defined.
        timeout: Seconds passed to requests as the request timeout. Without
            one, requests waits indefinitely on a stalled server. Pass None
            to restore the unbounded wait.

    Returns:
        The "response" field of the JSON reply on HTTP 200, otherwise an
        error message string.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }

    try:
        response = requests.post(OLLAMA_GENERATE_URL, json=payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Connection refused, DNS failure, timeout, ...
        return f"Error connecting to Ollama: {str(e)}"

    if response.status_code != 200:
        return f"Error: Received status code {response.status_code}"

    try:
        return response.json()['response']
    except (ValueError, KeyError, TypeError) as e:
        # The server answered 200 but the body was not the expected JSON object
        return f"Error: Unexpected response from Ollama: {str(e)}"

def get_llm_recommendation(soil_data, predictions, confidences):
    """Build the agronomy prompt for one set of readings and send it to the LLM.

    Args:
        soil_data: Indexable whose positions 0-6 are inserted into the prompt as
            nitrogen, phosphorus, potassium, temperature, humidity, pH and rainfall.
        predictions: Iterable of crop names.
        confidences: Iterable of numeric scores, paired with `predictions` by
            position and rendered with one decimal place followed by '%'.

    Returns:
        Whatever query_ollama returns for the prompt.
    """
    # Pull the individual readings out first so the template below reads cleanly
    nitrogen, phosphorus, potassium = soil_data[0], soil_data[1], soil_data[2]
    temperature, humidity = soil_data[3], soil_data[4]
    soil_ph, rainfall = soil_data[5], soil_data[6]

    # e.g. "rice (91.0%), jute (6.0%), coffee (3.0%)"
    ranked_crops = ', '.join(
        f'{crop} ({conf:.1f}%)' for crop, conf in zip(predictions, confidences)
    )

    prompt = f"""
    As an agricultural expert, analyze these soil and climate conditions:
    - Nitrogen: {nitrogen} mg/kg
    - Phosphorus: {phosphorus} mg/kg
    - Potassium: {potassium} mg/kg
    - Temperature: {temperature}°C
    - Humidity: {humidity}%
    - pH: {soil_ph}
    - Rainfall: {rainfall} mm

    The machine learning model suggests these crops (with confidence scores):
    {ranked_crops}

    Provide:
    1. Brief analysis of soil and climate conditions
    2. Why the suggested crops are suitable
    3. Quick recommendations for optimal growth
    4. Key challenges to watch for
    Keep response under 200 words.
    """

    return query_ollama(prompt)

def predict_crop_with_llm(N, P, K, temperature, humidity, ph, rainfall):
    """Predict the three most likely crops and attach an LLM-written analysis.

    Relies on notebook globals: `rf_model` (a fitted classifier exposing
    `predict_proba` and `classes_`, assigned in the Gradio cell),
    `label_encoder` and `get_llm_recommendation`.

    Args:
        N, P, K: Nitrogen, phosphorus and potassium readings.
        temperature, humidity, ph, rainfall: Climate and soil readings.
            All seven values are passed to the model unscaled, in this order.

    Returns:
        A Markdown string with the ranked crops (probability in percent)
        followed by the LLM analysis text.
    """
    feature_values = [N, P, K, temperature, humidity, ph, rainfall]
    input_features = np.array([feature_values])

    # Class probabilities for the single input row
    probabilities = rf_model.predict_proba(input_features)[0]

    # Positions of the three largest probabilities, most likely first
    top_3_indices = np.argsort(probabilities)[-3:][::-1]

    # predict_proba columns follow rf_model.classes_, so translate column
    # positions into encoded labels before decoding them to crop names.
    top_3_labels = rf_model.classes_[top_3_indices]
    top_3_crops = label_encoder.inverse_transform(top_3_labels)
    top_3_probs = probabilities[top_3_indices] * 100

    # Get LLM insights for the same readings and ranked crops
    llm_analysis = get_llm_recommendation(feature_values, top_3_crops, top_3_probs)

    # Format output as Markdown
    ranked_lines = [
        f"{rank}. {crop}: {prob:.1f}%\n"
        for rank, (crop, prob) in enumerate(zip(top_3_crops, top_3_probs), 1)
    ]
    output = "## 🤖 Machine Learning Recommendations\n"
    output += "**Top 3 Recommended Crops:**\n"
    output += "".join(ranked_lines)
    output += f"\n## 🧠 Expert Analysis\n{llm_analysis}"

    return output



### Testing Ollama

In [20]:
# testing ollama
soil_data = [90, 60, 50, 30, 70, 6.5, 200]  # Example soil data
predictions = ['Wheat', 'Rice', 'Maize']  # Example predictions
confidences = [85.0, 75.0, 65.0]  # Example confidence scores
llm_response = get_llm_recommendation(soil_data, predictions, confidences)
print("\nLLM Response:")
print(llm_response)


LLM Response:
**Analysis**

The soil condition has moderate levels of nitrogen, phosphorus, and potassium, indicating a relatively fertile terrain. The pH level is within the suitable range for most crops, suggesting good overall fertility. The temperature and humidity are favorable for many crops, with the rainfall being sufficient.

**Suitability of suggested crops**

Wheat (85.0%) is a suitable crop due to its adaptability to moderate temperatures and precipitation levels.
Rice (75.0%) can thrive in this climate, although it may require additional irrigation.
Maize (65.0%) can grow well in this environment, but it may be vulnerable to temperature fluctuations.

**Optimal growth recommendations**

1. Adjust nitrogen fertilizer application based on crop requirements.
2. Monitor and manage water resources effectively due to the moderate rainfall.
3. Provide shade for sensitive crops like maize during peak temperatures.
4. Implement conservation tillage practices to reduce soil erosion

## Interfacing with Gradio


In [21]:
import gradio as gr
import numpy as np

# Reuse the Random Forest fitted in the model-comparison cell; predict_crop_with_llm
# reads it through the global name `rf_model`.
rf_model = model_objects.get('Random Forest')
if rf_model is None:
    # Random Forest was not part of the comparison: fit one on the raw
    # (unscaled) training features, matching what the comparison loop does.
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)


# Create Gradio interface
with gr.Blocks(title="Crop Recommendation System", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌾 Crop Recommendation System")
    gr.Markdown("Enter soil and climate conditions to get personalized crop recommendations based on machine learning analysis.")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🧪 Soil Nutrients (mg/kg)")
            N = gr.Slider(minimum=0, maximum=150, value=50, label="Nitrogen (N)", info="Essential for leaf growth")
            P = gr.Slider(minimum=0, maximum=150, value=50, label="Phosphorus (P)", info="Important for root development")
            K = gr.Slider(minimum=0, maximum=250, value=50, label="Potassium (K)", info="Helps with disease resistance")

        with gr.Column():
            gr.Markdown("### 🌡️ Climate Conditions")
            temperature = gr.Slider(minimum=8, maximum=45, value=25, label="Temperature (°C)", info="Average temperature")
            humidity = gr.Slider(minimum=14, maximum=100, value=65, label="Humidity (%)", info="Relative humidity")
            ph = gr.Slider(minimum=3.5, maximum=10, value=6.5, step=0.1, label="Soil pH", info="Soil acidity/alkalinity")
            rainfall = gr.Slider(minimum=20, maximum=300, value=100, label="Rainfall (mm)", info="Annual rainfall")

    # Single definition of the input order; it must match the parameter order
    # of predict_crop_with_llm(N, P, K, temperature, humidity, ph, rainfall).
    feature_inputs = [N, P, K, temperature, humidity, ph, rainfall]

    with gr.Row():
        predict_btn = gr.Button("🔍 Get Crop Recommendation", variant="primary", size="lg")

    with gr.Row():
        output = gr.Markdown(label="Recommendation")

    predict_btn.click(
        fn=predict_crop_with_llm,
        inputs=feature_inputs,
        outputs=output
    )

    # Add some example scenarios
    gr.Markdown("### 📋 Try These Example Scenarios:")

    examples = [
        [90, 42, 43, 20.9, 82.0, 6.5, 202.9],  # Rice conditions
        # Labelled "wheat" by the author, but wheat is not among the classes
        # printed from label_encoder.classes_, so the model cannot return it.
        [83, 45, 60, 27.0, 70.0, 7.0, 150.0],  # Wheat conditions
        [40, 70, 40, 25.0, 80.0, 6.0, 180.0],  # Cotton conditions
    ]

    # The examples must run the same function as the button. The previous
    # value, `predict_crop`, is not defined anywhere in this notebook and
    # raised NameError on a fresh kernel.
    # NOTE: with cache_examples=True the outputs (including the LLM text) are
    # stored under .gradio/cached_examples; delete that folder if the function
    # or the examples change, otherwise stale results are shown.
    gr.Examples(
        examples=examples,
        inputs=feature_inputs,
        outputs=output,
        fn=predict_crop_with_llm,
        cache_examples=True
    )

# Launch the app
# NOTE(review): share=True publishes a public gradio.live URL, and debug=True
# keeps this cell running until interrupted. Confirm both are intended.
print("🚀 Launching Crop Recommendation System...")
demo.launch(share=True, debug=True)


🚀 Launching Crop Recommendation System...
* Running on local URL:  http://127.0.0.1:7860
Using cache from '/Users/victorhugogermano/Development/aai-510-final-project-group4/.gradio/cached_examples/45' directory. If method or examples have changed since last caching, delete this folder to clear cache.

* Running on public URL: https://a1a00da77dfb2cdc75.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://a1a00da77dfb2cdc75.gradio.live


