In [1]:
import zipfile
import os

# Source archive and the folder its contents are unpacked into.
zip_file_path = '/content/Dataset.zip'
extraction_path = './unzipped_data'

# Make sure the destination exists; a no-op when it is already there.
os.makedirs(extraction_path, exist_ok=True)

# Unpack every member of the archive; the handle is closed on exit.
with zipfile.ZipFile(zip_file_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extraction_path)

print(f"Successfully unzipped '{zip_file_path}' to '{extraction_path}'.")

Successfully unzipped '/content/Dataset.zip' to './unzipped_data'.


In [2]:
import pandas as pd

# -------------------------------
# Read the four CSV exports
# -------------------------------

feature_store = pd.read_csv("/content/unzipped_data/merged_feature_store.csv")
multilingual_adv = pd.read_csv("/content/unzipped_data/Multilingual_Expert_Advisory.csv")
smart_reports = pd.read_csv("/content/unzipped_data/Smart_Advisory_Reports_All.csv")
decadal_master = pd.read_csv("/content/unzipped_data/Unified_Decadal_Master_2015_2024.csv")

print("✅ DATASETS LOADED SUCCESSFULLY\n")

# -------------------------------
# Per-dataset overview: shape, column names, null counts
# -------------------------------

# Display label -> DataFrame, in the order the summaries are printed.
datasets = dict(zip(
    ["Feature Store", "Multilingual Advisory", "Smart Reports", "Decadal Master"],
    [feature_store, multilingual_adv, smart_reports, decadal_master],
))

for name, df in datasets.items():
    null_counts = df.isnull().sum()
    print("="*70)
    print(f"DATASET: {name}")
    print("Shape:", df.shape)
    print("\nColumns:\n", df.columns.tolist())
    # Only the first ten columns' null counts are shown to keep output short.
    print("\nMissing Values:\n", null_counts.head(10))
    print("="*70)


✅ DATASETS LOADED SUCCESSFULLY

DATASET: Feature Store
Shape: (6720, 39)

Columns:
 ['year', 'region', 'state', 'district', 'lat', 'lon', 'rainfall_imd_mm', 'soil_moisture_historical', 'mean_temp_historical', 'ndvi_vegetation_index', 'soil_ph', 'soil_type', 'nitrogen', 'phosphorus', 'potassium', 'recommended_crop', 'historical_msp_inr', 'district_norm', 'state_norm', 'crop_norm', 'merge_key', 'advisory_matches', 'n_advisories', 'advisory_id', 'advisory_text_en', 'advisory_source', 'translations_available', 'translation_json', 'advisory_hindi', 'advisory_telugu', 'advisory_marathi', 'advisory_punjabi', 'advisory_tamil', 'data_source_unified', 'data_source_advisory', 'data_source_multilingual', 'merge_timestamp', 'data_license', 'curator']

Missing Values:
 year                        0
region                      0
state                       0
district                    0
lat                         0
lon                         0
rainfall_imd_mm             0
soil_moisture_historical

In [3]:
# =========================================================
# STEP 2 — DATA PREPARATION PIPELINE (ML + LLM)
# Project: AgriGuard AI
# =========================================================

import pandas as pd

print("🚀 Starting STEP 2 Data Preparation...\n")

# =========================================================
# PART A — LOAD DATASETS
# =========================================================
# Three of the unzipped CSVs feed this step: the merged feature store
# (ML features), the smart advisory reports (LLM instruction pairs) and
# the multilingual advisories (optional translated pairs).

multilingual = pd.read_csv("/content/unzipped_data/Multilingual_Expert_Advisory.csv")
smart_reports = pd.read_csv("/content/unzipped_data/Smart_Advisory_Reports_All.csv")
feature_store = pd.read_csv("/content/unzipped_data/merged_feature_store.csv")

print("✅ All datasets loaded successfully")

# =========================================================
# PART B — MACHINE LEARNING DATASET PREPARATION
# =========================================================

print("\n🔧 Preparing ML Dataset...")

# Column the regressor is trained to predict.
target_column = 'rainfall_imd_mm'   # Climate Prediction Target

# NOTE(review): the target column 'rainfall_imd_mm' is ALSO listed as an input
# feature below, so the model can read the answer directly (target leakage) —
# consistent with the R2 of 1.0 reported by STEP 3. Removing it changes the
# saved feature matrix, so the scaler/model must be retrained and the
# inference engine's feature list updated in the same change.
ml_features = [
    'lat', 'lon',
    'rainfall_imd_mm',
    'soil_moisture_historical', 'mean_temp_historical',
    'ndvi_vegetation_index',
    'soil_ph',
    'nitrogen', 'phosphorus', 'potassium',
    'historical_msp_inr',
]

X_ml = feature_store.loc[:, ml_features]
y_ml = feature_store[target_column]

print("✅ ML Features Shape:", X_ml.shape)
print("✅ ML Target Shape:", y_ml.shape)

# Persist both frames; STEP 3 reads these two files back from disk.
X_ml.to_csv("agri_ml_features.csv", index=False)
y_ml.to_csv("agri_ml_target.csv", index=False)

print("📁 ML datasets saved locally")

# =========================================================
# PART C — LLM TRAINING DATASET PREPARATION
# =========================================================
# Each Smart Report row becomes one instruction pair: a plain-text prompt
# built from the row's agronomic fields ("input") and the expert advisory
# ("output"). Rows without an advisory are skipped rather than crashing on
# `.strip()` of a NaN, mirroring the notna checks in the multilingual part.

print("\n🔧 Preparing LLM Instruction Dataset...")

llm_records = []
skipped_missing_advisory = 0

for _, row in smart_reports.iterrows():

    response = row['Expert_Advisory']

    # Missing (NaN/None) or blank advisory -> nothing to learn from.
    if pd.isna(response) or not str(response).strip():
        skipped_missing_advisory += 1
        continue

    prompt = f"""
District: {row['District']}
State: {row['State']}
Rainfall: {row['Rainfall_IMD_mm']} mm
Temperature: {row['Mean_Temp_Historical']} °C
Soil Moisture: {row['Soil_Moisture_Historical']}
NDVI: {row['NDVI_Vegetation_Index']}
Soil pH: {row['Soil_pH']}
Nitrogen: {row['Nitrogen']}
Phosphorus: {row['Phosphorus']}
Potassium: {row['Potassium']}
Recommended Crop: {row['Recommended_Crop']}
"""

    llm_records.append({
        "input": prompt.strip(),
        "output": str(response).strip()
    })

# Explicit columns keep the frame's schema stable even when no row qualifies.
llm_df = pd.DataFrame(llm_records, columns=["input", "output"])

if skipped_missing_advisory:
    print(f"⚠️ Skipped {skipped_missing_advisory} rows with no Expert_Advisory")

print("✅ LLM Instruction Dataset Shape:", llm_df.shape)

# Save for LLM training (JSON Lines: one record per line)
llm_df.to_json("agri_llm_train.json", orient="records", lines=True)

print("📁 LLM Training File Saved: agri_llm_train.json")

# =========================================================
# PART D — MULTILINGUAL EXTENSION DATASET (OPTIONAL)
# =========================================================
# One record per (row, language) for every translation that is present.
# The context is stripped so inputs have no leading/trailing newline,
# consistent with the prompts written in PART C.

print("\n🌍 Preparing Multilingual Dataset (Optional Layer)...")

# (source column, language code) in the order records are emitted per row.
advisory_columns = [
    ('Advisory_Hindi', "hi"),
    ('Advisory_Telugu', "te"),
    ('Advisory_Tamil', "ta"),
    ('Advisory_Marathi', "mr"),
    ('Advisory_Punjabi', "pa"),
]

multi_records = []

for _, row in multilingual.iterrows():

    base_context = f"""
District: {row['District']}
State: {row['State']}
Rainfall: {row['Rainfall_IMD_mm']} mm
Temperature: {row['Mean_Temp_Historical']} °C
Soil Moisture: {row['Soil_Moisture_Historical']}
Soil pH: {row['Soil_pH']}
Crop: {row['Recommended_Crop']}
""".strip()

    for advisory_column, lang_code in advisory_columns:
        advisory_text = row[advisory_column]
        # Skip languages for which this row has no translation.
        if pd.notna(advisory_text):
            multi_records.append({
                "input": base_context,
                "output": advisory_text,
                "lang": lang_code
            })


# Explicit columns keep the schema stable even when no translation exists.
multi_llm_df = pd.DataFrame(multi_records, columns=["input", "output", "lang"])

multi_llm_df.to_json("agri_multilingual_llm.json", orient="records", lines=True)

print("📁 Multilingual Dataset Saved: agri_multilingual_llm.json")
print("Total Multilingual Samples:", multi_llm_df.shape)

# =========================================================
# FINAL SUMMARY
# =========================================================
# Recap the shape of each artefact produced by this step.

print("\n==============================")
print("✅ STEP 2 COMPLETED SUCCESSFULLY")
print("==============================")

for summary_label, summary_frame in (
    ("ML Dataset:", X_ml),
    ("LLM Dataset:", llm_df),
    ("Multilingual Dataset:", multi_llm_df),
):
    print(summary_label, summary_frame.shape)


🚀 Starting STEP 2 Data Preparation...

✅ All datasets loaded successfully

🔧 Preparing ML Dataset...
✅ ML Features Shape: (6720, 11)
✅ ML Target Shape: (6720,)
📁 ML datasets saved locally

🔧 Preparing LLM Instruction Dataset...
✅ LLM Instruction Dataset Shape: (6690, 2)
📁 LLM Training File Saved: agri_llm_train.json

🌍 Preparing Multilingual Dataset (Optional Layer)...
📁 Multilingual Dataset Saved: agri_multilingual_llm.json
Total Multilingual Samples: (33450, 3)

✅ STEP 2 COMPLETED SUCCESSFULLY
ML Dataset: (6720, 11)
LLM Dataset: (6690, 2)
Multilingual Dataset: (33450, 3)


In [4]:
# =========================================================
# STEP 3 — CLIMATE ML MODEL TRAINING PIPELINE
# Project: AgriGuard AI
# =========================================================

import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
import numpy as np

print("🚀 Starting STEP 3 ML Training...\n")

# =========================================================
# LOAD PREPARED ML DATA
# =========================================================
# Both files are written by STEP 2. The target CSV has a single column,
# which is flattened into a 1-D array.

X = pd.read_csv("agri_ml_features.csv")
y = pd.read_csv("agri_ml_target.csv").to_numpy().ravel()

print("✅ Data Loaded")
print("Features:", X.shape)
print("Target:", y.shape)

# --- Start of fix: Ordinal Encoding for categorical features ---
# Ordinal codes for the soil nutrient labels.
nutrient_mapping = {'Low': 0, 'Medium': 1, 'High': 2}

# Encode 'nitrogen', 'phosphorus' and 'potassium' in place.
for col in ['nitrogen', 'phosphorus', 'potassium']:
    if col not in X.columns:
        print(f"Warning: Column '{col}' not found in X. Skipping mapping.")
        continue

    # A numeric column is already encoded; `.map` with string keys would
    # turn every value into NaN, so leave it untouched.
    if pd.api.types.is_numeric_dtype(X[col]):
        continue

    encoded = X[col].map(nutrient_mapping)

    # Labels absent from the mapping become NaN — report them instead of
    # letting them slip silently into training.
    unmapped = X.loc[encoded.isna() & X[col].notna(), col].unique()
    if len(unmapped) > 0:
        print(
            f"Warning: Column '{col}' has unmapped values "
            f"{sorted(map(str, unmapped))}; they are encoded as NaN."
        )

    X[col] = encoded
# --- End of fix ---

# =========================================================
# TRAIN TEST SPLIT
# =========================================================
# 80/20 hold-out split with a fixed seed.

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("\n✅ Train-Test Split Done")
print("Train:", X_train.shape)
print("Test:", X_test.shape)

# =========================================================
# FEATURE SCALING
# =========================================================
# Statistics are learned from the training rows only and then applied to
# both partitions.

scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("✅ Feature Scaling Completed")

# =========================================================
# XGBOOST MODEL INITIALIZATION
# =========================================================

xgb_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 6,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
    "tree_method": "hist",
}
model = XGBRegressor(**xgb_params)

print("\n🚜 Training Climate Prediction Model...")

model.fit(X_train_scaled, y_train)

print("✅ Training Completed")

# =========================================================
# MODEL EVALUATION
# =========================================================
# Metrics are computed on the held-out test rows.

print("\n📊 Evaluating Model Performance...")

y_pred = model.predict(X_test_scaled)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))   # RMSE = sqrt(MSE)
r2 = r2_score(y_test, y_pred)

print("\n==============================")
print("📈 MODEL PERFORMANCE METRICS")
print("==============================")
print("MAE  :", round(mae, 3))
print("RMSE :", round(rmse, 3))
print("R2   :", round(r2, 4))

# =========================================================
# SAVE MODEL + SCALER
# =========================================================
# Both objects are needed at inference time: the scaler must transform
# inputs exactly as it did during training.

for artifact, artifact_path in (
    (model, "agri_climate_model.pkl"),
    (scaler, "agri_scaler.pkl"),
):
    joblib.dump(artifact, artifact_path)

print("\n💾 Model Saved: agri_climate_model.pkl")
print("💾 Scaler Saved: agri_scaler.pkl")

# =========================================================
# FEATURE IMPORTANCE (FOR REPORT)
# =========================================================
# Pair each feature column with the model's importance score and rank
# them from most to least important.

importances = model.feature_importances_

feature_importance_df = pd.DataFrame({"Feature": X.columns, "Importance": importances})
feature_importance_df = feature_importance_df.sort_values(by="Importance", ascending=False)

feature_importance_df.to_csv("feature_importance.csv", index=False)

print("\n📁 Feature Importance Saved: feature_importance.csv")

print("\n==============================")
print("✅ STEP 3 COMPLETED SUCCESSFULLY")
print("==============================")


🚀 Starting STEP 3 ML Training...

✅ Data Loaded
Features: (6720, 11)
Target: (6720,)

✅ Train-Test Split Done
Train: (5376, 11)
Test: (1344, 11)
✅ Feature Scaling Completed

🚜 Training Climate Prediction Model...
✅ Training Completed

📊 Evaluating Model Performance...

📈 MODEL PERFORMANCE METRICS
MAE  : 25.588
RMSE : 146.485
R2   : 1.0

💾 Model Saved: agri_climate_model.pkl
💾 Scaler Saved: agri_scaler.pkl

📁 Feature Importance Saved: feature_importance.csv

✅ STEP 3 COMPLETED SUCCESSFULLY


In [2]:
import pandas as pd

# Read the instruction pairs back in (JSON Lines: one record per line).
df = pd.read_json("agri_llm_train.json", lines=True)

print("Original samples:", df.shape)

# -----------------------------
# Helper Bucketing Functions
# -----------------------------

def rainfall_bucket(x):
    """Label a rainfall amount as low, moderate or high.

    Values below 600 are "Low rainfall", values below 1000 are
    "Moderate rainfall", and everything else is "High rainfall".
    """
    if x < 600:
        return "Low rainfall"
    if x < 1000:
        return "Moderate rainfall"
    return "High rainfall"

def temp_bucket(x):
    """Label a temperature as cool (< 20), moderate (< 30) or high (otherwise)."""
    return (
        "Cool temperature" if x < 20
        else "Moderate temperature" if x < 30
        else "High temperature"
    )

def soil_ph_bucket(x):
    """Label a soil pH as acidic (< 6), neutral (up to and including 7.5) or alkaline."""
    label = "Alkaline soil"
    if x < 6:
        label = "Acidic soil"
    elif x <= 7.5:
        label = "Neutral soil"
    return label

# -----------------------------
# Parse numeric values from prompt text
# -----------------------------
# Each original prompt is re-parsed for rainfall, temperature, soil pH and
# crop, and rewritten as a short categorical summary. Only parse failures
# (missing field, non-numeric value, non-string prompt) are skipped, and the
# number skipped is reported; any other error surfaces normally.

clean_records = []
skipped_rows = 0

for _, row in df.iterrows():

    text = row["input"]

    try:
        rain = float(text.split("Rainfall:")[1].split("mm")[0])
        temp = float(text.split("Temperature:")[1].split("°")[0])
        ph = float(text.split("Soil pH:")[1].split("\n")[0])
        crop = text.split("Recommended Crop:")[1].strip()
    except (AttributeError, IndexError, ValueError):
        # AttributeError: prompt is not a string; IndexError: a label is
        # missing; ValueError: the value after the label is not a number.
        skipped_rows += 1
        continue

    rain_cat = rainfall_bucket(rain)
    temp_cat = temp_bucket(temp)
    ph_cat = soil_ph_bucket(ph)

    new_prompt = f"""
Climate Summary:
{rain_cat}, {temp_cat}

Soil Summary:
{ph_cat}

Crop:
{crop}

Task:
Generate farmer advisory
"""

    clean_records.append({
        "input": new_prompt.strip(),
        "output": row["output"]
    })

# Explicit columns keep the schema stable even if every row was skipped.
clean_df = pd.DataFrame(clean_records, columns=["input", "output"])

if skipped_rows:
    print(f"⚠️ Skipped {skipped_rows} rows whose prompt could not be parsed")

print("Clean samples:", clean_df.shape)

clean_df.to_json("agri_llm_train_fixed.json", orient="records", lines=True)

print("✅ Fixed LLM dataset saved")


Original samples: (6690, 2)
Clean samples: (6690, 2)
✅ Fixed LLM dataset saved


In [5]:
!pip install -q transformers datasets peft accelerate sentencepiece bitsandbytes


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
import torch

# Report GPU availability. The device name is only queried when CUDA is
# present, because torch.cuda.get_device_name raises on a CPU-only runtime.
cuda_available = torch.cuda.is_available()

print("CUDA Available:", cuda_available)
if cuda_available:
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("GPU: none detected")


CUDA Available: True
GPU: Tesla T4


In [3]:
from datasets import load_dataset

# Load the bucketed instruction pairs with the generic JSON loader; the
# records land in a single "train" split.
dataset = load_dataset("json", data_files="agri_llm_train_fixed.json")

print(dataset)


Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input', 'output'],
        num_rows: 6690
    })
})


In [4]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Base checkpoint that the LoRA adapter is attached to later.
model_name = "google/flan-t5-base"

# device_map="auto" lets the library decide where the weights are placed.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")

tokenizer = AutoTokenizer.from_pretrained(model_name)

print("✅ FLAN-T5 Loaded")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

✅ FLAN-T5 Loaded


In [5]:
from peft import LoraConfig, get_peft_model

# LoRA hyper-parameters: rank-8 adapters on the modules named "q" and "v",
# no bias training, configured for a sequence-to-sequence LM.
lora_settings = {
    "r": 8,
    "lora_alpha": 32,
    "target_modules": ["q", "v"],
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "SEQ_2_SEQ_LM",
}
lora_config = LoraConfig(**lora_settings)

# Wrap the base model with the adapter configuration.
model = get_peft_model(model, lora_config)

# Show how many parameters are trainable versus the total.
model.print_trainable_parameters()


trainable params: 884,736 || all params: 248,462,592 || trainable%: 0.3561


In [6]:
# Fixed sequence length used for both encoder inputs and decoder targets.
max_length = 256

def tokenize(batch):
    """Tokenize a batch of instruction pairs for seq2seq training.

    Args:
        batch: mapping with "input" and "output" keys, each a list of
            strings (the shape `Dataset.map(..., batched=True)` passes in).

    Returns:
        The tokenizer's encoding of the inputs with an added "labels" entry
        holding the target token ids, where every padding id is replaced by
        -100 so padded positions are ignored by the loss.
    """
    # `text_target=` tokenizes the targets in the same call; it replaces the
    # deprecated `tokenizer.as_target_tokenizer()` context manager.
    model_inputs = tokenizer(
        batch["input"],
        text_target=batch["output"],
        truncation=True,
        padding="max_length",
        max_length=max_length
    )

    pad_id = tokenizer.pad_token_id

    # Mask padding tokens in the labels.
    model_inputs["labels"] = [
        [(tok if tok != pad_id else -100) for tok in seq]
        for seq in model_inputs["labels"]
    ]

    return model_inputs


In [7]:
# The raw text columns are dropped after tokenization so only the fields
# produced by `tokenize` remain.
raw_columns = dataset["train"].column_names

tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=raw_columns)

print("✅ Tokenization Completed")


Map:   0%|          | 0/6690 [00:00<?, ? examples/s]



✅ Tokenization Completed


In [8]:
from transformers import TrainingArguments

# Hyper-parameters for the fine-tuning run, grouped by concern.
training_config = dict(
    # Output / checkpointing
    output_dir="./agri_llm_fixed",
    save_steps=500,
    save_total_limit=1,
    # Batching: 2 samples per device x 8 accumulation steps per update
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=2,
    # Optimisation schedule
    learning_rate=2e-5,
    warmup_steps=200,
    lr_scheduler_type="cosine",
    max_grad_norm=0.5,
    fp16=False,
    # Logging / data handling
    logging_steps=50,
    report_to="none",
    remove_unused_columns=False,
)

training_args = TrainingArguments(**training_config)


In [9]:
from transformers import Trainer

# Bind the LoRA-wrapped model, the training arguments and the tokenized
# training split into a Trainer.
train_split = tokenized_dataset["train"]

trainer = Trainer(model=model, args=training_args, train_dataset=train_split)

print("✅ Trainer Ready")


The model is already on multiple devices. Skipping the move to device specified in `args`.


✅ Trainer Ready


In [10]:
# Run the fine-tuning loop configured above; the returned training summary
# is displayed as the cell output.
trainer.train()


Step,Training Loss
50,3.7853
100,3.7526
150,3.7057
200,3.5531
250,3.3854
300,3.2261
350,3.0573
400,2.9294
450,2.8084
500,2.7093


TrainOutput(global_step=838, training_loss=3.021247772726637, metrics={'train_runtime': 1454.0353, 'train_samples_per_second': 9.202, 'train_steps_per_second': 0.576, 'total_flos': 4599209499033600.0, 'train_loss': 3.021247772726637, 'epoch': 2.0})

In [11]:
# Write the trained adapter and the tokenizer into the same folder so it
# can be loaded from a single path later.
for component in (model, tokenizer):
    component.save_pretrained("agri_llm_adapter_final")

print("✅ Final AgriGuard LLM Adapter Saved")


✅ Final AgriGuard LLM Adapter Saved


Step 5

In [48]:
import joblib
import torch
import requests
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


class AgriGuardAIEngine:
    """Hybrid inference engine: rainfall regressor + advisory seq2seq model.

    `run_pipeline` fetches live temperature and soil moisture from Open-Meteo,
    feeds them with the user-supplied fields to the pickled regressor, buckets
    the result into a categorical prompt and asks the LLM for an advisory.
    """

    # Fallbacks used when the weather API is unreachable or omits a field.
    DEFAULT_TEMPERATURE = 25.0
    DEFAULT_SOIL_MOISTURE = 0.35

    # Seconds to wait for the weather API before falling back.
    WEATHER_TIMEOUT_S = 10

    def __init__(self,
                 ml_model_path="agri_climate_model.pkl",
                 scaler_path="agri_scaler.pkl",
                 llm_path="agri_llm_adapter_final"):
        """Load the regressor, its scaler, and the advisory LLM.

        Args:
            ml_model_path: path of the joblib-pickled regressor.
            scaler_path: path of the joblib-pickled feature scaler.
            llm_path: directory holding the fine-tuned model and tokenizer.
        """

        print("\n🚀 Initializing AgriGuard AI Engine...")

        # =============================
        # Load Climate ML Components
        # =============================

        # SECURITY: joblib.load unpickles arbitrary objects and can execute
        # code; only load files produced by a trusted training run.
        self.climate_model = joblib.load(ml_model_path)
        self.scaler = joblib.load(scaler_path)

        # Column order must match the feature matrix the scaler was fitted on.
        self.ml_feature_names = [
            'lat', 'lon', 'rainfall_imd_mm', 'soil_moisture_historical',
            'mean_temp_historical', 'ndvi_vegetation_index', 'soil_ph',
            'nitrogen', 'phosphorus', 'potassium', 'historical_msp_inr'
        ]

        # Same ordinal encoding that was applied before training.
        self.nutrient_mapping = {
            "Low": 0,
            "Medium": 1,
            "High": 2
        }

        print("✅ Climate ML model and scaler loaded")

        # =============================
        # Load Advisory LLM
        # =============================

        self.llm_tokenizer = AutoTokenizer.from_pretrained(llm_path)

        self.llm_model = AutoModelForSeq2SeqLM.from_pretrained(
            llm_path,
            device_map="auto"
        )

        print("✅ Advisory LLM loaded")
        print("✅ AgriGuard AI Engine Ready")

    # ==================================================
    # Semantic Bucketing (LLM Friendly Representation)
    # ==================================================

    def _rainfall_bucket(self, x):
        """Return "Low"/"Moderate"/"High rainfall" using cut-offs at 600 and 1000."""
        if x < 600:
            return "Low rainfall"
        elif x < 1000:
            return "Moderate rainfall"
        else:
            return "High rainfall"

    def _temperature_bucket(self, x):
        """Return "Cool"/"Moderate"/"High temperature" using cut-offs at 20 and 30."""
        if x < 20:
            return "Cool temperature"
        elif x < 30:
            return "Moderate temperature"
        else:
            return "High temperature"

    def _soil_ph_bucket(self, x):
        """Return "Acidic" (< 6), "Neutral" (<= 7.5) or "Alkaline soil"."""
        if x < 6:
            return "Acidic soil"
        elif x <= 7.5:
            return "Neutral soil"
        else:
            return "Alkaline soil"

    # ================================================
    # Real-Time Weather Fetch (Open-Meteo API)
    # ================================================

    def _get_realtime_weather(self, lat, lon):
        """Fetch current temperature and top-layer soil moisture for a point.

        Args:
            lat: latitude passed to the API.
            lon: longitude passed to the API.

        Returns:
            dict with "temperature" and "soil_moisture". If the request fails
            (network error, timeout, HTTP error status, invalid JSON) or a
            field is missing/null, the class-level fallback is used for it.
        """

        fallback = {
            "temperature": self.DEFAULT_TEMPERATURE,
            "soil_moisture": self.DEFAULT_SOIL_MOISTURE
        }

        try:
            # A timeout keeps the pipeline from hanging on a stalled
            # connection; raise_for_status turns HTTP errors into exceptions
            # instead of parsing an error body as weather data.
            response = requests.get(
                "https://api.open-meteo.com/v1/forecast",
                params={
                    "latitude": lat,
                    "longitude": lon,
                    "current_weather": "true",
                    "hourly": "soil_moisture_0_1cm",
                    "forecast_days": 1
                },
                timeout=self.WEATHER_TIMEOUT_S
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            print(f"⚠️ Weather lookup failed ({exc}); using fallback values")
            return fallback

        current = data.get("current_weather") or {}
        hourly = data.get("hourly") or {}

        temperature = current.get("temperature")
        if temperature is None:
            temperature = fallback["temperature"]

        # First available hourly reading; null entries are skipped so that a
        # None never reaches the model.
        soil_vals = hourly.get("soil_moisture_0_1cm") or []
        soil_moisture = next(
            (val for val in soil_vals if val is not None),
            fallback["soil_moisture"]
        )

        return {
            "temperature": temperature,
            "soil_moisture": soil_moisture
        }

    # ================================================
    # Climate Prediction (ML Inference)
    # ================================================

    def _predict_climate(self, features_dict):
        """Predict rainfall from a feature dict keyed by `ml_feature_names`.

        Nutrient labels are ordinal-encoded (labels not in the mapping fall
        back to 1, i.e. "Medium"); a feature absent from the dict is passed
        on as None.

        NOTE(review): 'rainfall_imd_mm' is both a model input and the training
        target, so the prediction largely echoes the observed value supplied
        by the caller. Removing it needs a retrained model and scaler.

        Returns:
            The prediction as a built-in float.
        """

        processed_features = []

        for feature_name in self.ml_feature_names:
            value = features_dict.get(feature_name)

            if feature_name in ["nitrogen", "phosphorus", "potassium"]:
                processed_features.append(
                    self.nutrient_mapping.get(value, 1)
                )
            else:
                processed_features.append(value)

        # A named DataFrame avoids sklearn's missing-feature-names warning.
        feature_df = pd.DataFrame(
            [processed_features],
            columns=self.ml_feature_names
        )

        scaled_features = self.scaler.transform(feature_df)

        # Convert the NumPy scalar to a plain float so the result dict can be
        # serialised (e.g. with json.dumps).
        rainfall_prediction = float(self.climate_model.predict(scaled_features)[0])

        return rainfall_prediction

    # ================================================
    # Prompt Builder (Anti-Hallucination Design)
    # ================================================

    def _build_llm_prompt(self, rainfall, temperature, soil_ph, crop, language):
        """Build the advisory prompt from bucketed climate and soil values.

        NOTE(review): this template ("Farmer Situation: ... Answer:") differs
        from the "Climate Summary: ... Task: Generate farmer advisory"
        template used to build the fine-tuning data earlier in the notebook;
        confirm the mismatch is intentional.

        Args:
            rainfall: rainfall value to bucket.
            temperature: temperature value to bucket.
            soil_ph: soil pH value to bucket.
            crop: crop name inserted verbatim.
            language: requested language; falsy values become "English".

        Returns:
            The prompt text with surrounding whitespace removed.
        """

        rain_text = self._rainfall_bucket(rainfall)
        temp_text = self._temperature_bucket(temperature)
        soil_text = self._soil_ph_bucket(soil_ph)

        lang_text = language if language else "English"

        prompt = f"""
Farmer Situation:
Climate: {rain_text}, {temp_text}
Soil: {soil_text}
Crop: {crop}

Task:
Give 3 to 5 farming action steps.

Output Format:
- Step 1:
- Step 2:
- Step 3:

Language: {lang_text}

Answer:
"""

        return prompt.strip()

    # ================================================
    # Advisory Generation (Controlled Output)
    # ================================================

    def _generate_advisory(self, prompt):
        """Generate advisory text for `prompt` with sampling enabled.

        Returns:
            The decoded output with special tokens removed and whitespace
            stripped. Output varies between calls because do_sample=True.
        """

        # Send inputs to whichever device the model lives on rather than
        # assuming CUDA, so the engine also runs on CPU-only machines.
        inputs = self.llm_tokenizer(prompt, return_tensors="pt").to(
            self.llm_model.device
        )

        outputs = self.llm_model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            do_sample=True
        )

        response = self.llm_tokenizer.decode(
            outputs[0],
            skip_special_tokens=True
        )

        return response.strip()

    # ================================================
    # MAIN HYBRID PIPELINE
    # ================================================

    def run_pipeline(self, user_input):
        """Run weather fetch -> rainfall prediction -> prompt -> advisory.

        Args:
            user_input: dict with keys "lat", "lon",
                "current_rainfall_observed", "ndvi", "soil_ph", "nitrogen",
                "phosphorus", "potassium", "msp", "crop" and, optionally,
                "language" (defaults to "English").

        Returns:
            dict with "predicted_rainfall_mm", "realtime_temperature",
            "realtime_soil_moisture", "llm_prompt" and "advisory".

        Raises:
            KeyError: if a required key is missing from `user_input`.
        """

        print("\n===============================")
        print(" Running AgriGuard AI Pipeline ")
        print("===============================")

        lat = user_input["lat"]
        lon = user_input["lon"]

        # 1️⃣ Real-time Weather
        realtime = self._get_realtime_weather(lat, lon)

        realtime_temp = realtime["temperature"]
        realtime_soil = realtime["soil_moisture"]

        print(f"🌡 Live Temperature: {realtime_temp}")
        print(f"💧 Live Soil Moisture: {realtime_soil}")

        # 2️⃣ Climate ML Prediction
        # Live readings stand in for the "historical" temperature and soil
        # moisture columns the model was trained on.

        ml_features = {
            "lat": lat,
            "lon": lon,
            "rainfall_imd_mm": user_input["current_rainfall_observed"],
            "soil_moisture_historical": realtime_soil,
            "mean_temp_historical": realtime_temp,
            "ndvi_vegetation_index": user_input["ndvi"],
            "soil_ph": user_input["soil_ph"],
            "nitrogen": user_input["nitrogen"],
            "phosphorus": user_input["phosphorus"],
            "potassium": user_input["potassium"],
            "historical_msp_inr": user_input["msp"]
        }

        predicted_rainfall = self._predict_climate(ml_features)

        print(f"🌦 Predicted Rainfall: {predicted_rainfall:.2f} mm")

        # 3️⃣ Build Prompt

        llm_prompt = self._build_llm_prompt(
            rainfall=predicted_rainfall,
            temperature=realtime_temp,
            soil_ph=user_input["soil_ph"],
            crop=user_input["crop"],
            language=user_input.get("language", "English")
        )

        # 4️⃣ Generate Advisory

        advisory = self._generate_advisory(llm_prompt)

        return {
            "predicted_rainfall_mm": round(predicted_rainfall, 2),
            "realtime_temperature": realtime_temp,
            "realtime_soil_moisture": realtime_soil,
            "llm_prompt": llm_prompt,
            "advisory": advisory
        }


# =========================================
# DEMO RUN
# =========================================
# Build the engine with its default artefact paths and run one sample
# request through the full pipeline.

agri_engine = AgriGuardAIEngine()

sample_input_pipeline = dict(
    # Location
    lat=19.07,
    lon=72.87,
    # Field observations
    current_rainfall_observed=850,
    ndvi=0.68,
    soil_ph=6.7,
    # Soil nutrient levels (Low / Medium / High)
    nitrogen="Medium",
    phosphorus="Low",
    potassium="High",
    # Crop context
    msp=2200,
    crop="Rice",
    language="English",
)

result = agri_engine.run_pipeline(sample_input_pipeline)

print("\n================================")


🚀 Initializing AgriGuard AI Engine...
✅ Climate ML model and scaler loaded
✅ Advisory LLM loaded
✅ AgriGuard AI Engine Ready

 Running AgriGuard AI Pipeline 
🌡 Live Temperature: 24.7
💧 Live Soil Moisture: 0.035
🌦 Predicted Rainfall: 898.82 mm

