# Drought Prediction with XGBoost

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

# Load the dataset and train/evaluate an XGBoost drought-category classifier.
df = pd.read_csv('USDMData.csv')
# Forward-fill gaps from the previous row (replacement for the deprecated
# fillna(method='ffill')); leading rows with no earlier value stay NaN.
df = df.ffill()

# Resolve the target column once so features and labels always agree.
# When 'DroughtCategory' is absent the last column serves as the label; it
# must then be removed from the features as well, otherwise the target leaks
# into the model inputs and the evaluation below is meaningless.
label_col = 'DroughtCategory' if 'DroughtCategory' in df.columns else df.columns[-1]

# Separate features and labels
features = df.drop(columns=[label_col])

# Convert object columns to integer category codes (NaN becomes -1)
for col in features.select_dtypes(include='object').columns:
    features[col] = features[col].astype('category').cat.codes

# Encode labels as consecutive integers 0..n_classes-1
le = LabelEncoder()
labels = le.fit_transform(df[label_col])

# Train-test split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Train model
model = XGBClassifier()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluation
# Report the classes that actually occur in y_test / y_pred (the same set
# scikit-learn would pick by default) under their original names instead of
# the opaque integer codes produced by the LabelEncoder.
present_codes = np.union1d(y_test, y_pred)
present_names = [str(name) for name in le.inverse_transform(present_codes)]
print(classification_report(
    y_test, y_pred, labels=present_codes, target_names=present_names
))
# Rows are true classes, columns are predicted classes, both ordered like
# present_names above.
conf_matrix = confusion_matrix(y_test, y_pred, labels=present_codes)
print("Confusion Matrix:\n", conf_matrix)
