In [None]:
import warnings

import numpy as np
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR


In [None]:
# Read the semicolon-delimited yield table; its first column becomes the index
df = pd.read_csv('yield_df.csv', sep=';', index_col=0)

# The yield column is the regression target; every remaining column is a feature
y = df['hg/ha_yield']
X = df.drop('hg/ha_yield', axis=1)

In [None]:
# Encode categorical variables (Area, Item).
# Each column gets its own LabelEncoder: refitting one shared encoder on 'Item'
# discards the 'Area' mapping, which is needed later to encode new inputs.
# The encoders are fitted from the untouched `df` (not the already-mutated `X`),
# so re-running this cell keeps the original string classes.
area_encoder = LabelEncoder()
item_encoder = LabelEncoder()
X['Area'] = area_encoder.fit_transform(df['Area'])
X['Item'] = item_encoder.fit_transform(df['Item'])

# Backward compatibility: the previously shared encoder ended up fitted on 'Item'.
label_encoder = item_encoder


In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Fit the StandardScaler on the training split only, then apply the same
# transformation to both splits (every feature column is scaled, including the
# label-encoded Area/Item codes)
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Initialize the Support Vector Regressor model (RBF kernel, the default).
# SVR's C penalty and epsilon tube are expressed in the units of the target, so
# with the defaults (C=1.0, epsilon=0.1) a target on a much larger scale is fitted
# very poorly. NOTE(review): hg/ha yields are presumably far larger than 0.1 --
# confirm with y.describe().
# TransformedTargetRegressor standardizes y for fitting and inverse-transforms
# predictions, so fit()/predict() still take and return yields in original units.
svr_model = TransformedTargetRegressor(
    regressor=SVR(kernel='rbf'),
    transformer=StandardScaler(),
)

In [None]:
# Fit the regressor on the standardized training features and their yields
svr_model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict yields for the held-out (standardized) test features
y_pred = svr_model.predict(X=X_test_scaled)

In [None]:
# Score the test-set predictions against the true yields
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Report both metrics, one per line
print(
    f"Mean Squared Error (MSE): {mse}",
    f"R-squared Score (R2): {r2}",
    sep="\n",
)

In [None]:
# A single example observation to predict, keyed by feature column name
new_data = dict(
    Area=['India'],  # example area
    Item=['Wheat'],  # example crop
    Year=[2025],
    average_rain_fall_mm_per_year=[700],
    pesticides_tonnes=[40],
    avg_temp=[23],
)

In [None]:
# Build a one-row DataFrame whose columns are the keys of `new_data`
new_data_df = pd.DataFrame(data=new_data)

In [None]:
# Handle unseen labels in new data
def handle_unseen_labels(data, encoder, column_name):
    """Encode ``data[column_name]`` with a fitted encoder, tolerating unseen labels.

    Known labels map to their position in ``encoder.classes_``. Labels that are
    not in ``encoder.classes_`` receive fresh codes starting at
    ``len(encoder.classes_)``, in order of first appearance. The encoder itself
    is NOT modified: appending to ``classes_`` would break the sorted order that
    the encoder's ``transform`` relies on for numeric labels and would leak state
    into every later use of the same fitted encoder.

    Parameters
    ----------
    data : mapping or DataFrame
        Object supporting ``data[column_name]`` and yielding the labels to encode.
    encoder : object
        Fitted encoder exposing a ``classes_`` array (e.g. a LabelEncoder).
    column_name : str
        Name of the column to encode.

    Returns
    -------
    numpy.ndarray
        Integer codes, one per label in ``data[column_name]``.

    Warns
    -----
    UserWarning
        If any label was not seen at fit time; such codes lie outside the range
        the downstream model was trained on, so predictions for them are
        unreliable.
    """
    known_codes = {
        label: code
        for code, label in enumerate(np.asarray(encoder.classes_).tolist())
    }
    unseen_codes = {}
    encoded = []
    for val in data[column_name]:
        if val in known_codes:
            encoded.append(known_codes[val])
            continue
        if val not in unseen_codes:
            # Next free code after the known classes and earlier unseen labels.
            unseen_codes[val] = len(known_codes) + len(unseen_codes)
        encoded.append(unseen_codes[val])

    if unseen_codes:
        warnings.warn(
            f"Column {column_name!r} contains labels not seen when the encoder "
            f"was fitted: {list(unseen_codes)}. They were assigned codes outside "
            f"the fitted range.",
            UserWarning,
            stacklevel=2,
        )
    return np.asarray(encoded, dtype=np.intp)

In [None]:
# Encode the categorical variables in new data with one encoder per column.
# The shared `label_encoder` was last fitted on 'Item', so using it for 'Area'
# yields a code unrelated to the Area codes the model was trained on.
# Fitting on the raw `df` columns reproduces the training codes, because
# LabelEncoder always assigns codes by the sorted order of the unique labels.
area_encoder = LabelEncoder().fit(df['Area'])
item_encoder = LabelEncoder().fit(df['Item'])
new_data_df['Area'] = handle_unseen_labels(new_data_df, area_encoder, 'Area')
new_data_df['Item'] = handle_unseen_labels(new_data_df, item_encoder, 'Item')


In [None]:
# Apply the scaler fitted on the training split to the new observation
new_data_scaled = scaler.transform(X=new_data_df)

In [None]:
# Run the trained model on the standardized new observation
predicted_yield = svr_model.predict(X=new_data_scaled)

In [None]:
# Only one observation was predicted, so report the first (and only) entry
first_prediction = predicted_yield[0]
print(f"Predicted Yield (hg/ha): {first_prediction}")