In [94]:
import json
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
import pandas as pd

# Train a RandomForestClassifier through a preprocessing pipeline whose
# columns, target and hyperparameter grid come from a JSON configuration,
# then report the best grid-search result.


def _require(config, key, context="the configuration"):
    """Return ``config[key]`` or raise a ``KeyError`` that explains what is missing.

    A bare ``KeyError: 'target'`` does not say whether the JSON configuration
    or the DataFrame lacked the name; this message names the lookup site and
    lists the keys that are actually present.
    """
    try:
        return config[key]
    except KeyError:
        raise KeyError(
            "{!r} not found in {}; available keys: {}".format(
                key, context, sorted(config)
            )
        ) from None


def _qualify_param_grid(grid, estimator, step="classifier"):
    """Prefix bare hyperparameter names with ``<step>__`` where needed.

    Parameters of a step inside a ``Pipeline`` must be addressed as
    ``<step>__<param>``. A name is rewritten only when the pipeline does not
    recognise it as given but does recognise the prefixed form, so grids that
    already use valid names pass through unchanged.

    ``grid`` may be a dict or a list of dicts; any other value is returned
    untouched and left for ``GridSearchCV`` to validate.
    """
    known = estimator.get_params(deep=True)

    def qualify(single_grid):
        qualified = {}
        for name, values in single_grid.items():
            prefixed = step + "__" + name
            if name not in known and prefixed in known:
                name = prefixed
            qualified[name] = values
        return qualified

    if isinstance(grid, dict):
        return qualify(grid)
    if isinstance(grid, list):
        return [qualify(g) if isinstance(g, dict) else g for g in grid]
    return grid


# Load the JSON configuration (JSON text is UTF-8, so do not rely on the
# platform default encoding).
with open('algoparams_from_ui.json', encoding='utf-8') as f:
    ml_steps = json.load(f)

# Load the data. The original looked the path up under the key "iris.csv";
# that lookup is kept, but the literal file name is used when the key is absent.
# NOTE(review): confirm which configuration key is meant to hold the dataset path.
data = pd.read_csv(ml_steps.get("iris.csv", "iris.csv"))

# Extract the target variable
target_column = _require(ml_steps, "target")
y = data[target_column]

# Extract the feature columns
X = data.drop(columns=[target_column])

# Define the column transformer for feature handling
feature_handling = _require(ml_steps, "feature_handling")

numeric_transformer = Pipeline(steps=[
    ('scaler', preprocessing.StandardScaler())])

categorical_transformer = Pipeline(steps=[
    ('onehot', preprocessing.OneHotEncoder(handle_unknown='ignore'))])

preprocessing_pipeline = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer,
         _require(feature_handling, "numeric_columns", "'feature_handling'")),
        ('cat', categorical_transformer,
         _require(feature_handling, "categorical_columns", "'feature_handling'")),
    ])

# Define the pipeline for model building
pipeline = Pipeline(steps=[
    ('preprocessing', preprocessing_pipeline),
    ('classifier', RandomForestClassifier())
])

# Define the hyperparameters to search; bare classifier parameter names are
# mapped onto the pipeline's 'classifier' step.
param_grid = _qualify_param_grid(
    _require(_require(ml_steps, "model_building"),
             "hyperparameters", "'model_building'"),
    pipeline,
)

# Perform grid search
grid_search = GridSearchCV(pipeline, param_grid=param_grid, cv=5)
grid_search.fit(X, y)

# Print the best hyperparameters and the corresponding score
print("Best hyperparameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)


KeyError: 'target'