In [0]:
%python
# Install the required packages (TuneSearchCV is distributed in tune-sklearn)
# NOTE(review): tune-sklearn appears to be no longer actively maintained;
# confirm it supports the installed Ray version, or pin both.
%pip install ray[tune] tune-sklearn

import pandas as pd
from ray import tune
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

try:
    from ray.tune.search import TuneSearchCV
except ImportError:
    # ray.tune.search holds search algorithms and does not export TuneSearchCV;
    # the scikit-learn style wrapper lives in the tune_sklearn package.
    from tune_sklearn import TuneSearchCV

# Load iris: features go into a DataFrame labelled with the dataset's
# feature names, targets into a Series.
iris = load_iris()
y = pd.Series(iris.target)
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define the objective function
def objective(config):
    """Train a random forest with ``config`` and return its accuracy.

    Intended as a Ray Tune function trainable: Tune records a dict returned
    from the function as the trial's final result. Returning the metrics
    (instead of calling the keyword-style ``tune.report(accuracy=...)``,
    which current Ray releases no longer accept) also lets the function be
    called directly outside a Tune session.

    Args:
        config: Mapping of ``RandomForestClassifier`` keyword arguments,
            e.g. one sample drawn from ``search_space``.

    Returns:
        dict: ``{"accuracy": <score>}`` computed on ``X_test`` / ``y_test``.
    """
    model = RandomForestClassifier(**config)
    model.fit(X_train, y_train)

    # NOTE(review): trials are scored on X_test/y_test, the same split the
    # notebook later uses for the final evaluation, so a configuration chosen
    # with this metric will look optimistic there. Consider scoring on a
    # validation split or with cross-validation on the training data.
    accuracy = accuracy_score(y_test, model.predict(X_test))
    return {"accuracy": accuracy}

# Hyperparameter ranges to explore; each key names a RandomForestClassifier
# argument. (Ray Tune documents randint's upper bound as exclusive - confirm
# for the installed version.)
search_space = dict(
    n_estimators=tune.randint(10, 200),
    max_depth=tune.randint(1, 20),
    min_samples_split=tune.randint(2, 10),
    min_samples_leaf=tune.randint(1, 10),
    bootstrap=tune.choice([True, False]),
)

# Run the hyperparameter search on the training split: n_trials=10,
# scored by accuracy, with fixed seeds on both the forest and the search.
forest = RandomForestClassifier(random_state=42)
tuner = TuneSearchCV(forest, search_space, n_trials=10, scoring='accuracy', random_state=42)
tuner.fit(X_train, y_train)

# Take the best estimator found by the search and score it on the held-out rows.
best_model = tuner.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Show the score as a one-row table.
accuracy_table = pd.DataFrame({'Accuracy': [accuracy]})
display(accuracy_table)

In [0]:
%python
from ray.util.spark import setup_ray_cluster, shutdown_ray_cluster
import os

# Set up a Ray cluster on Spark with between 1 and 3 worker nodes.
ray_conf = setup_ray_cluster(
    min_worker_nodes=1,
    # Upper bound on worker nodes. The "set to 0 for non-GPU workloads" advice
    # applies to the per-node GPU count option of setup_ray_cluster, not to
    # this argument - TODO confirm the option name for the installed Ray version.
    max_worker_nodes=3,
)

# ray_conf[0] is used as the cluster address throughout this cell.
ray_address = ray_conf[0]

# Export the address before initialising so that ray.init() (called without
# an explicit address) and the Spark integration both target this cluster.
os.environ['RAY_ADDRESS'] = ray_address

# Initialize Ray
import ray
ray.init()
print(f"Ray initialized with address: {ray_address}")

# Your Ray Tune code here

# Shutdown Ray cluster after use
# shutdown_ray_cluster()

# Memory sizing guideline for Ray on Spark. This is a rule of thumb, not
# executable code: the names below are placeholders, so evaluating it as an
# expression raises NameError.
#
#   spark_executor_memory
#       + num_Ray_worker_nodes_per_spark_worker
#           * (memory_worker_node + object_store_memory_worker_node)
#       < spark_worker_physical_memory * 0.8

In [0]:
from sklearn.datasets import load_wine
import pandas as pd

# Load the wine dataset and expose its feature matrix as a DataFrame whose
# columns carry the dataset's feature names.
wine = load_wine()
wine_df = pd.DataFrame(
    data=wine.data,
    columns=wine.feature_names,
)

In [0]:
# pandas DataFrames have no .display() method (that is a Spark DataFrame
# convenience), so call the notebook's display() helper instead, the same way
# the accuracy table is rendered earlier in this notebook.
display(wine_df)