In [None]:
import numpy as np
import math  # Import the standard math library for required functions
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Synthetic stand-in for a gene-expression dataset: 500 samples with 100
# features ("genes"), 10 of them informative and 20 redundant. The fixed
# random_state values make both the data and the 70/30 split reproducible.
X, y = make_classification(
    n_samples=500,
    n_features=100,
    n_informative=10,
    n_redundant=20,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Define the objective function for gene subset evaluation
def evaluate_gene_subset(gene_subset):
    """Score a binary gene mask by k-NN accuracy on the held-out split.

    Positions of ``gene_subset`` whose value equals 1 are treated as the
    selected feature columns. A 3-nearest-neighbour classifier is fitted on
    those columns of the module-level ``X_train``/``y_train`` and scored on
    ``X_test``/``y_test``.

    Args:
        gene_subset: Iterable of flags; only entries equal to 1 select a gene.

    Returns:
        The accuracy reported by ``accuracy_score``, or ``0`` when no
        position is selected.
    """
    chosen_columns = [position for position, flag in enumerate(gene_subset) if flag == 1]

    # Nothing selected: there is no feature matrix to train on.
    if not chosen_columns:
        return 0

    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train[:, chosen_columns], y_train)
    predictions = knn.predict(X_test[:, chosen_columns])
    return accuracy_score(y_test, predictions)

# Define Lévy flight function with math module instead of np.math
def levy_flight(Lambda):
    """Draw one heavy-tailed random step of the form ``u / |v|**(1/Lambda)``.

    ``u`` is a normal sample scaled by ``sigma_u`` (a gamma-function based
    scale factor, as in Mantegna-style Lévy step generators) and ``v`` is a
    standard normal sample. Both are taken from NumPy's global random state,
    ``u`` first and then ``v``.

    Args:
        Lambda: Exponent of the step distribution; used as a divisor, so it
            must be non-zero.

    Returns:
        A single signed float step.
    """
    inverse_exponent = 1 / Lambda

    numerator = math.gamma(1 + Lambda) * math.sin(math.pi * Lambda / 2)
    denominator = math.gamma((1 + Lambda) / 2) * Lambda * 2 ** ((Lambda - 1) / 2)
    sigma_u = (numerator / denominator) ** inverse_exponent

    # Keep this draw order (u, then v) so seeded runs stay reproducible.
    u = np.random.randn() * sigma_u
    v = np.random.randn()
    return u / abs(v) ** inverse_exponent

# Cuckoo Search for Gene Selection
def cuckoo_search_gene_selection(n_nests=20, n_iterations=50, discovery_rate=0.25, gene_count=100):
    """Search for a high-scoring binary gene mask with a cuckoo-search loop.

    Each nest is a 0/1 vector of length ``gene_count`` scored by
    ``evaluate_gene_subset``. In every generation each nest proposes a
    mutated copy (bits flipped according to ``levy_flight`` draws) that
    replaces the nest when it scores strictly higher; afterwards the
    lowest-scoring fraction of nests is replaced by fresh random masks.

    Args:
        n_nests: Number of nests in the population; must be at least 1.
        n_iterations: Number of generations to run.
        discovery_rate: Fraction (0..1) of nests abandoned per generation;
            ``int(discovery_rate * n_nests)`` nests are replaced.
        gene_count: Length of every mask. NOTE(review): presumably must not
            exceed the number of feature columns that
            ``evaluate_gene_subset`` indexes -- confirm against the dataset.

    Returns:
        Tuple ``(best_nest, best_fitness)``: the best mask seen (an
        independent array, not a view into the population) and its score.

    Raises:
        ValueError: If ``n_nests`` is below 1 or ``discovery_rate`` is
            outside ``[0, 1]``.
    """
    if n_nests < 1:
        raise ValueError("n_nests must be at least 1")
    if not 0 <= discovery_rate <= 1:
        raise ValueError("discovery_rate must be between 0 and 1")

    nests = np.random.randint(2, size=(n_nests, gene_count))  # binary masks

    # Cache one score per nest so an unchanged nest is never re-evaluated
    # (every evaluation retrains the classifier).
    fitness = [evaluate_gene_subset(nest) for nest in nests]

    # Seed the incumbent from the whole initial population. Copy it: a row
    # view would be silently overwritten when that nest is later updated or
    # abandoned, leaving best_nest out of sync with best_fitness.
    best_index = max(range(n_nests), key=fitness.__getitem__)
    best_nest = nests[best_index].copy()
    best_fitness = fitness[best_index]

    n_abandon = int(discovery_rate * n_nests)

    for _ in range(n_iterations):
        for i in range(n_nests):
            new_nest = nests[i].copy()
            # The signed Lévy step is used directly as the flip threshold:
            # a negative step never flips the bit, a step >= 1 always does.
            for j in range(gene_count):
                if np.random.rand() < levy_flight(1.5):
                    new_nest[j] = 1 - new_nest[j]

            new_fitness = evaluate_gene_subset(new_nest)

            # Greedy replacement: keep the candidate only if strictly better.
            if new_fitness > fitness[i]:
                nests[i] = new_nest
                fitness[i] = new_fitness

            if new_fitness > best_fitness:
                best_nest = new_nest.copy()
                best_fitness = new_fitness

        # Abandon the worst-scoring nests and rebuild them at random.
        if n_abandon > 0:
            worst = np.argsort(fitness, kind="stable")[:n_abandon]
            nests[worst] = np.random.randint(2, size=(len(worst), gene_count))
            for idx in worst:
                fitness[idx] = evaluate_gene_subset(nests[idx])
                if fitness[idx] > best_fitness:
                    best_nest = nests[idx].copy()
                    best_fitness = fitness[idx]

    return best_nest, best_fitness

# Run the search with its default settings, then report the indices of the
# genes whose flag is 1 in the best mask together with the score it reached.
best_genes, best_accuracy = cuckoo_search_gene_selection()
selected_genes = [
    gene_index
    for gene_index, flag in enumerate(best_genes)
    if flag == 1
]
print("Best gene subset:", selected_genes)
print("Best accuracy achieved:", best_accuracy)


Best gene subset: [0, 4, 5, 7, 9, 11, 12, 14, 15, 16, 18, 20, 21, 26, 27, 28, 30, 33, 34, 36, 37, 39, 40, 42, 43, 48, 49, 50, 53, 55, 56, 59, 61, 63, 64, 65, 70, 71, 72, 73, 79, 80, 81, 86, 88, 89, 92, 94, 98, 99]
Best accuracy achieved: 0.92
