In [7]:
import os
import time
import warnings
import pickle
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.random_projection import johnson_lindenstrauss_min_dim
from sklearn.random_projection import SparseRandomProjection
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from sklearn.exceptions import UndefinedMetricWarning
import zipfile
import csv

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

###############################################################################
# read data from file

def extract_zip(zip_file, extract_to):
    """Unpack every member of the archive ``zip_file`` into ``extract_to``."""
    archive = zipfile.ZipFile(zip_file, 'r')
    # The archive object is its own context manager and is closed on exit.
    with archive:
        archive.extractall(extract_to)

def getDataPoints(path):
    """Return the stripped text of every non-empty ``.py`` file found under ``path``.

    The directory tree is walked recursively. Files whose content is empty
    after stripping whitespace are reported on stdout and skipped. A missing
    directory, or a tree with no usable file, is reported on stdout and
    yields an empty list.
    """
    if not os.path.exists(path):
        print(f"Directory does not exist: {path}")
        return []

    documents = []
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            if not name.endswith(".py"):
                continue  # only Python sources are data points
            file_path = os.path.join(folder, name)
            with open(file_path, encoding="utf-8") as handle:
                text = handle.read().strip()
            if not text:
                print(f"Empty file: {file_path}")
                continue
            documents.append(text)

    if not documents:
        print(f"No valid documents found in directory: {path}")

    return documents


def computeResults(testLabels, predictLabels):
    """Score predictions and return ``(precision, recall, accuracy, f1)``.

    The four scores are printed on success. If any metric raises, the error
    is printed instead and every element of the returned tuple is the
    placeholder string ``"-"``.
    """
    try:
        scores = (
            precision_score(testLabels, predictLabels),
            recall_score(testLabels, predictLabels),
            accuracy_score(testLabels, predictLabels),
            f1_score(testLabels, predictLabels),
        )
        precision, recall, accuracy, f1 = scores
        print(f"Precision: {precision}, Recall: {recall}, Accuracy: {accuracy}, F1 Score: {f1}")
    except Exception as e:
        # Best effort: report the failure and hand back placeholders.
        print(f"Error computing metrics: {e}")
        return "-", "-", "-", "-"
    return precision, recall, accuracy, f1

###############################################################################
# FLAST

def vectorization(dataPoints, dim=0, eps=0.3):
    """Turn raw documents into token-count vectors, optionally randomly projected.

    dataPoints: iterable of document strings fed to ``CountVectorizer``.
    dim: target number of components; when ``dim <= 0`` it is derived from
        the number of documents via ``johnson_lindenstrauss_min_dim``.
    eps: distortion passed to the Johnson-Lindenstrauss bound; ``eps == 0``
        disables the projection and returns the raw count matrix.
    """
    token_counts = CountVectorizer().fit_transform(dataPoints)
    if eps == 0:
        return token_counts

    n_components = (
        johnson_lindenstrauss_min_dim(token_counts.shape[0], eps=eps)
        if dim <= 0
        else dim
    )
    projector = SparseRandomProjection(n_components=n_components)
    return projector.fit_transform(token_counts)

def classificationRandomForest(trainData, trainLabels, testData, params):
    """Fit a random forest and predict, timing both phases.

    params: mapping of ``RandomForestClassifier`` keyword arguments; any key
        missing from it falls back to the default listed below.

    Returns ``(trainTime, testTime, predictLabels)`` with times in seconds.
    The training time covers construction of the classifier plus ``fit``.
    """
    fallbacks = {
        "n_estimators": 100,
        "criterion": "gini",
        "max_depth": None,
        "min_samples_split": 2,
        "min_samples_leaf": 1,
        "n_jobs": -1,
        "random_state": 42,
    }

    # training
    fit_started = time.perf_counter()
    settings = {key: params.get(key, default) for key, default in fallbacks.items()}
    forest = RandomForestClassifier(**settings)
    forest.fit(trainData, trainLabels)
    trainTime = time.perf_counter() - fit_started

    # prediction
    predict_started = time.perf_counter()
    predictLabels = forest.predict(testData)
    testTime = time.perf_counter() - predict_started

    return trainTime, testTime, predictLabels

def randomForestWithGridSearch(outDir, projectBasePath, projectName, kf, dim, eps, csv_filename):
    """Grid-search a random forest on every split of ``kf`` and log the scores to CSV.

    Parameters:
        outDir: not used by this function (kept for interface compatibility).
        projectBasePath: directory containing the ``flaky`` and ``nonFlaky``
            sub-directories with the ``.py`` data points.
        projectName: not used by this function (kept for interface compatibility).
        kf: splitter exposing ``split(X, y)`` that yields train/test index arrays.
        dim, eps: forwarded to ``vectorization``.
        csv_filename: path of the CSV file to (over)write with one row per
            (split, cv) combination.

    Returns:
        The best parameters of the last grid search performed, or ``None``
        when ``kf`` yields no split.
    """
    # Load from the directory the caller passed in rather than a module global.
    flakyDir = os.path.join(projectBasePath, 'flaky')
    nonFlakyDir = os.path.join(projectBasePath, 'nonFlaky')
    dataPointsFlaky = getDataPoints(flakyDir)
    dataPointsNonFlaky = getDataPoints(nonFlakyDir)
    dataPoints = dataPointsFlaky + dataPointsNonFlaky
    print("Data points before vectorization:", len(dataPoints))

    # Build a dense 2-D (n_samples, n_features) matrix once, instead of a
    # 3-D stack of single-row arrays that must be flattened on every split.
    Z = vectorization(dataPoints, dim=dim, eps=eps)
    features = Z.toarray() if hasattr(Z, "toarray") else np.asarray(Z)
    if features.ndim != 2:
        features = features.reshape(features.shape[0], -1)
    # Flaky documents come first in dataPoints, so they get label 1.
    labels = np.array([1] * len(dataPointsFlaky) + [0] * len(dataPointsNonFlaky))

    # Define the parameter grid for GridSearchCV
    param_grid = {
        "n_estimators": [50, 100, 200],
        "criterion": ["gini", "entropy"],
        "max_depth": [None, 10, 20],
        "min_samples_split": [2, 5],
        "min_samples_leaf": [1, 2]
    }

    # GridSearchCV clones its estimator, so one template serves every search.
    rf = RandomForestClassifier(random_state=42)
    best_params = None

    # Open the CSV file for writing results
    with open(csv_filename, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        # Header columns match the row written below, one to one.
        writer.writerow(["criterion", "cvTest", "n_estimators", "max_depth",
                         "min_samples_split", "min_samples_leaf",
                         "precision", "recall", "accuracy", "f1"])

        for train_index, test_index in kf.split(features, labels):
            trainData, testData = features[train_index], features[test_index]
            trainLabels, testLabels = labels[train_index], labels[test_index]

            for cvTest in (3, 5):
                # GridSearchCV: searching for the best hyperparameters
                grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=cvTest, n_jobs=-1, verbose=1)
                grid_search.fit(trainData, trainLabels)

                best_params = grid_search.best_params_
                print(f"Best hyperparameters: {best_params}")

                # Retrain with the winning parameters and score on the held-out part.
                _trainTime, _testTime, predictLabels = classificationRandomForest(
                    trainData, trainLabels, testData, best_params)
                precision, recall, accuracy, f1 = computeResults(testLabels, predictLabels)

                writer.writerow([best_params['criterion'], cvTest,
                                 best_params['n_estimators'], best_params['max_depth'],
                                 best_params['min_samples_split'], best_params['min_samples_leaf'],
                                 precision, recall, accuracy, f1])

    return best_params

# Script entry point: configure paths and the splitter, then run the random
# forest grid search and print the parameters it returns.
if __name__ == "__main__":
    # Parameters setup
    # NOTE(review): flakyZip / nonFlakyZip are assigned but never used in this
    # block, and extract_zip is not called here -- the data is presumably
    # expected to be present under extractDir already; confirm.
    flakyZip = "cleaned_flaky_files.zip"
    nonFlakyZip = "reduced_nonflaky_files.zip"
    extractDir = "extracted"
    outDir = "results_Random_Forest"
    # Create both directories if missing; existing ones are left untouched.
    os.makedirs(outDir, exist_ok=True)
    os.makedirs(extractDir, exist_ok=True)

    # 30 stratified shuffle splits, each holding out 20% of the samples.
    # No random_state is given, so the splits differ from run to run.
    numSplit = 30
    testSetSize = 0.2
    kf = StratifiedShuffleSplit(n_splits=numSplit, test_size=testSetSize)

    # Define the CSV file for results
    csv_filename = os.path.join(outDir, "params-random-forest.csv")

    # Run Random Forest with Grid Search and save results to CSV
    # extractDir is passed as the base path; "projectName" is a literal placeholder.
    best_params = randomForestWithGridSearch(outDir, extractDir, "projectName", kf, dim=0, eps=0.3, csv_filename=csv_filename)
    print(f"Best hyperparameters found: {best_params}")
    print("Random Forest analysis completed.")





Data points before vectorization: 94
Fitting 3 folds for each of 72 candidates, totalling 216 fits
Best hyperparameters: {'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Precision: 0.6666666666666666, Recall: 0.6, Accuracy: 0.631578947368421, F1 Score: 0.631578947368421
Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best hyperparameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Precision: 0.75, Recall: 0.6, Accuracy: 0.6842105263157895, F1 Score: 0.6666666666666665
Fitting 3 folds for each of 72 candidates, totalling 216 fits
Best hyperparameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Precision: 0.7142857142857143, Recall: 0.5555555555555556, Accuracy: 0.6842105263157895, F1 Score: 0.6250000000000001
Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best hyperp

Best hyperparameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Precision: 0.7272727272727273, Recall: 0.8888888888888888, Accuracy: 0.7894736842105263, F1 Score: 0.7999999999999999
Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best hyperparameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
Precision: 0.7272727272727273, Recall: 0.8888888888888888, Accuracy: 0.7894736842105263, F1 Score: 0.7999999999999999
Fitting 3 folds for each of 72 candidates, totalling 216 fits
Best hyperparameters: {'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Precision: 0.7272727272727273, Recall: 0.8888888888888888, Accuracy: 0.7894736842105263, F1 Score: 0.7999999999999999
Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best hyperparameters: {'criterion': 'entropy', 'max_depth': None,

Best hyperparameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Precision: 0.8333333333333334, Recall: 0.5555555555555556, Accuracy: 0.7368421052631579, F1 Score: 0.6666666666666667
Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best hyperparameters: {'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
Precision: 0.8571428571428571, Recall: 0.6666666666666666, Accuracy: 0.7894736842105263, F1 Score: 0.75
Fitting 3 folds for each of 72 candidates, totalling 216 fits
Best hyperparameters: {'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
Precision: 0.875, Recall: 0.7777777777777778, Accuracy: 0.8421052631578947, F1 Score: 0.823529411764706
Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best hyperparameters: {'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 

In [1]:
import os
import time
import csv
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.random_projection import johnson_lindenstrauss_min_dim, SparseRandomProjection
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
import warnings

warnings.filterwarnings("ignore")

###############################################################################
# Helper Functions

def getDataPoints(path):
    """Gather the stripped contents of all non-empty ``.py`` files below ``path``.

    Sub-directories are searched recursively. Messages are printed when the
    directory is missing, when a file is empty after stripping, and when no
    usable document was found at all. Returns a list of strings (possibly
    empty).
    """
    collected = []
    if not os.path.exists(path):
        print(f"Directory does not exist: {path}")
        return collected

    for current_dir, _dirs, file_names in os.walk(path):
        python_files = (fn for fn in file_names if fn.endswith(".py"))
        for file_name in python_files:
            file_path = os.path.join(current_dir, file_name)
            with open(file_path, encoding="utf-8") as source:
                content = source.read().strip()
            if content:
                collected.append(content)
            else:
                print(f"Empty file: {file_path}")

    if not collected:
        print(f"No valid documents found in directory: {path}")

    return collected

def computeResults(testLabels, predictLabels):
    """Return ``(precision, recall, accuracy, f1)`` for the given predictions."""
    metric_functions = (precision_score, recall_score, accuracy_score, f1_score)
    # Evaluated in order: precision, recall, accuracy, f1.
    precision, recall, accuracy, f1 = (
        metric(testLabels, predictLabels) for metric in metric_functions
    )
    return precision, recall, accuracy, f1

###############################################################################
# FLAST

def vectorization(dataPoints, dim=0, eps=0.3):
    """Vectorize documents as token counts and optionally reduce their dimension.

    dataPoints: iterable of document strings.
    dim: number of projected components; a value ``<= 0`` means "use the
        Johnson-Lindenstrauss minimum for this many documents and ``eps``".
    eps: JL distortion; ``eps == 0`` skips the random projection entirely.
    """
    # stop_words=None: no stop-word filtering is applied.
    counter = CountVectorizer(stop_words=None)
    counts = counter.fit_transform(dataPoints)

    if eps == 0:
        return counts

    if dim <= 0:
        n_documents = counts.shape[0]
        dim = johnson_lindenstrauss_min_dim(n_documents, eps=eps)

    return SparseRandomProjection(n_components=dim).fit_transform(counts)

def classificationDecisionTree(trainData, trainLabels, testData, params):
    """Fit a decision tree with ``params`` and predict, timing both phases.

    params: mapping of ``DecisionTreeClassifier`` keyword arguments; keys
        missing from it fall back to the defaults listed below.

    Returns ``(trainTime, testTime, predictLabels)`` with times in seconds.
    Only ``fit`` and ``predict`` are timed, not the classifier construction.
    """
    print(f"Training Decision Tree with parameters: {params}")

    fallbacks = {
        "criterion": "gini",
        "splitter": "best",
        "max_depth": None,
        "min_samples_split": 2,
        "min_samples_leaf": 1,
    }
    tree = DecisionTreeClassifier(
        **{key: params.get(key, default) for key, default in fallbacks.items()}
    )

    # Training phase
    fit_started = time.perf_counter()
    tree.fit(trainData, trainLabels)
    trainTime = time.perf_counter() - fit_started

    # Prediction phase
    predict_started = time.perf_counter()
    predictLabels = tree.predict(testData)
    testTime = time.perf_counter() - predict_started

    return trainTime, testTime, predictLabels

from sklearn.model_selection import train_test_split

def decisionTreeWithGridSearch(outDir, projectBasePath, projectName, dim, eps, csv_filename):
    """Grid-search a decision tree and log every candidate's held-out scores to CSV.

    Parameters:
        outDir: not used by this function (kept for interface compatibility).
        projectBasePath: directory containing the ``flaky`` and ``nonFlaky``
            sub-directories with the ``.py`` data points.
        projectName: not used by this function (kept for interface compatibility).
        dim, eps: forwarded to ``vectorization``.
        csv_filename: path of the CSV file to (over)write; one row is written
            per (cv, candidate) combination.

    Returns:
        ``best_params_`` of the last grid search run (the one with ``cv=5``).
    """
    # Load from the directory the caller passed in rather than a module global.
    flakyDir = os.path.join(projectBasePath, 'flaky')
    nonFlakyDir = os.path.join(projectBasePath, 'nonFlaky')
    dataPointsFlaky = getDataPoints(flakyDir)
    dataPointsNonFlaky = getDataPoints(nonFlakyDir)
    dataPoints = dataPointsFlaky + dataPointsNonFlaky
    print("Data points before vectorization:", len(dataPoints))

    # Vectorize data into a dense 2-D (n_samples, n_features) matrix.
    # Stacking Z[i].toarray() row by row yields a 3-D (n, 1, d) array, which
    # scikit-learn estimators reject; densify the whole matrix at once instead.
    Z = vectorization(dataPoints, dim=dim, eps=eps)
    features = Z.toarray() if hasattr(Z, "toarray") else np.asarray(Z)
    if features.ndim != 2:
        features = features.reshape(features.shape[0], -1)
    # Flaky documents come first in dataPoints, so they get label 1.
    labels = np.array([1] * len(dataPointsFlaky) + [0] * len(dataPointsNonFlaky))

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

    # Define parameter grid for GridSearchCV
    param_grid = {
        "criterion": ["gini", "entropy"],
        "splitter": ["best", "random"],
        "max_depth": [10, 30, 50, 100, 300, 500],
        "min_samples_split": [2, 5],
        "min_samples_leaf": [1, 2]
    }

    # Open CSV file to write results
    with open(csv_filename, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["criterion", "cvTest", "splitter", "max_depth", "min_samples_split", "min_samples_leaf", "precision", "recall", "accuracy", "f1", "train_time", "test_time"])

        # Template estimator; GridSearchCV clones it for every fit.
        dt = DecisionTreeClassifier(random_state=42)

        # Perform Grid Search with Cross-Validation
        for cvTest in [3, 5]:
            grid_search = GridSearchCV(estimator=dt, param_grid=param_grid, cv=cvTest, n_jobs=-1, verbose=2)
            grid_search.fit(X_train, y_train)

            results = grid_search.cv_results_
            candidates = zip(results['params'], results['mean_fit_time'], results['mean_score_time'])
            for params, mean_fit_time, mean_score_time in candidates:
                # Retrain each candidate on the full training set and score it
                # on the held-out test set.
                _trainTime, _testTime, predictLabels = classificationDecisionTree(X_train, y_train, X_test, params)
                precision, recall, accuracy, f1 = computeResults(y_test, predictLabels)

                # The logged times are the cross-validation means reported by
                # GridSearchCV, not the timings of the retrain above.
                writer.writerow([params['criterion'], cvTest, params['splitter'], params['max_depth'],
                                 params['min_samples_split'], params['min_samples_leaf'],
                                 precision, recall, accuracy, f1, mean_fit_time, mean_score_time])

    return grid_search.best_params_



###############################################################################
# Main Execution
# Script entry point: configure paths, run the decision tree grid search and
# print the parameters it returns.
if __name__ == "__main__":
    # Parameters setup
    # NOTE(review): flakyZip / nonFlakyZip are assigned but never used in this
    # block -- the data is presumably expected to be present under extractDir
    # already; confirm.
    flakyZip = "cleaned_flaky_files.zip"
    nonFlakyZip = "reduced_nonflaky_files.zip"
    extractDir = "extracted"
    outDir = "results_DecisionTree"
    
    # Ensure the directories for output exist
    # (existing directories are left untouched because of exist_ok=True)
    os.makedirs(outDir, exist_ok=True)
    os.makedirs(extractDir, exist_ok=True)

    # Define the CSV file for results
    csv_filename = os.path.join(outDir, "params-DecisionTree.csv")

    # Run Decision Tree with Grid Search and save results to CSV
    # extractDir is passed as the base path; "projectName" is a literal placeholder.
    best_params = decisionTreeWithGridSearch(outDir, extractDir, "projectName", dim=0, eps=0.3, csv_filename=csv_filename)
    
    print(f"Best hyperparameters found: {best_params}")
    print("Decision Tree analysis completed.")



Data points before vectorization: 94
Fitting 3 folds for each of 96 candidates, totalling 288 fits
Results for CV=3:
Iteration 0: Mean score: 0.6391129032258065, Params: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}, Train time: 0.0051s, Test time: 0.0007s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}
Iteration 1: Mean score: 0.5749327956989246, Params: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}, Train time: 0.0022s, Test time: 0.0007s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}
Iteration 2: Mean score: 0.6391129032258065, Params: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}, Train time: 0.0057s, Test t

Iteration 34: Mean score: 0.6391129032258065, Params: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}, Train time: 0.0050s, Test time: 0.0000s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
Iteration 35: Mean score: 0.5325940860215054, Params: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'random'}, Train time: 0.0020s, Test time: 0.0007s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'random'}
Iteration 36: Mean score: 0.6606182795698925, Params: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 2, 'min_samples_split': 2, 'splitter': 'best'}, Train time: 0.0049s, Test time: 0.0009s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 300, 'min_samples_lea

Iteration 66: Mean score: 0.6384408602150536, Params: {'criterion': 'entropy', 'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}, Train time: 0.0070s, Test time: 0.0007s
Training Decision Tree with parameters: {'criterion': 'entropy', 'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
Iteration 67: Mean score: 0.575268817204301, Params: {'criterion': 'entropy', 'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'random'}, Train time: 0.0020s, Test time: 0.0008s
Training Decision Tree with parameters: {'criterion': 'entropy', 'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'random'}
Iteration 68: Mean score: 0.6599462365591398, Params: {'criterion': 'entropy', 'max_depth': 50, 'min_samples_leaf': 2, 'min_samples_split': 2, 'splitter': 'best'}, Train time: 0.0075s, Test time: 0.0010s
Training Decision Tree with parameters: {'criterion': 'entropy', 'max_depth': 50, 'min_

Iteration 95: Mean score: 0.575268817204301, Params: {'criterion': 'entropy', 'max_depth': 500, 'min_samples_leaf': 2, 'min_samples_split': 5, 'splitter': 'random'}, Train time: 0.0020s, Test time: 0.0000s
Training Decision Tree with parameters: {'criterion': 'entropy', 'max_depth': 500, 'min_samples_leaf': 2, 'min_samples_split': 5, 'splitter': 'random'}
Fitting 5 folds for each of 96 candidates, totalling 480 fits
Results for CV=5:
Iteration 0: Mean score: 0.6485380116959065, Params: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}, Train time: 0.0058s, Test time: 0.0006s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}
Iteration 1: Mean score: 0.647953216374269, Params: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}, Train time: 0.0021s, Test time: 0.0006s
Training Decisi

Iteration 33: Mean score: 0.647953216374269, Params: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}, Train time: 0.0023s, Test time: 0.0002s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}
Iteration 34: Mean score: 0.6485380116959065, Params: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}, Train time: 0.0056s, Test time: 0.0008s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
Iteration 35: Mean score: 0.6368421052631579, Params: {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'random'}, Train time: 0.0021s, Test time: 0.0004s
Training Decision Tree with parameters: {'criterion': 'gini', 'max_depth': 300, 'min_samples_le

Iteration 63: Mean score: 0.5426900584795321, Params: {'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 2, 'min_samples_split': 5, 'splitter': 'random'}, Train time: 0.0023s, Test time: 0.0002s
Training Decision Tree with parameters: {'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 2, 'min_samples_split': 5, 'splitter': 'random'}
Iteration 64: Mean score: 0.6269005847953215, Params: {'criterion': 'entropy', 'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}, Train time: 0.0080s, Test time: 0.0005s
Training Decision Tree with parameters: {'criterion': 'entropy', 'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}
Iteration 65: Mean score: 0.5426900584795321, Params: {'criterion': 'entropy', 'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}, Train time: 0.0031s, Test time: 0.0004s
Training Decision Tree with parameters: {'criterion': 'entropy', 'max_depth': 50, 'm

Iteration 95: Mean score: 0.5426900584795321, Params: {'criterion': 'entropy', 'max_depth': 500, 'min_samples_leaf': 2, 'min_samples_split': 5, 'splitter': 'random'}, Train time: 0.0017s, Test time: 0.0007s
Training Decision Tree with parameters: {'criterion': 'entropy', 'max_depth': 500, 'min_samples_leaf': 2, 'min_samples_split': 5, 'splitter': 'random'}
Best hyperparameters found: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'splitter': 'best'}
Decision Tree analysis completed.
