In [1]:
import os
import time
import zipfile
import pickle
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.random_projection import johnson_lindenstrauss_min_dim, SparseRandomProjection
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit

###############################################################################
# Utility functions

def extract_zip(zip_file, extract_to):
    """Unpack every member of the archive ``zip_file`` below ``extract_to``.

    Args:
        zip_file: Path (or file object) of the zip archive to read.
        extract_to: Destination directory for the extracted members.
    """
    with zipfile.ZipFile(zip_file, mode='r') as archive:
        archive.extractall(path=extract_to)

def getDataPoints(path):
    """Collect the stripped text of every non-empty ``.py`` file under ``path``.

    The directory tree is walked recursively in sorted order (sub-directory
    names and file names), so the returned list is identical on every run;
    ``os.walk`` on its own yields entries in arbitrary file-system order.

    Args:
        path: Root directory to scan.

    Returns:
        A list with one string per file (leading/trailing whitespace removed).
        Files that are empty after stripping are reported on stdout and left
        out. The list is empty when ``path`` does not exist or contains no
        usable ``.py`` file; both situations are reported on stdout.
    """
    dataPointsList = []
    if not os.path.exists(path):
        print(f"Directory does not exist: {path}")
        return dataPointsList

    for root, dirs, files in os.walk(path):
        # Sorting ``dirs`` in place fixes the order in which os.walk descends.
        dirs.sort()
        for dataPointName in sorted(files):
            if not dataPointName.endswith(".py"):  # Only consider Python files
                continue
            file_path = os.path.join(root, dataPointName)
            with open(file_path, encoding="utf-8") as fileIn:
                dp = fileIn.read().strip()
            if dp:  # Ensure the document is not empty
                dataPointsList.append(dp)
            else:
                print(f"Empty file: {file_path}")

    if not dataPointsList:
        print(f"No valid documents found in directory: {path}")

    return dataPointsList

def computeResults(testLabels, predictLabels):
    """Score predictions with precision, recall and accuracy.

    The three scores are printed and returned as a tuple
    ``(precision, recall, accuracy)``. If any scorer raises, the error is
    printed and the tuple ``("-", "-", "-")`` is returned instead, so callers
    must be prepared for string placeholders.
    """
    try:
        precision, recall, accuracy = (
            scorer(testLabels, predictLabels)
            for scorer in (precision_score, recall_score, accuracy_score)
        )
        print(f"Precision: {precision}, Recall: {recall}, Accuracy: {accuracy}")
    except Exception as e:
        print(f"Error computing metrics: {e}")
        return "-", "-", "-"
    return precision, recall, accuracy

###############################################################################
# FLAST functions

def flastVectorization(dataPoints, dim=0, eps=0.3):
    """Turn raw documents into (optionally projected) token-count vectors.

    Args:
        dataPoints: Iterable of document strings.
        dim: Target dimensionality of the random projection; when ``<= 0`` the
            Johnson-Lindenstrauss minimum for the number of documents and
            ``eps`` is used.
        eps: JL distortion; ``0`` disables the projection altogether.

    Returns:
        The token-count matrix itself when ``eps == 0``, otherwise its sparse
        random projection.
    """
    termCounts = CountVectorizer(stop_words=None).fit_transform(dataPoints)  # no stop word removal
    if eps == 0:
        return termCounts

    targetDim = (
        johnson_lindenstrauss_min_dim(termCounts.shape[0], eps=eps)
        if dim <= 0
        else dim
    )
    return SparseRandomProjection(n_components=targetDim).fit_transform(termCounts)

def flastClassification(trainData, trainLabels, testData, sigma, k, params):
    """Fit a k-nearest-neighbours classifier and time training and prediction.

    Args:
        trainData: Training feature matrix passed to ``fit``.
        trainLabels: Labels belonging to ``trainData``.
        testData: Feature matrix to classify.
        sigma: Not read by this implementation.
        k: Number of neighbours.
        params: Mapping providing the "algorithm", "metric" and "weights"
            settings of the classifier.

    Returns:
        ``(trainTime, testTime, predictLabels)`` where both times are
        ``time.perf_counter`` differences in seconds.
    """
    knnOptions = {
        "algorithm": params["algorithm"],
        "metric": params["metric"],
        "weights": params["weights"],
        "n_neighbors": k,
        "n_jobs": 1,
    }
    classifier = KNeighborsClassifier(**knnOptions)

    fitStart = time.perf_counter()
    classifier.fit(trainData, trainLabels)
    trainTime = time.perf_counter() - fitStart

    predictStart = time.perf_counter()
    predictLabels = classifier.predict(testData)
    testTime = time.perf_counter() - predictStart

    return trainTime, testTime, predictLabels

###############################################################################
# FLAST KNN analysis

def flastKNN(outDir, flakyZip, nonFlakyZip, extractDir, kf, dim, eps, k, sigma, params):
    """Run the k-NN experiment once and average the per-fold results.

    The two archives are extracted below ``extractDir``, their ``.py`` files
    are vectorized, and every split produced by ``kf`` is classified with
    ``flastClassification`` and scored with ``computeResults``.

    Args:
        outDir: Existing directory used for a temporary pickle file whose
            size is reported as ``storage``.
        flakyZip: Archive holding the flaky samples (label 1).
        nonFlakyZip: Archive holding the non-flaky samples (label 0).
        extractDir: Directory below which both archives are unpacked.
        kf: Splitter exposing ``split(X, y)``.
        dim: Projection dimensionality forwarded to ``flastVectorization``.
        eps: JL epsilon forwarded to ``flastVectorization``.
        k: Number of neighbours.
        sigma: Forwarded to ``flastClassification`` and used in the name of
            the temporary pickle file.
        params: Classifier settings; ``params["metric"]`` is also used in
            error messages.

    Returns:
        ``(avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest, avgNonFlakyTest,
        avgP, avgR, avgA, storage, avgTPrep, avgTPred)``. Averages are taken
        over the folds that were classified and scored successfully; when
        there is no such fold every average is the placeholder ``"-"``
        (``storage`` is still reported).

    Raises:
        ValueError: If neither archive yields a document.
    """
    v0 = time.perf_counter()

    # Extract the zip files
    flakyDir = os.path.join(extractDir, 'flaky')
    nonFlakyDir = os.path.join(extractDir, 'nonFlaky')
    os.makedirs(flakyDir, exist_ok=True)
    os.makedirs(nonFlakyDir, exist_ok=True)

    extract_zip(flakyZip, flakyDir)
    extract_zip(nonFlakyZip, nonFlakyDir)

    dataPointsFlaky = getDataPoints(flakyDir)
    dataPointsNonFlaky = getDataPoints(nonFlakyDir)
    dataPoints = dataPointsFlaky + dataPointsNonFlaky

    print(f"Number of flaky documents: {len(dataPointsFlaky)}")
    print(f"Number of non-flaky documents: {len(dataPointsNonFlaky)}")
    print(f"Total number of documents: {len(dataPoints)}")
    if not dataPoints:
        raise ValueError("No documents available for vectorization. Please check the input directories.")
    print(f"Sample document: {dataPoints[0]}")

    Z = flastVectorization(dataPoints, dim=dim, eps=eps)
    dataPointsList = np.array([Z[i].toarray() for i in range(Z.shape[0])])
    dataLabelsList = np.array([1]*len(dataPointsFlaky) + [0]*len(dataPointsNonFlaky))
    vecTime = time.perf_counter() - v0

    # Storage calculation: the data set is pickled only to measure its size,
    # so the file is removed again even when dumping fails half-way.
    pickleDumpKNN = os.path.join(outDir, f"flast-k{k}-sigma{sigma}.pickle")
    try:
        with open(pickleDumpKNN, "wb") as pickleFile:
            pickle.dump((dataPointsList, dataLabelsList), pickleFile)
        storage = os.path.getsize(pickleDumpKNN)
    finally:
        if os.path.exists(pickleDumpKNN):
            os.remove(pickleDumpKNN)

    avgP, avgR, avgA = 0, 0, 0
    avgTPrep, avgTPred = 0, 0
    avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest, avgNonFlakyTest = 0, 0, 0, 0
    successFold, precisionFold = 0, 0

    for trnIdx, tstIdx in kf.split(dataPointsList, dataLabelsList):
        try:
            trainData, testData = dataPointsList[trnIdx], dataPointsList[tstIdx]
            trainLabels, testLabels = dataLabelsList[trnIdx], dataLabelsList[tstIdx]
            flakyTrain = int(trainLabels.sum())
            flakyTest = int(testLabels.sum())
            if flakyTrain == 0 or flakyTest == 0:
                print("Skipping fold with no flaky/non-flaky examples...")
                continue

            # Each sample is stored as a 1 x d matrix; flatten to (n, d).
            trainData = trainData.reshape((trainData.shape[0], -1))
            testData = testData.reshape((testData.shape[0], -1))

            trainTime, testTime, predictLabels = flastClassification(trainData, trainLabels, testData, sigma, k, params)
            preparationTime = (vecTime * len(trainData) / len(dataPoints)) + trainTime
            predictionTime = (vecTime / len(dataPoints)) + (testTime / len(testData))
            precision, recall, accuracy = computeResults(testLabels, predictLabels)

            print(f"Precision: {precision}, Recall: {recall}, Accuracy: {accuracy}")
        except Exception as e:
            print(f"An error occurred during KNN analysis with metric='{params['metric']}', k={k}: {e}")
            continue

        if isinstance(recall, str):
            # computeResults reports a scoring failure with "-" placeholders;
            # such a fold must not take part in any average.
            print("Skipping fold: metrics could not be computed.")
            continue

        # Accumulate only once the fold has been classified and scored, so
        # the sums and the fold counter always describe the same folds.
        successFold += 1
        avgFlakyTrain += flakyTrain
        avgNonFlakyTrain += len(trainLabels) - flakyTrain
        avgFlakyTest += flakyTest
        avgNonFlakyTest += len(testLabels) - flakyTest
        if not isinstance(precision, str):
            precisionFold += 1
            avgP += precision
            avgA += accuracy
        avgR += recall
        avgTPrep += preparationTime
        avgTPred += predictionTime

    if successFold == 0:
        # Nothing to average: report placeholders instead of dividing by zero.
        print("No fold could be evaluated; averages are reported as '-'.")
        return "-", "-", "-", "-", "-", "-", "-", storage, "-", "-"

    if precisionFold > 0:
        avgP /= precisionFold
        avgA /= precisionFold
    else:
        avgP = avgA = "-"
    avgR /= successFold
    avgTPrep /= successFold
    avgTPred /= successFold
    avgFlakyTrain /= successFold
    avgNonFlakyTrain /= successFold
    avgFlakyTest /= successFold
    avgNonFlakyTest /= successFold

    return avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest, avgNonFlakyTest, avgP, avgR, avgA, storage, avgTPrep, avgTPred

if __name__ == "__main__":
    # Input archives and working/output directories
    flakyZip = "cleaned_flaky_files.zip"
    nonFlakyZip = "reduced_nonflaky_files.zip"
    extractDir = "extracted"
    outDir = "results/"
    os.makedirs(outDir, exist_ok=True)
    os.makedirs(extractDir, exist_ok=True)

    # Cross-validation: repeated stratified 80/20 shuffles
    numSplit = 30
    testSetSize = 0.2
    kf = StratifiedShuffleSplit(n_splits=numSplit, test_size=testSetSize)

    # Start a fresh CSV containing only the header; one row is appended per
    # parameter combination further down.
    outFile = "params-knn.csv"
    csvPath = os.path.join(outDir, outFile)
    csvHeader = "distance,k,sigma,eps,avgFlakyTrain,avgNonFlakyTrain,avgFlakyTest,avgNonFlakyTest,precision,recall,accuracy,storage,preparationTime,predictionTime\n"
    with open(csvPath, "w") as fo:
        fo.write(csvHeader)

    # KNN parameters (k and params["metric"] are overridden by the sweep below)
    k = 7
    sigma = 0.5
    dim = 0  # Number of dimensions (0: JL with error eps)
    eps = 0.3  # JL epsilon
    params = {"algorithm": "brute", "metric": "cosine", "weights": "uniform"}

    for metric in ["cosine", "euclidean"]:
        for k in [3, 7]:
            print(f"{metric=}, {k=}")
            params["metric"] = metric
            try:
                results = flastKNN(outDir, flakyZip, nonFlakyZip, extractDir, kf, dim, eps, k, sigma, params)

                # Serialise the result tuple as comma-separated text
                results_flat = ','.join(str(value) for value in results)
                print(f"Results to be written: {params['metric']}, {k}, {sigma}, {eps}, {results_flat}")

                csvRow = f"{params['metric']},{k},{sigma},{eps},{results_flat}\n"
                with open(csvPath, "a") as fo:
                    fo.write(csvRow)
            except Exception as e:
                # A failing combination is reported and the sweep continues.
                print(f"Failed to complete KNN analysis for metric='{metric}', k={k}: {e}")

    print("KNN analysis completed. Results saved to:", outFile)


metric='cosine', k=3


FileNotFoundError: [Errno 2] No such file or directory: 'flaky_methods.zip'

SVM

In [6]:
import os
import time
import zipfile
import pickle
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.random_projection import johnson_lindenstrauss_min_dim, SparseRandomProjection
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score
from sklearn.model_selection import StratifiedShuffleSplit

###############################################################################
# Utility functions

def extract_zip(zip_file, extract_to):
    """Extract all members of ``zip_file`` into the directory ``extract_to``.

    Args:
        zip_file: Path (or file object) of the zip archive to read.
        extract_to: Directory that receives the extracted members.
    """
    archive = zipfile.ZipFile(zip_file, 'r')
    with archive:  # closes the archive when extraction ends or fails
        archive.extractall(extract_to)

def getDataPoints(path):
    """Collect the stripped text of every non-empty ``.py`` file under ``path``.

    The directory tree is walked recursively in sorted order (sub-directory
    names and file names), so the returned list is identical on every run;
    ``os.walk`` on its own yields entries in arbitrary file-system order.

    Args:
        path: Root directory to scan.

    Returns:
        A list with one string per file (leading/trailing whitespace removed).
        Files that are empty after stripping are reported on stdout and left
        out. The list is empty when ``path`` does not exist or contains no
        usable ``.py`` file; both situations are reported on stdout.
    """
    dataPointsList = []
    if not os.path.exists(path):
        print(f"Directory does not exist: {path}")
        return dataPointsList

    for root, dirs, files in os.walk(path):
        # Sorting ``dirs`` in place fixes the order in which os.walk descends.
        dirs.sort()
        for dataPointName in sorted(files):
            if not dataPointName.endswith(".py"):  # Only consider Python files
                continue
            file_path = os.path.join(root, dataPointName)
            with open(file_path, encoding="utf-8") as fileIn:
                dp = fileIn.read().strip()
            if dp:  # Ensure the document is not empty
                dataPointsList.append(dp)
            else:
                print(f"Empty file: {file_path}")

    if not dataPointsList:
        print(f"No valid documents found in directory: {path}")

    return dataPointsList

def computeResults(testLabels, predictLabels):
    """Return ``(precision, recall)`` of the predictions.

    Both scorers are called with ``zero_division=0``.
    """
    scores = [
        scorer(testLabels, predictLabels, zero_division=0)
        for scorer in (precision_score, recall_score)
    ]
    return tuple(scores)

###############################################################################
# FLAST functions

def flastVectorization(dataPoints, dim=0, eps=0.3):
    """Vectorize documents as token counts and optionally project them.

    Args:
        dataPoints: Iterable of document strings.
        dim: Number of projection components; a value ``<= 0`` selects the
            Johnson-Lindenstrauss minimum for the number of documents and
            ``eps``.
        eps: JL distortion; with ``0`` the unprojected count matrix is
            returned.

    Returns:
        The count matrix, or its sparse random projection when ``eps != 0``.
    """
    bagOfWords = CountVectorizer(stop_words=None).fit_transform(dataPoints)  # no stop word removal
    if eps == 0:
        return bagOfWords

    nComponents = dim
    if nComponents <= 0:
        nComponents = johnson_lindenstrauss_min_dim(bagOfWords.shape[0], eps=eps)
    projector = SparseRandomProjection(n_components=nComponents)
    return projector.fit_transform(bagOfWords)

def flastSVMClassification(trainData, trainLabels, testData, C, kernel, params):
    """Train an SVC and classify ``testData``, timing both phases.

    Args:
        trainData: Training feature matrix passed to ``fit``.
        trainLabels: Labels belonging to ``trainData``.
        testData: Feature matrix to classify.
        C: Regularization parameter of the SVC.
        kernel: Kernel name of the SVC.
        params: Not read by this implementation.

    Returns:
        ``(trainTime, testTime, predictLabels)``; ``trainTime`` covers
        constructing and fitting the classifier, both times are
        ``time.perf_counter`` differences in seconds.
    """
    fitStart = time.perf_counter()
    classifier = SVC(C=C, kernel=kernel)
    classifier.fit(trainData, trainLabels)
    fitEnd = time.perf_counter()

    predictStart = time.perf_counter()
    predictLabels = classifier.predict(testData)
    predictEnd = time.perf_counter()

    return fitEnd - fitStart, predictEnd - predictStart, predictLabels

###############################################################################
# FLAST SVM analysis

def flastSVM(outDir, flakyZip, nonFlakyZip, extractDir, kf, dim, eps, C, kernel, params):
    """Run the SVM experiment once and average the per-fold results.

    The two archives are extracted below ``extractDir``, their ``.py`` files
    are vectorized, and every split produced by ``kf`` is classified with
    ``flastSVMClassification`` and scored with ``computeResults``.

    Args:
        outDir: Existing directory used for a temporary pickle file whose
            size is reported as ``storage``.
        flakyZip: Archive holding the flaky samples (label 1).
        nonFlakyZip: Archive holding the non-flaky samples (label 0).
        extractDir: Directory below which both archives are unpacked.
        kf: Splitter exposing ``split(X, y)``.
        dim: Projection dimensionality forwarded to ``flastVectorization``.
        eps: JL epsilon forwarded to ``flastVectorization``.
        C: SVC regularization parameter.
        kernel: SVC kernel name.
        params: Forwarded unchanged to ``flastSVMClassification``.

    Returns:
        ``(avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest, avgNonFlakyTest,
        avgP, avgR, storage, avgTPrep, avgTPred)``. Averages are taken over
        the folds that were evaluated; when every fold was skipped each
        average is the placeholder ``"-"`` (``storage`` is still reported).

    Raises:
        ValueError: If neither archive yields a document.
    """
    v0 = time.perf_counter()

    # Extract the zip files
    flakyDir = os.path.join(extractDir, 'flaky')
    nonFlakyDir = os.path.join(extractDir, 'nonFlaky')
    os.makedirs(flakyDir, exist_ok=True)
    os.makedirs(nonFlakyDir, exist_ok=True)

    extract_zip(flakyZip, flakyDir)
    extract_zip(nonFlakyZip, nonFlakyDir)

    dataPointsFlaky = getDataPoints(flakyDir)
    dataPointsNonFlaky = getDataPoints(nonFlakyDir)
    dataPoints = dataPointsFlaky + dataPointsNonFlaky

    print(f"Number of documents: {len(dataPoints)}")
    if not dataPoints:
        raise ValueError("No documents available for vectorization. Please check the input directories.")
    print(f"Sample document: {dataPoints[0]}")

    Z = flastVectorization(dataPoints, dim=dim, eps=eps)
    dataPointsList = np.array([Z[i].toarray() for i in range(Z.shape[0])])
    dataLabelsList = np.array([1]*len(dataPointsFlaky) + [0]*len(dataPointsNonFlaky))
    vecTime = time.perf_counter() - v0

    # Storage calculation: the data set is pickled only to measure its size,
    # so the file is removed again even when dumping fails half-way.
    pickleDumpSVM = os.path.join(outDir, f"flast-svm-C{C}-kernel{kernel}.pickle")
    try:
        with open(pickleDumpSVM, "wb") as pickleFile:
            pickle.dump((dataPointsList, dataLabelsList), pickleFile)
        storage = os.path.getsize(pickleDumpSVM)
    finally:
        if os.path.exists(pickleDumpSVM):
            os.remove(pickleDumpSVM)

    avgP, avgR = 0, 0
    avgTPrep, avgTPred = 0, 0
    avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest, avgNonFlakyTest = 0, 0, 0, 0
    successFold, precisionFold = 0, 0

    for trnIdx, tstIdx in kf.split(dataPointsList, dataLabelsList):
        trainData, testData = dataPointsList[trnIdx], dataPointsList[tstIdx]
        trainLabels, testLabels = dataLabelsList[trnIdx], dataLabelsList[tstIdx]
        flakyTrain = int(trainLabels.sum())
        nonFlakyTrain = len(trainLabels) - flakyTrain
        flakyTest = int(testLabels.sum())
        # SVC.fit rejects a training set with a single class, so a fold
        # lacking either class in training is skipped as well.
        if flakyTrain == 0 or nonFlakyTrain == 0 or flakyTest == 0:
            print("Skipping fold with no flaky/non-flaky examples...")
            continue

        successFold += 1
        avgFlakyTrain += flakyTrain
        avgNonFlakyTrain += nonFlakyTrain
        avgFlakyTest += flakyTest
        avgNonFlakyTest += len(testLabels) - flakyTest

        # Each sample is stored as a 1 x d matrix; flatten to (n, d).
        trainData = trainData.reshape((trainData.shape[0], -1))
        testData = testData.reshape((testData.shape[0], -1))

        trainTime, testTime, predictLabels = flastSVMClassification(trainData, trainLabels, testData, C, kernel, params)
        preparationTime = (vecTime * len(trainData) / len(dataPoints)) + trainTime
        predictionTime = (vecTime / len(dataPoints)) + (testTime / len(testData))
        precision, recall = computeResults(testLabels, predictLabels)

        print(f"Precision: {precision}, Recall: {recall}")
        if precision != "-":
            precisionFold += 1
            avgP += precision
        avgR += recall
        avgTPrep += preparationTime
        avgTPred += predictionTime

    if successFold == 0:
        # Nothing to average: report placeholders instead of dividing by zero.
        print("No fold could be evaluated; averages are reported as '-'.")
        return "-", "-", "-", "-", "-", "-", storage, "-", "-"

    if precisionFold > 0:
        avgP /= precisionFold
    else:
        avgP = "-"
    avgR /= successFold
    avgTPrep /= successFold
    avgTPred /= successFold
    avgFlakyTrain /= successFold
    avgNonFlakyTrain /= successFold
    avgFlakyTest /= successFold
    avgNonFlakyTest /= successFold

    return avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest, avgNonFlakyTest, avgP, avgR, storage, avgTPrep, avgTPred

if __name__ == "__main__":
    # Parameters setup
    flakyZip = "flakyFiles.zip"
    nonFlakyZip = "nonflakyFiles.zip"
    extractDir = "extracted"
    outDir = "results-svm/"
    os.makedirs(outDir, exist_ok=True)
    os.makedirs(extractDir, exist_ok=True)

    numSplit = 30
    testSetSize = 0.2
    kf = StratifiedShuffleSplit(n_splits=numSplit, test_size=testSetSize)

    outFile = "params-svm.csv"
    csvPath = os.path.join(outDir, outFile)
    # One column per value written below: the four swept parameters followed
    # by the nine values returned by flastSVM, in the order it returns them.
    csvColumns = [
        "kernel", "C", "dim", "eps",
        "avgFlakyTrain", "avgNonFlakyTrain", "avgFlakyTest", "avgNonFlakyTest",
        "precision", "recall", "storage", "preparationTime", "predictionTime",
    ]
    with open(csvPath, "w") as fo:
        fo.write(",".join(csvColumns) + "\n")

    # SVM parameters to vary
    C_values = [0.1, 1.0, 10.0]  # Regularization parameter
    kernel_types = ["linear", "rbf", "poly", "sigmoid"]  # Kernel types
    dim_values = [0, 100, 200]  # Dimensionality reduction
    eps_values = [0.1, 0.3, 0.5]  # JL epsilon

    for C in C_values:
        for kernel in kernel_types:
            for dim in dim_values:
                for eps in eps_values:
                    print(f"{kernel=}, {C=}, {dim=}, {eps=}")
                    results = flastSVM(outDir, flakyZip, nonFlakyZip, extractDir, kf, dim, eps, C, kernel, {})
                    # Join every value explicitly: a fixed-width format string
                    # silently drops surplus arguments, which previously lost
                    # recall, storage and both timings.
                    row = [kernel, C, dim, eps, *results]
                    with open(csvPath, "a") as fo:
                        fo.write(",".join(str(value) for value in row) + "\n")

    print("SVM analysis completed. Results saved to:", outFile)


kernel='linear', C=0.1, dim=0, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticme

Precision: 0.5238095238095238, Recall: 0.55
Precision: 0.5, Recall: 0.55
Precision: 0.4782608695652174, Recall: 0.55
Precision: 0.5416666666666666, Recall: 0.65
Precision: 0.38095238095238093, Recall: 0.4
Precision: 0.48, Recall: 0.6
kernel='linear', C=0.1, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dat

Precision: 0.3125, Recall: 0.25
Precision: 0.4444444444444444, Recall: 0.4
Precision: 0.3181818181818182, Recall: 0.35
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.4444444444444444, Recall: 0.6
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.45, Recall: 0.45
Precision: 0.34782608695652173, Recall: 0.4
Precision: 0.35, Recall: 0.35
Precision: 0.4583333333333333, Recall: 0.55
Precision: 0.42857142857142855, Recall: 0.45
Precision: 0.39285714285714285, Recall: 0.55
Precision: 0.3157894736842105, Recall: 0.3
Precision: 0.30434782608695654, Recall: 0.35
Precision: 0.47368421052631576, Recall: 0.45
Precision: 0.2777777777777778, Recall: 0.25
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.34782608695652173, Recall: 0.4
Precision: 0.45, Recall: 0.45
Precision: 0.35714285714285715, Recall: 0.25
Precision: 0.43478260869565216, Recall: 0.5
Precision: 0.4444444444444444, Recall: 0.4
Precision: 0.4583333333333333, Recall: 0.

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
kernel='rbf', C=0.1, dim=200, eps=0.3
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticme

Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4722222222222222, Recall: 0.85
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precis

Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.4864864864864865, Recall: 0.9
kernel='poly', C=0.1, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)


Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4838709677419355, Recall: 0.75
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.45454545454545453, Recall: 0.75
Precision: 0.5, Recall: 0.95
Precision: 0.4722222222222222, Recall: 0.85
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.9
Precision: 0.5, Recall: 0.9
Precision: 0.5, Recall: 0.85
Precision: 0.5, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5, Recall: 0.95
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.5, Recall: 0.9
Precision: 0.45714285714285713, Rec

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 

Precision: 0.56, Recall: 0.7
Precision: 0.5789473684210527, Recall: 0.55
Precision: 0.4782608695652174, Recall: 0.55
Precision: 0.43478260869565216, Recall: 0.5
Precision: 0.5, Recall: 0.55
Precision: 0.4782608695652174, Recall: 0.55
Precision: 0.4375, Recall: 0.35
Precision: 0.5, Recall: 0.45
Precision: 0.42105263157894735, Recall: 0.4
Precision: 0.52, Recall: 0.65
Precision: 0.4, Recall: 0.4
Precision: 0.5833333333333334, Recall: 0.7
Precision: 0.4166666666666667, Recall: 0.25
Precision: 0.4, Recall: 0.4
Precision: 0.5, Recall: 0.55
Precision: 0.4444444444444444, Recall: 0.4
Precision: 0.5294117647058824, Recall: 0.45
Precision: 0.5909090909090909, Recall: 0.65
Precision: 0.3333333333333333, Recall: 0.2
Precision: 0.47368421052631576, Recall: 0.45
Precision: 0.45454545454545453, Recall: 0.5
kernel='linear', C=1.0, dim=0, eps=0.3
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the 

Precision: 0.4, Recall: 0.4
Precision: 0.38095238095238093, Recall: 0.4
Precision: 0.43478260869565216, Recall: 0.5
Precision: 0.45454545454545453, Recall: 0.5
Precision: 0.5263157894736842, Recall: 0.5
kernel='linear', C=1.0, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class Modul

Precision: 0.45454545454545453, Recall: 0.5
Precision: 0.4117647058823529, Recall: 0.35
Precision: 0.4117647058823529, Recall: 0.35
Precision: 0.35294117647058826, Recall: 0.3
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.47368421052631576, Recall: 0.45
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.3684210526315789, Recall: 0.35
Precision: 0.35, Recall: 0.35
Precision: 0.5294117647058824, Recall: 0.45
Precision: 0.2777777777777778, Recall: 0.25
Precision: 0.23076923076923078, Recall: 0.15
kernel='linear', C=1.0, dim=200, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distribu

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.43333333333333335, Recall: 0.65
Precision: 0.5405405405405406, Recall: 1.0
Precision: 0.4838709677419355, Recall: 0.75
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.85
Precision: 0.4857142857142857, Recall: 0.85
Precision: 0.5263157894736842, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.41935483870967744, Recall: 0.65
Precision: 0.5, Recall: 0.85
Precision: 0.5277777777777778, Recall: 0.95
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.45714285714285713, Recall: 0.8
Precision: 0.4857142857142857, Recall: 0.85
Precision: 0.5151515151515151, Recall: 0.85
Precision: 0.4666666666666667, Recall: 0.7
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5151515151515151, Recall: 0.85
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5, Recall: 0.9
kernel='rbf', C=1.0, dim=0, eps=0.5
Number of documents:

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5, Recall: 0.9
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5, Recall: 0.95
Precision: 0.4857142857142857, Recall: 0.85
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4722222222222222, Recall: 0.85
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.45714285714285713, Recall: 0.8
Precision: 0.5, Reca

PermissionError: [Errno 13] Permission denied: 'results-svm/params-svm.csv'

In [7]:
if __name__ == "__main__":
    # Driver cell: sweeps SVM hyper-parameters (C, kernel) together with the
    # dimensionality-reduction settings (dim, eps), runs flastSVM once per
    # combination, and appends one CSV row per combination to the results file.
    from itertools import product  # local import: only this driver needs it

    # Parameters setup
    flakyZip = "flakyFiles.zip"
    nonFlakyZip = "nonflakyFiles.zip"
    extractDir = "extracted"
    outDir = "results-svm/"
    os.makedirs(outDir, exist_ok=True)
    os.makedirs(extractDir, exist_ok=True)

    numSplit = 30
    testSetSize = 0.2
    # NOTE(review): no random_state is given, so the train/test splits (and the
    # reported metrics) will presumably differ between runs -- confirm whether
    # reproducibility is wanted before pinning a seed.
    kf = StratifiedShuffleSplit(n_splits=numSplit, test_size=testSetSize)

    outFile = "params-svm2.csv"
    # Build the results path once so the header write, the row appends, and the
    # final message all refer to the same location.
    outPath = os.path.join(outDir, outFile)
    with open(outPath, "w") as fo:
        # Opening with "w" truncates any previous results before the header.
        fo.write("kernel,C,dim,eps,storage,preparationTime,predictionTime,precision,recall\n")

    # SVM parameters to vary
    C_values = [0.1, 1.0, 10.0]  # Regularization parameter
    kernel_types = ["linear", "rbf", "poly", "sigmoid"]  # Kernel types
    dim_values = [0, 100, 200]  # Dimensionality reduction
    eps_values = [0.1, 0.3, 0.5]  # JL epsilon

    # product() varies the right-most sequence fastest, i.e. the same order as
    # nesting the loops C -> kernel -> dim -> eps.
    for C, kernel, dim, eps in product(C_values, kernel_types, dim_values, eps_values):
        print(f"{kernel=}, {C=}, {dim=}, {eps=}")
        results = flastSVM(outDir, flakyZip, nonFlakyZip, extractDir, kf, dim, eps, C, kernel, {})
        # Re-open in append mode for every combination so each finished row is
        # written to disk before the next (potentially long) run starts.
        with open(outPath, "a") as fo:
            # NOTE(review): the index mapping below follows the CSV header
            # (6-8 -> storage/preparationTime/predictionTime, 4-5 ->
            # precision/recall); verify against flastSVM's return value.
            fo.write("{},{},{},{},{},{},{},{},{}\n".format(
                kernel, C, dim, eps, results[6], results[7], results[8], results[4], results[5]
            ))

    # Report the full path: the file lives under outDir, not the working dir.
    print("SVM analysis completed. Results saved to:", outPath)

kernel='linear', C=0.1, dim=0, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticme

Precision: 0.36363636363636365, Recall: 0.4
Precision: 0.3, Recall: 0.3
Precision: 0.29411764705882354, Recall: 0.25
kernel='linear', C=0.1, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> 

Precision: 0.42857142857142855, Recall: 0.45
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.5, Recall: 0.6
Precision: 0.3888888888888889, Recall: 0.35
Precision: 0.29411764705882354, Recall: 0.25
Precision: 0.29411764705882354, Recall: 0.25
Precision: 0.35, Recall: 0.35
Precision: 0.42857142857142855, Recall: 0.45
Precision: 0.42105263157894735, Recall: 0.4
Precision: 0.47619047619047616, Recall: 0.5
Precision: 0.375, Recall: 0.45
Precision: 0.42105263157894735, Recall: 0.4
Precision: 0.2777777777777778, Recall: 0.25
Precision: 0.2777777777777778, Recall: 0.25
Precision: 0.4, Recall: 0.4
Precision: 0.3125, Recall: 0.25
Precision: 0.44, Recall: 0.55
Precision: 0.3333333333333333, Recall: 0.3
Precision: 0.38461538461538464, Recall: 0.5
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.5833333333333334, Recall: 0.7
Precision: 0.4230769230769231, Recall: 0.55
Precision: 0.3333333333333333, Recall: 0.35
Precision: 0.43478260869565216, Recall: 0.5
Precision: 0.4, Recall: 0.4

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
kernel='rbf', C=0.1, dim=0, eps=0.5
Number of document

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
kernel='rbf', C=0.1, dim=200, eps=0.3
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loa

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.4722222222222222, Recall: 0.85
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5, Recall: 0.95
Precision: 0.4722222222222222, Recall: 0.85
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4722222222222222, Recall: 0.85
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Reca

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
kernel='poly', C=0.1, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFra

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5128205128205128, Recall: 1.0
Precision: 

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5, Recall: 0.4
Precision: 0.52, Recall: 0.65
Precision: 0.5263157894736842, Recall: 0.5
Precision: 0.4782608695652174, Recall: 0.55
Precision: 0.375, Recall: 0.3
Precision: 0.4, Recall: 0.4
Precision: 0.5789473684210527, Recall: 0.55
Precision: 0.5263157894736842, Recall: 0.5
Precision: 0.47058823529411764, Recall: 0.4
Precision: 0.42105263157894735, Recall: 0.4
Precision: 0.4, Recall: 0.4
Precision: 0.5454545454545454, Recall: 0.6
Precision: 0.4444444444444444, Recall: 0.4
Precision: 0.4444444444444444, Recall: 0.4
Precision: 0.47058823529411764, Recall: 0.4
Precision: 0.5263157894736842, Recall: 0.5
Precision: 0.5333333333333333, Recall: 0.4
Precision: 0.43478260869565216, Recall: 0.5
Precision: 0.3888888888888889, Recall: 0.35
Precision: 0.5652173913043478, Recall: 0.65
Precision: 0.5833333333333334, Recall: 0.7
Precision: 0.5, Recall: 0.5
Precision: 0.47619047619047616, Recall: 0.5
Precision: 0.5, Recall: 0.5
Precision: 0.5909090909090909, Recall: 0.65
Precision: 0.45,

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5, Recall: 0.95
Precision: 0.5405405405405406, Recall: 1.0
Precision: 0.5, Recall: 0.9
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.5, Recall: 0.85
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4444444444444444, Recall: 0.6
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.5, Recall: 0.95
Precision: 0.47058823529411764, Recall: 0.8
Precision: 0.4838709677419355, Recall: 0.75
Precision: 0.46875, Recall: 0.75
Precision: 0.45454545454545453, Recall: 0.75
Precision: 0.5, Recall: 0.95
Precision: 0.48484848484848486, Recall: 0.8
Precision: 0.5, Recall: 0.95
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5263157894736842, Recall: 1.0
Precision: 0.4857142857142857, Recall: 0.85
Precision: 0.48484848484848486, Recall: 0.8
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.4857142857142857, Recall: 0.85
Precision: 0.5, Recall: 0.9
Precision: 0.45714285714285713, Recall: 0.8
Precision: 0.4545454545

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.45454545454545453, Recall: 0.75
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.5, Recall: 0.9
kernel='rbf', C=1.0, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.4722222222222222, Recall: 0.85
Precision: 0.5128205128205128, Recall: 1.0
kernel='poly', C=1.0, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @static

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5142857142857142, Recall: 0.9
Precision: 0.5142857142857142, Recall: 0.9
kernel='sigmoid', C=1.0, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @stati

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.4166666666666667, Recall: 0.25
Precision: 0.5, Recall: 0.9
kernel='linear', C=10.0, dim=0, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc

Precision: 0.5882352941176471, Recall: 0.5
Precision: 0.5, Recall: 0.5
Precision: 0.45, Recall: 0.45
Precision: 0.4090909090909091, Recall: 0.45
kernel='linear', C=10.0, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractm

Precision: 0.375, Recall: 0.3
Precision: 0.45454545454545453, Recall: 0.5
Precision: 0.48, Recall: 0.6
Precision: 0.45, Recall: 0.45
Precision: 0.4, Recall: 0.5
Precision: 0.391304347826087, Recall: 0.45
Precision: 0.42857142857142855, Recall: 0.45
Precision: 0.42105263157894735, Recall: 0.4
Precision: 0.47619047619047616, Recall: 0.5
Precision: 0.48, Recall: 0.6
Precision: 0.35714285714285715, Recall: 0.25
Precision: 0.3888888888888889, Recall: 0.35
Precision: 0.42105263157894735, Recall: 0.4
Precision: 0.35294117647058826, Recall: 0.3
Precision: 0.36363636363636365, Recall: 0.4
Precision: 0.391304347826087, Recall: 0.45
Precision: 0.48, Recall: 0.6
Precision: 0.4, Recall: 0.4
Precision: 0.45454545454545453, Recall: 0.5
Precision: 0.5263157894736842, Recall: 0.5
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.3333333333333333, Recall: 0.45
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.42857142857142855, Recall: 0.45
Precision: 0.5, Recall: 0.65
Precision: 0.5, Reca

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5172413793103449, Recall: 0.75
Precision: 0.4166666666666667, Recall: 0.25
Precision: 0.5555555555555556, Recall: 0.5
Precision: 0.4375, Recall: 0.35
Precision: 0.45, Recall: 0.45
Precision: 0.5555555555555556, Recall: 0.5
Precision: 0.47368421052631576, Recall: 0.45
Precision: 0.4090909090909091, Recall: 0.45
Precision: 0.5652173913043478, Recall: 0.65
Precision: 0.47619047619047616, Recall: 0.5
Precision: 0.38461538461538464, Recall: 0.25
Precision: 0.5263157894736842, Recall: 0.5
Precision: 0.47058823529411764, Recall: 0.4
Precision: 0.4, Recall: 0.3
Precision: 0.5, Recall: 0.35
Precision: 0.47619047619047616, Recall: 0.5
Precision: 0.45454545454545453, Recall: 0.5
Precision: 0.47058823529411764, Recall: 0.4
Precision: 0.47619047619047616, Recall: 0.5
Precision: 0.47058823529411764, Recall: 0.4
Precision: 0.44, Recall: 0.55
Precision: 0.5238095238095238, Recall: 0.55
Precision: 0.48148148148148145, Recall: 0.65
kernel='rbf', C=10.0, dim=0, eps=0.5
Number of documents: 1

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.5238095238095238, Recall: 0.55
Precision: 0.5714285714285714, Recall: 0.4
kernel='rbf', C=10.0, dim=200, eps=0.3
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticm

Precision: 0.5142857142857142, Recall: 0.9
Precision: 0.4642857142857143, Recall: 0.65
Precision: 0.45714285714285713, Recall: 0.8
Precision: 0.5, Recall: 0.85
Precision: 0.5, Recall: 0.85
Precision: 0.4857142857142857, Recall: 0.85
Precision: 0.4722222222222222, Recall: 0.85
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5128205128205128, Recall: 1.0
Precision: 0.4857142857142857, Recall: 0.85
Precision: 0.5, Recall: 0.85
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.4864864864864865, Recall: 0.9
Precision: 0.5294117647058824, Recall: 0.9
Precision: 0.4, Recall: 0.2
Precision: 0.5161290322580645, Recall: 0.8
Precision: 0.47058823529411764, Recall: 0.8
Precision: 0.48484848484848486, Recall: 0.8
Precision: 0.5294117647058824, Recall: 0.9
Precision: 0.5135135135135135, Recall: 0.95
Precision: 0.5, Recall: 0.9
Precision: 0.4857142857142857, Recall: 0.85
Precision: 0.45714285714285713, Recall: 0.8
Precision: 0.4722222222222222

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Precision: 0.4864864864864865, Recall: 0.9
kernel='sigmoid', C=10.0, dim=0, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_m

Precision: 0.47058823529411764, Recall: 0.4
Precision: 0.38461538461538464, Recall: 0.5
Precision: 0.36363636363636365, Recall: 0.4
Precision: 0.46153846153846156, Recall: 0.6
Precision: 0.35, Recall: 0.35
Precision: 0.5384615384615384, Recall: 0.35
kernel='sigmoid', C=10.0, dim=100, eps=0.1
Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


Number of documents: 195
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractmethod
    def wr

Naive Bayes

In [8]:
import os
import time
import zipfile
import pickle
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.random_projection import johnson_lindenstrauss_min_dim, SparseRandomProjection
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import precision_score, recall_score
from sklearn.model_selection import StratifiedShuffleSplit

###############################################################################
# Utility functions

def extract_zip(zip_file, extract_to):
    """Unpack every member of the archive ``zip_file`` into ``extract_to``."""
    archive = zipfile.ZipFile(zip_file, mode='r')
    # The context manager closes the archive handle even if extraction fails.
    with archive:
        archive.extractall(path=extract_to)

def getDataPoints(path):
    """Collect the stripped text of every non-empty ``.py`` file under ``path``.

    The tree is walked recursively. Directories and files are visited in
    sorted name order so that the returned list (and anything derived from
    its order, such as the first "sample" document) is the same on every
    filesystem.

    Args:
        path: Root directory to scan.

    Returns:
        A list of file contents with leading/trailing whitespace removed.
        Files that are empty after stripping are reported and skipped. An
        empty list is returned when ``path`` does not exist or holds no
        usable ``.py`` file.
    """
    dataPointsList = []
    if not os.path.exists(path):
        print(f"Directory does not exist: {path}")
        return dataPointsList

    for root, dirs, files in os.walk(path):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        for dataPointName in sorted(files):
            if not dataPointName.endswith(".py"):  # Only consider Python files
                continue
            file_path = os.path.join(root, dataPointName)
            with open(file_path, encoding="utf-8") as fileIn:
                dp = fileIn.read().strip()
            if dp:  # Ensure the document is not empty
                dataPointsList.append(dp)
            else:
                print(f"Empty file: {file_path}")

    if not dataPointsList:
        print(f"No valid documents found in directory: {path}")

    return dataPointsList

def computeResults(testLabels, predictLabels):
    """Return ``(precision, recall)`` of the predictions against the true labels.

    Both scorers are called with ``zero_division=0``.
    """
    scores = [
        scorer(testLabels, predictLabels, zero_division=0)
        for scorer in (precision_score, recall_score)
    ]
    return scores[0], scores[1]

###############################################################################
# FLAST functions

def flastVectorization(dataPoints, dim=0, eps=0.3):
    """Turn raw documents into a token-count matrix, optionally projected down.

    Args:
        dataPoints: Iterable of document strings.
        dim: Target number of components for the random projection. A value
            ``<= 0`` means the size is derived from the number of documents
            and ``eps`` via ``johnson_lindenstrauss_min_dim``.
        eps: Distortion parameter for the Johnson-Lindenstrauss bound.
            ``eps == 0`` disables the projection entirely.

    Returns:
        The count matrix itself when ``eps == 0``, otherwise the result of
        ``SparseRandomProjection.fit_transform`` on it.
    """
    vectorizer = CountVectorizer(stop_words=None)  # No stop word removal
    Z_full = vectorizer.fit_transform(dataPoints)

    # eps == 0 is the "no dimensionality reduction" switch.
    if eps == 0:
        return Z_full

    n_components = (
        johnson_lindenstrauss_min_dim(Z_full.shape[0], eps=eps)
        if dim <= 0
        else dim
    )
    projector = SparseRandomProjection(n_components=n_components)
    return projector.fit_transform(Z_full)

def flastNBClassification(trainData, trainLabels, testData):
    """Fit a multinomial Naive Bayes model and predict labels for the test set.

    Returns:
        ``(trainTime, testTime, predictLabels)`` where the two times are the
        wall-clock seconds spent fitting and predicting respectively.
    """
    # Absolute values are taken before fitting/predicting -- presumably
    # because the random projection can yield negative entries, which
    # MultinomialNB is not meant for (TODO confirm).
    nonNegTrain = np.abs(trainData)
    nonNegTest = np.abs(testData)

    fitStart = time.perf_counter()
    classifier = MultinomialNB().fit(nonNegTrain, trainLabels)
    trainTime = time.perf_counter() - fitStart

    predictStart = time.perf_counter()
    predictLabels = classifier.predict(nonNegTest)
    testTime = time.perf_counter() - predictStart

    return trainTime, testTime, predictLabels

###############################################################################
# FLAST Naive Bayes analysis

def flastNB(outDir, flakyZip, nonFlakyZip, extractDir, kf, dim, eps):
    """Run the FLAST Naive Bayes evaluation for one ``(dim, eps)`` setting.

    The two archives are extracted, their ``.py`` files are vectorized, the
    pickled size of the vectorized data set is measured, and a Naive Bayes
    classifier is trained and scored on every split produced by ``kf``.

    Args:
        outDir: Existing directory used for the temporary pickle file.
        flakyZip: Path of the zip archive holding the flaky (label 1) files.
        nonFlakyZip: Path of the zip archive holding the non-flaky (label 0)
            files.
        extractDir: Directory under which ``flaky/`` and ``nonFlaky/`` are
            created and filled.
        kf: Splitter object exposing ``split(X, y)``.
        dim: Projection dimension forwarded to ``flastVectorization``.
        eps: JL epsilon forwarded to ``flastVectorization``.

    Returns:
        A 9-tuple ``(avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest,
        avgNonFlakyTest, avgP, avgR, storage, avgTPrep, avgTPred)``; every
        ``avg*`` value is a mean over the folds that were actually evaluated
        and ``storage`` is the pickle size in bytes.

    Raises:
        ValueError: If no document was found, or if every fold had to be
            skipped (previously this surfaced as a ``ZeroDivisionError``).
    """
    # NOTE: the clock starts before extraction and file reading, so vecTime
    # covers unzip + I/O + vectorization, not vectorization alone.
    v0 = time.perf_counter()

    # Extract the zip files
    flakyDir = os.path.join(extractDir, 'flaky')
    nonFlakyDir = os.path.join(extractDir, 'nonFlaky')
    os.makedirs(flakyDir, exist_ok=True)
    os.makedirs(nonFlakyDir, exist_ok=True)

    extract_zip(flakyZip, flakyDir)
    extract_zip(nonFlakyZip, nonFlakyDir)

    dataPointsFlaky = getDataPoints(flakyDir)
    dataPointsNonFlaky = getDataPoints(nonFlakyDir)
    dataPoints = dataPointsFlaky + dataPointsNonFlaky

    print(f"Number of documents: {len(dataPoints)}")
    if not dataPoints:
        raise ValueError("No documents available for vectorization. Please check the input directories.")
    print(f"Sample document: {dataPoints[0]}")

    Z = flastVectorization(dataPoints, dim=dim, eps=eps)
    # Row-wise densification; relies on Z supporting Z[i].toarray()
    # (i.e. being a scipy sparse matrix -- TODO confirm for all settings).
    dataPointsList = np.array([Z[i].toarray() for i in range(Z.shape[0])])
    dataLabelsList = np.array([1]*len(dataPointsFlaky) + [0]*len(dataPointsNonFlaky))
    vecTime = time.perf_counter() - v0

    # Storage calculation: size on disk of the pickled (features, labels) pair.
    nb = (dataPointsList, dataLabelsList)
    pickleDumpNB = os.path.join(outDir, f"flast-nb-dim{dim}-eps{eps}.pickle")
    with open(pickleDumpNB, "wb") as pickleFile:
        pickle.dump(nb, pickleFile)
    storage = os.path.getsize(pickleDumpNB)
    os.remove(pickleDumpNB)

    avgP, avgR = 0, 0
    avgTPrep, avgTPred = 0, 0
    avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest, avgNonFlakyTest = 0, 0, 0, 0
    successFold = 0

    for trnIdx, tstIdx in kf.split(dataPointsList, dataLabelsList):
        trainData, testData = dataPointsList[trnIdx], dataPointsList[tstIdx]
        trainLabels, testLabels = dataLabelsList[trnIdx], dataLabelsList[tstIdx]

        # Labels are 0/1, so the sum is the number of flaky samples.
        flakyTrain = trainLabels.sum()
        flakyTest = testLabels.sum()
        if flakyTrain == 0 or flakyTest == 0:
            print("Skipping fold with no flaky/non-flaky examples...")
            continue

        successFold += 1
        avgFlakyTrain += flakyTrain
        avgNonFlakyTrain += len(trainLabels) - flakyTrain
        avgFlakyTest += flakyTest
        avgNonFlakyTest += len(testLabels) - flakyTest

        # Flatten each sample to one feature row.
        trainData = trainData.reshape((trainData.shape[0], -1))
        testData = testData.reshape((testData.shape[0], -1))

        trainTime, testTime, predictLabels = flastNBClassification(trainData, trainLabels, testData)
        # Vectorization cost is attributed proportionally: the training share
        # goes to preparation, a single-document share to prediction.
        preparationTime = (vecTime * len(trainData) / len(dataPoints)) + trainTime
        predictionTime = (vecTime / len(dataPoints)) + (testTime / len(testData))
        precision, recall = computeResults(testLabels, predictLabels)

        print(f"Precision: {precision}, Recall: {recall}")
        avgP += precision
        avgR += recall
        avgTPrep += preparationTime
        avgTPred += predictionTime

    if successFold == 0:
        # Without this guard the averaging below divides by zero.
        raise ValueError(
            "No fold could be evaluated: every split lacked flaky examples "
            "in its training or test set."
        )

    avgP /= successFold
    avgR /= successFold
    avgTPrep /= successFold
    avgTPred /= successFold
    avgFlakyTrain /= successFold
    avgNonFlakyTrain /= successFold
    avgFlakyTest /= successFold
    avgNonFlakyTest /= successFold

    return avgFlakyTrain, avgNonFlakyTrain, avgFlakyTest, avgNonFlakyTest, avgP, avgR, storage, avgTPrep, avgTPred

if __name__ == "__main__":
    # Parameters setup
    flakyZip = "flaky_methods.zip"
    nonFlakyZip = "non-flakyMethods.zip"
    extractDir = "extracted"
    outDir = "results-nb/"
    os.makedirs(outDir, exist_ok=True)
    os.makedirs(extractDir, exist_ok=True)

    numSplit = 30
    testSetSize = 0.2
    kf = StratifiedShuffleSplit(n_splits=numSplit, test_size=testSetSize)

    outFile = "params-nb.csv"
    resultsPath = os.path.join(outDir, outFile)
    with open(resultsPath, "w") as fo:
        fo.write("dim,eps,precision,recall,storage,preparationTime,predictionTime\n")

    # Naive Bayes parameters to vary
    dim_values = [0, 100, 200]  # Dimensionality reduction
    eps_values = [0.1, 0.3, 0.5]  # JL epsilon

    for dim in dim_values:
        for eps in eps_values:
            print(f"dim={dim}, eps={eps}")
            # flastNB returns nine values; the first four (per-class sample
            # counts) have no column in the CSV header. Passing the whole
            # tuple to str.format would silently shift them into the
            # precision/recall/... columns, so pick the matching ones.
            (_, _, _, _,
             avgP, avgR, storage, avgTPrep, avgTPred) = flastNB(
                outDir, flakyZip, nonFlakyZip, extractDir, kf, dim, eps)
            with open(resultsPath, "a") as fo:
                fo.write("{},{},{},{},{},{},{}\n".format(
                    dim, eps, avgP, avgR, storage, avgTPrep, avgTPred))

    print("Naive Bayes analysis completed. Results saved to:", resultsPath)


dim=0, eps=0.1
Number of documents: 301
Sample document: # Copyright 2021 The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import abc

from flax.core import freeze


@dataclasses.dataclass(frozen=True)
class ModuleFramework(abc.ABC):
    @staticmethod
    @abc.abstractmethod
    def is_loaded() -> bool:
        pass

    @staticmethod
    @abc.abstractmethod
    def is_my_module(module):
        pass

    @staticmethod
    @abc.abstractme