In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from concrete.ml.sklearn import LogisticRegression as LR
import time

In [2]:
def generate_data(n, num_points=100):
    np.random.seed(42)
    X = np.random.rand(num_points, n)
    
    # Make the criteria for assigning y more non-linear
    y = ((np.sum(X[:, :n//2] >= 0.5, axis=1) > n//4) & (np.sum(X[:, :n//2] >= 0.7, axis=1) > n//6)).astype(int)
    
    return X, y

def save_test_data_to_file(filename, X_test, y_test):
    with open(filename, 'w') as file:
        for i in range(X_test.shape[0]):
            line = ' '.join(map(str, X_test[i])) + ' 1 '+f' {y_test[i]}\n'
            file.write(line)

def save_weights_to_file(filename, weights, intercept):
    with open(filename, 'w') as file:
        file.write(' '.join(map(str, weights)))
        file.write(f'\n{intercept}\n')

In [3]:
dimensions = [2, 4, 8, 16, 32]

for n in dimensions:
    X, y = generate_data(n)

    # Split the dataset into training and test sets (70% train, 30% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Train a logistic regression classifier
    clf = LogisticRegression(random_state=42)
    clf.fit(X_train, y_train)

    # Test the classifier on the test set and measure prediction time
    start_time = time.time()
    y_pred = clf.predict(X_test)
    prediction_time = time.time() - start_time

    # Calculate accuracy
    test_accuracy = accuracy_score(y_test, y_pred)

    # Save test data to a file
    test_data_filename = f'test_data_{n}D.txt'
    save_test_data_to_file(test_data_filename, X_test, y_test)
    
    # Save weights to a file
    weights_filename = f'weights_{n}D.txt'
    save_weights_to_file(weights_filename, clf.coef_[0], clf.intercept_[0])

    result_filename = f'results_{n}D.txt'
    with open(result_filename, 'w') as file:
        file.write(f'Test Accuracy: {test_accuracy:.4f}\n')
        file.write(f'Time to predict on test data: {prediction_time:.6f} seconds\n')

    print(f'Test data with {n} dimensions processed.')

    model = LR(n_bits=8)
    model.fit(X_train, y_train)

    # We can simulate the predictions in the clear
    y_pred_clear = model.predict(X_test)
    print(type(model), type(clf))
    # We then compile on a representative set 
    model.compile(X_train)
    

    # Finally we run the inference on encrypted inputs !
    y_pred_fhe = model.predict(X_test, fhe="execute")
    test_accuracy_fhe = accuracy_score(y_test, y_pred_fhe)
    test_accuracy_clear_concrete = accuracy_score(y_test, y_pred_clear)

    print("Accuracy_clear_concrete  :", test_accuracy_clear_concrete)
    print("Accuracy_FHE    :", test_accuracy_fhe)
    print(f"Similarity: {int((y_pred_fhe == y_pred_clear).mean()*100)}%")

Test data with 2 dimensions processed.
<class 'concrete.ml.sklearn.linear_model.LogisticRegression'> <class 'sklearn.linear_model._logistic.LogisticRegression'>


RuntimeError: Function you are trying to compile cannot be converted to MLIR:

 %0 = [[255] [  0]]                         # ClearTensor<uint8, shape=(2, 1)>
 %1 = 40                                    # ClearScalar<uint6>
 %2 = _input_0                              # EncryptedTensor<uint8, shape=(1, 2)>
 %3 = 1                                     # ClearScalar<uint1>
 %4 = add(%2, %3)                           # EncryptedTensor<uint9, shape=(1, 2)>
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ only up to 8-bit integers are supported
 %5 = subgraph(%4)                          # EncryptedTensor<uint8, shape=(1, 2)>
 %6 = matmul(%5, %0)                        # EncryptedTensor<uint16, shape=(1, 1)>
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ only up to 8-bit integers are supported
 %7 = sum(%5, axis=1, keepdims=True)        # EncryptedTensor<uint9, shape=(1, 1)>
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ only up to 8-bit integers are supported
 %8 = multiply(%1, %7)                      # EncryptedTensor<uint15, shape=(1, 1)>
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ only up to 8-bit integers are supported
 %9 = add(%6, %8)                           # EncryptedTensor<uint17, shape=(1, 1)>
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ only up to 8-bit integers are supported
%10 = subgraph(%9)                          # EncryptedTensor<uint8, shape=(1, 1)>
return %10

Subgraphs:

    %5 = subgraph(%4):

         %0 = 0                              # ClearScalar<uint1>
         %1 = 255                            # ClearScalar<uint8>
         %2 = -1                             # ClearScalar<int1>
         %3 = 0.0038484894881447643          # ClearScalar<float64>
         %4 = 0.0038484894881447643          # ClearScalar<float64>
         %5 = input                          # EncryptedTensor<uint2, shape=(1, 2)>
         %6 = multiply(%4, %5)               # EncryptedTensor<float64, shape=(1, 2)>
         %7 = true_divide(%6, %3)            # EncryptedTensor<float64, shape=(1, 2)>
         %8 = add(%7, %2)                    # EncryptedTensor<float64, shape=(1, 2)>
         %9 = rint(%8)                       # EncryptedTensor<float64, shape=(1, 2)>
        %10 = clip(%9, %0, %1)               # EncryptedTensor<float64, shape=(1, 2)>
        %11 = astype(%10, dtype=int_)        # EncryptedTensor<uint1, shape=(1, 2)>
        return %11

    %10 = subgraph(%9):

         %0 = 0                                 # ClearScalar<uint1>
         %1 = 255                               # ClearScalar<uint8>
         %2 = -23                               # ClearScalar<int6>
         %3 = 0.0026415093086641184             # ClearScalar<float64>
         %4 = 1.0                               # ClearScalar<float64>
         %5 = 1.0                               # ClearScalar<float64>
         %6 = [-2.8879607]                      # ClearTensor<float32, shape=(1,)>
         %7 = 4.73919145867945e-05              # ClearScalar<float64>
         %8 = [[255]]                           # ClearTensor<uint8, shape=(1, 1)>
         %9 = 80                                # ClearScalar<uint7>
        %10 = input                             # EncryptedTensor<uint2, shape=(1, 1)>
        %11 = astype(%10, dtype=float32)        # EncryptedTensor<float32, shape=(1, 1)>
        %12 = add(%11, %9)                      # EncryptedTensor<float32, shape=(1, 1)>
        %13 = add(%12, %8)                      # EncryptedTensor<float64, shape=(1, 1)>
        %14 = multiply(%7, %13)                 # EncryptedTensor<float64, shape=(1, 1)>
        %15 = add(%14, %6)                      # EncryptedTensor<float64, shape=(1, 1)>
        %16 = negative(%15)                     # EncryptedTensor<float64, shape=(1, 1)>
        %17 = exp(%16)                          # EncryptedTensor<float64, shape=(1, 1)>
        %18 = add(%5, %17)                      # EncryptedTensor<float64, shape=(1, 1)>
        %19 = true_divide(%4, %18)              # EncryptedTensor<float64, shape=(1, 1)>
        %20 = true_divide(%19, %3)              # EncryptedTensor<float64, shape=(1, 1)>
        %21 = add(%20, %2)                      # EncryptedTensor<float64, shape=(1, 1)>
        %22 = rint(%21)                         # EncryptedTensor<float64, shape=(1, 1)>
        %23 = clip(%22, %0, %1)                 # EncryptedTensor<float64, shape=(1, 1)>
        %24 = astype(%23, dtype=int_)           # EncryptedTensor<uint1, shape=(1, 1)>
        return %24

In [5]:
import numpy as np

def read_weights(filename):
    # Read weights from file
    with open(filename, 'r') as file:
        lines = file.readlines()

    # Extract weights (excluding the intercept)
    weights = np.loadtxt(lines[:-1])

    # Extract intercept (last line)
    intercept = float(lines[-1])

    return weights, intercept

def read_test_data(filename, n):
    # Read test data from file
    with open(filename, 'r') as file:
        data = np.loadtxt(file, usecols=range(n))
    return data

def calculate_dot_products(weights, intercept, data):
    # Calculate dot products for each row of data
    print(weights, intercept, data)
    dot_products = np.dot(data, weights)+intercept
    return dot_products

def logistic_function(x):
    # Calculate logistic function 1 / (1 + exp(-x))
    return 1.0 / (1.0 + np.exp(-x))

def find_min_max_dot_products(weights_filename, test_data_filename):
    # Read weights for different n values
    n_values = [4]
    min_dot_products = []
    max_dot_products = []

    for n in n_values:
        # Construct filenames
        weights_filename_n = weights_filename.replace('$n', str(n))
        test_data_filename_n = test_data_filename.replace('$n', str(n))

        # Read weights and intercept
        weights, intercept = read_weights(weights_filename_n)
        data = read_test_data(test_data_filename_n, n)

        # Calculate dot products
        dot_products = calculate_dot_products(weights, intercept, data)

        # Find minimum and maximum dot products
        min_dot_products.append(np.min(dot_products))
        max_dot_products.append(np.max(dot_products))
        
        for dot_product in dot_products:
            logistic_output = logistic_function(dot_product)
            print(f"   Dot Product: {dot_product}, Logistic Output: {logistic_output}")

    return min_dot_products, max_dot_products

# Example usage
weights_filename = "weights_$nD.txt"
test_data_filename = "test_data_$nD.txt"

min_dot_products, max_dot_products = find_min_max_dot_products(weights_filename, test_data_filename)

# Print results
for i, n in enumerate([4]):
    print(f"For n={n}:")
    print(f"   Minimum Dot Product: {min_dot_products[i]}")
    print(f"   Maximum Dot Product: {max_dot_products[i]}")
    print()


[2.63872993 2.35402328 0.14803324 0.0739867 ] -3.9148855975417045 [[0.01439349 0.11607264 0.04600264 0.0407288 ]
 [0.65196126 0.22426931 0.71217922 0.23724909]
 [0.89000534 0.33799516 0.37558295 0.09398194]
 [0.34106635 0.11347352 0.92469362 0.87733935]
 [0.69093774 0.38673535 0.93672999 0.13752094]
 [0.67213555 0.76161962 0.23763754 0.72821635]
 [0.88721274 0.47221493 0.11959425 0.71324479]
 [0.54922666 0.71459592 0.66019738 0.2799339 ]
 [0.12203823 0.49517691 0.03438852 0.9093204 ]
 [0.37454012 0.95071431 0.73199394 0.59865848]
 [0.00552212 0.81546143 0.70685734 0.72900717]
 [0.80744016 0.8960913  0.31800347 0.11005192]
 [0.82260056 0.36019064 0.12706051 0.52224326]
 [0.11986537 0.33761517 0.9429097  0.32320293]
 [0.38816993 0.64328822 0.45825289 0.54561679]
 [0.30424224 0.52475643 0.43194502 0.29122914]
 [0.72609133 0.97585208 0.51630035 0.32295647]
 [0.79518619 0.27083225 0.43897142 0.07845638]
 [0.54671028 0.18485446 0.96958463 0.77513282]
 [0.22793516 0.42710779 0.81801477 0.8607