In [33]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from concrete.ml.sklearn import LogisticRegression as LR
import time

In [34]:
def generate_data(n, num_points=100):
    np.random.seed(42)
    X = np.random.rand(num_points, n)
    
    # Make the criteria for assigning y more non-linear
    y = ((np.sum(X[:, :n//2] >= 0.5, axis=1) > n//4) & (np.sum(X[:, :n//2] >= 0.7, axis=1) > n//6)).astype(int)
    
    return X, y

def save_test_data_to_file(filename, X_test, y_test):
    with open(filename, 'w') as file:
        for i in range(X_test.shape[0]):
            line = ' '.join(map(str, X_test[i])) + ' 1 '+f' {y_test[i]}\n'
            file.write(line)

def save_weights_to_file(filename, weights, intercept):
    with open(filename, 'w') as file:
        file.write(' '.join(map(str, weights)))
        file.write(f'\n{intercept}\n')

In [36]:
dimensions = [2, 4, 8, 16, 32]

for n in dimensions:
    X, y = generate_data(n)

    # Split the dataset into training and test sets (70% train, 30% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Train a logistic regression classifier
    clf = LogisticRegression(random_state=42)
    clf.fit(X_train, y_train)

    # Test the classifier on the test set and measure prediction time
    start_time = time.time()
    y_pred = clf.predict(X_test)
    prediction_time = time.time() - start_time

    # Calculate accuracy
    test_accuracy = accuracy_score(y_test, y_pred)

    # Save test data to a file
    test_data_filename = f'test_data_{n}D.txt'
    save_test_data_to_file(test_data_filename, X_test, y_test)
    
    # Save weights to a file
    weights_filename = f'weights_{n}D.txt'
    save_weights_to_file(weights_filename, clf.coef_[0], clf.intercept_[0])

    result_filename = f'results_{n}D.txt'
    with open(result_filename, 'w') as file:
        file.write(f'Test Accuracy: {test_accuracy:.4f}\n')
        file.write(f'Time to predict on test data: {prediction_time:.6f} seconds\n')

    print(f'Test data with {n} dimensions processed.')

    model = LR(n_bits=8)
    model.fit(X_train, y_train)

    # We can simulate the predictions in the clear
    y_pred_clear = model.predict(X_test)
    print(type(model), type(clf))
    # We then compile on a representative set 
    model.compile(X_train)
    

    # Finally we run the inference on encrypted inputs !
    y_pred_fhe = model.predict(X_test, fhe="execute")
    test_accuracy_fhe = accuracy_score(y_test, y_pred_fhe)
    test_accuracy_clear_concrete = accuracy_score(y_test, y_pred_clear)

    print("Accuracy_clear_concrete  :", test_accuracy_clear_concrete)
    print("Accuracy_FHE    :", test_accuracy_fhe)
    print(f"Similarity: {int((y_pred_fhe == y_pred_clear).mean()*100)}%")

Test data with 2 dimensions processed.


TypeError: predict() got an unexpected keyword argument 'fhe'

In [28]:
import numpy as np

def read_weights(filename):
    # Read weights from file
    with open(filename, 'r') as file:
        lines = file.readlines()

    # Extract weights (excluding the intercept)
    weights = np.loadtxt(lines[:-1])

    # Extract intercept (last line)
    intercept = float(lines[-1])

    return weights, intercept

def read_test_data(filename, n):
    # Read test data from file
    with open(filename, 'r') as file:
        data = np.loadtxt(file, usecols=range(n))
    return data

def calculate_dot_products(weights, intercept, data):
    # Calculate dot products for each row of data
    print(weights, intercept, data)
    dot_products = np.dot(data, weights)+intercept
    return dot_products

def logistic_function(x):
    # Calculate logistic function 1 / (1 + exp(-x))
    return 1.0 / (1.0 + np.exp(-x))

def find_min_max_dot_products(weights_filename, test_data_filename):
    # Read weights for different n values
    n_values = [2]
    min_dot_products = []
    max_dot_products = []

    for n in n_values:
        # Construct filenames
        weights_filename_n = weights_filename.replace('$n', str(n))
        test_data_filename_n = test_data_filename.replace('$n', str(n))

        # Read weights and intercept
        weights, intercept = read_weights(weights_filename_n)
        data = read_test_data(test_data_filename_n, n)

        # Calculate dot products
        dot_products = calculate_dot_products(weights, intercept, data)

        # Find minimum and maximum dot products
        min_dot_products.append(np.min(dot_products))
        max_dot_products.append(np.max(dot_products))
        
        for dot_product in dot_products:
            logistic_output = logistic_function(dot_product)
            print(f"   Dot Product: {dot_product}, Logistic Output: {logistic_output}")

    return min_dot_products, max_dot_products

# Example usage
weights_filename = "weights_$nD.txt"
test_data_filename = "test_data_$nD.txt"

min_dot_products, max_dot_products = find_min_max_dot_products(weights_filename, test_data_filename)

# Print results
for i, n in enumerate([2, 4, 8, 16, 32]):
    print(f"For n={n}:")
    print(f"   Minimum Dot Product: {min_dot_products[i]}")
    print(f"   Maximum Dot Product: {max_dot_products[i]}")
    print()


[3.62994456 0.4897676 ] -2.8879605813591267 [[0.32078006 0.18651851]
 [0.41038292 0.75555114]
 [0.96244729 0.2517823 ]
 [0.11959425 0.71324479]
 [0.88721274 0.47221493]
 [0.35846573 0.11586906]
 [0.25877998 0.66252228]
 [0.36778313 0.63230583]
 [0.61185289 0.13949386]
 [0.37454012 0.95071431]
 [0.30461377 0.09767211]
 [0.38867729 0.27134903]
 [0.60956433 0.50267902]
 [0.14092422 0.80219698]
 [0.34106635 0.11347352]
 [0.60111501 0.70807258]
 [0.14489487 0.48945276]
 [0.98565045 0.24205527]
 [0.45606998 0.78517596]
 [0.82873751 0.35675333]
 [0.28975145 0.16122129]
 [0.69093774 0.38673535]
 [0.93949894 0.89482735]
 [0.31098232 0.32518332]
 [0.3636296  0.97178208]
 [0.60754485 0.17052412]
 [0.86310343 0.62329813]
 [0.90041806 0.63310146]
 [0.43194502 0.29122914]
 [0.28484049 0.03688695]]
   Dot Product: -1.6321960047078998, Logistic Output: 0.16352975263471395
   Dot Product: -1.0282488510032963, Logistic Output: 0.2634237417459443
   Dot Product: 0.7289845560611683, Logistic Output: 0.674

IndexError: list index out of range