In [5]:
import time

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression
from xgboost.sklearn import XGBClassifier as SklearnXGBClassifier

from concrete.ml.sklearn import XGBClassifier

# Seed passed as `random_state` to the train/test split so the partition is repeatable.
RANDOM_STATE = 0

In [6]:
# Load the feature table and the matching labels from the local `data/` folder.
# The label frame is squeezed along its column axis (presumably a single-column
# file, which yields a Series — verify against the CSV).
X_balance = pd.read_csv(
    "data/x_balance.csv",
    encoding="utf-8",
)
Y_balance = pd.read_csv(
    "data/y_balance.csv",
    encoding="utf-8",
).squeeze("columns")

In [7]:
# Hold out 30% of the samples for testing. The split is stratified on the labels
# and seeded with RANDOM_STATE so it can be reproduced.
X_train, X_test, y_train, y_test = train_test_split(
    X_balance,
    Y_balance,
    stratify=Y_balance,
    test_size=0.3,
    random_state=RANDOM_STATE,
)

In [8]:
# Build the Concrete ML XGBoost classifier (6-bit setting) and fit it with
# `fit_benchmark`, which hands back two fitted models: the Concrete ML one and the
# reference one used as the baseline below.
concrete_model, sklearn_model = XGBClassifier(
    n_bits=6,
    learning_rate=None,
).fit_benchmark(X_train, y_train)

In [5]:
# Baseline: accuracy on the test split of the reference model (`sklearn_model`)
# returned by `fit_benchmark`.
y_pred_sklearn = sklearn_model.predict(X_test)

print(f"Accuracy score (Scikit-Learn) is {accuracy_score(y_test, y_pred_sklearn)}")

Accuracy score (Scikit-Learn) is 0.9217021850553008


In [6]:
# Compile the model into a circuit that takes all features as a single input,
# passing `X_train` as the input set. The returned circuit object is the cell output.
concrete_model.compile(X_train)

<concrete.fhe.compilation.circuit.Circuit at 0x2b93cffd0>

In [7]:
# Accuracy of the single-input ("no dual") compiled model on the whole test split,
# evaluated with `fhe="simulate"` instead of real FHE execution.
y_pred_fhe_no_dual = concrete_model.predict(X_test, fhe="simulate")

print(f"Accuracy score (Concrete ML simulation) is {accuracy_score(y_test, y_pred_fhe_no_dual)}")

Accuracy score (Concrete ML simulation) is 0.9193417858106285


In [None]:
# Time real FHE execution of the single-input circuit on the first FHE_SAMPLE test rows.
FHE_SAMPLE = 10

concrete_model.fhe_circuit.keygen()

# Select rows explicitly and keep the subset, so the average below is divided by the
# number of rows that were actually run (the test set could hold fewer than FHE_SAMPLE).
X_test_fhe = X_test.iloc[:FHE_SAMPLE]

# perf_counter() is the clock meant for measuring durations: it is monotonic and
# high-resolution, whereas time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
concrete_model.predict(X_test_fhe, fhe="execute")
elapsed = time.perf_counter() - start

print(f"FHE execution time per inference: {elapsed / len(X_test_fhe) :.2}s")

FHE execution time per inference: 2.7s


# Dual input: 1 of 30 features encrypted (`begin_month`)


In [10]:
# Split every sample into two inputs: the `begin_month` column on its own (the input
# that will be encrypted) and all remaining columns (the input kept in clear).
X_train_enc = X_train[["begin_month"]]
X_train_clear = X_train.drop(columns="begin_month")

X_test_enc = X_test[["begin_month"]]
X_test_clear = X_test.drop(columns="begin_month")

In [11]:
# One status per circuit input, in the same order as the input sets given to `compile`:
# the first input (`begin_month`) is encrypted, the second (other columns) is clear.
inputs_encryption_status = ("encrypted", "clear")

# Re-compile the same fitted model with two inputs; later cells read the resulting
# circuit through `concrete_model.fhe_circuit`. The circuit object is the cell output.
concrete_model.compile(
    X_train_enc, X_train_clear, inputs_encryption_status=inputs_encryption_status
)

<concrete.fhe.compilation.circuit.Circuit at 0x2bacd6fe0>

In [12]:
# Report the maximum integer bit width found in the compiled circuit's graph.
print(concrete_model.fhe_circuit.graph.maximum_integer_bit_width())

6


In [13]:
# Generate the keys for the dual-input circuit before any real FHE run.
concrete_model.fhe_circuit.keygen()

In [14]:
def predict_dual_inputs(concrete_model, inputs_enc, inputs_clear, fhe_sample=None, simulate=True):
    """Predict classes one sample at a time on a circuit compiled with two inputs.

    Each sample is quantized, evaluated on ``concrete_model.fhe_circuit`` (through the
    circuit graph when simulating, otherwise through encrypt / run / decrypt),
    dequantized, post-processed and reduced to a class index with ``argmax``.

    Args:
        concrete_model: Fitted and compiled model. It must expose ``quantize_input``,
            ``dequantize_output``, ``post_processing`` and ``fhe_circuit``.
        inputs_enc: Samples for the circuit's FIRST input (array-like, DataFrame or
            Series). Despite the name, the value is forwarded purely by position;
            whether it is really encrypted depends on the ``inputs_encryption_status``
            the circuit was compiled with.
        inputs_clear: Samples for the circuit's SECOND input, row-aligned with
            ``inputs_enc``.
        fhe_sample: Number of leading samples to process. ``None`` processes
            ``len(inputs_enc)`` samples.
        simulate: If True, evaluate through ``fhe_circuit.graph``. If False, call
            ``keygen``, run the circuit for real and print the mean run time.

    Returns:
        np.ndarray: The per-sample ``argmax`` results stacked in order (one row per
        processed sample, typically shape ``(n_samples, 1)``); empty if no sample
        was processed.

    Raises:
        ValueError: If both inputs do not provide the same number of samples within
            the requested range.
    """
    if isinstance(inputs_enc, (pd.DataFrame, pd.Series)):
        inputs_enc = inputs_enc.to_numpy()

    if isinstance(inputs_clear, (pd.DataFrame, pd.Series)):
        inputs_clear = inputs_clear.to_numpy()

    if fhe_sample is None:
        fhe_sample = len(inputs_enc)

    first_batch = inputs_enc[:fhe_sample]
    second_batch = inputs_clear[:fhe_sample]

    # zip() stops at the shorter input, which would silently drop samples and hide
    # misaligned data; fail loudly instead.
    if len(first_batch) != len(second_batch):
        raise ValueError(
            "inputs_enc and inputs_clear must provide the same number of samples, "
            f"got {len(first_batch)} and {len(second_batch)}"
        )

    circuit = concrete_model.fhe_circuit

    if not simulate:
        circuit.keygen()

    # Process the samples one by one, as a client sending single requests would
    y_preds = []
    execution_times = []
    for input_enc, input_clear in zip(first_batch, second_batch):
        # Turn each sample into a batch of one by adding a leading axis
        input_enc = np.expand_dims(input_enc, axis=0)
        input_clear = np.expand_dims(input_clear, axis=0)
        q_input_enc, q_input_clear = concrete_model.quantize_input(input_enc, input_clear)

        if simulate:
            q_y_proba = circuit.graph(q_input_enc, q_input_clear, p_error=circuit.p_error)
        else:
            # Prepare each input on its own, passing None for the other position
            q_input_enc, _ = circuit.encrypt(q_input_enc, None)
            _, q_input_clear = circuit.encrypt(None, q_input_clear)

            # Only the circuit run is timed (not encryption or decryption).
            # perf_counter() is monotonic, unlike time.time().
            start = time.perf_counter()
            q_y_proba_enc = circuit.run(q_input_enc, q_input_clear)
            execution_times.append(time.perf_counter() - start)

            q_y_proba = circuit.decrypt(q_y_proba_enc)

        y_proba = concrete_model.dequantize_output(q_y_proba)
        y_proba = concrete_model.post_processing(y_proba)

        y_preds.append(np.argmax(y_proba, axis=1))

    # Timings exist only for real executions; skip the report when nothing ran,
    # since np.mean([]) would emit a warning and print "nan".
    if execution_times:
        print(f"FHE execution time per inference: {np.mean(execution_times) :.2}s")

    return np.array(y_preds)

In [15]:
# Simulated accuracy of the dual-input circuit (`begin_month` encrypted, rest clear)
# over the whole test split; `fhe_sample` is left unset so every row is processed.
y_preds_simulated = predict_dual_inputs(concrete_model, X_test_enc, X_test_clear, simulate=True)

print(f"Accuracy score (Concrete ML simulation) is {accuracy_score(y_test, y_preds_simulated)}")

Accuracy score (Concrete ML simulation) is 0.9193417858106285


In [16]:
# Real FHE execution on the dual-input circuit (`begin_month` encrypted, rest clear).
# The sample count comes from FHE_SAMPLE rather than a repeated literal, so every
# timing run in the notebook uses the same number of inferences.
y_preds_dual = predict_dual_inputs(
    concrete_model, X_test_enc, X_test_clear, fhe_sample=FHE_SAMPLE, simulate=False
)

FHE execution time per inference: 1.5s


# Dual input: 29 of 30 features encrypted (all except `begin_month`)


In [None]:
# Same column split as before with the roles swapped: `begin_month` is now the
# clear input and all remaining columns form the encrypted input.
X_train_clear_1 = X_train[["begin_month"]]
X_train_enc_29 = X_train.drop(columns="begin_month")

X_test_clear_1 = X_test[["begin_month"]]
X_test_enc_29 = X_test.drop(columns="begin_month")

In [None]:
# Statuses follow the order of the input sets given to `compile`: the first input
# (`begin_month`) is clear, the second (remaining columns) is encrypted.
# Note: this rebinds `inputs_encryption_status` and re-compiles `concrete_model`, so
# the 1/30 configuration is overwritten; re-run its cells to get back to it.
inputs_encryption_status = ("clear", "encrypted")

concrete_model.compile(
    X_train_clear_1, X_train_enc_29, inputs_encryption_status=inputs_encryption_status
)

<concrete.fhe.compilation.circuit.Circuit at 0x2b728cd00>

In [None]:
# Simulated accuracy for the 29/30 configuration. Arguments are given in circuit input
# order (clear column first, encrypted columns second), even though the helper's
# parameters are named `inputs_enc` / `inputs_clear`.
# NOTE(review): the last recorded run printed a lower accuracy here than the 1/30
# configuration did, with the same fitted model and test rows — confirm this is
# expected rather than an input-ordering or quantization issue.
y_preds_simulated_29 = predict_dual_inputs(
    concrete_model, X_test_clear_1, X_test_enc_29, simulate=True
)

print(f"Accuracy score (Concrete ML simulation) is {accuracy_score(y_test, y_preds_simulated_29)}")

Accuracy score (Concrete ML simulation) is 0.8774615592123011


In [None]:
# Real FHE execution for the 29/30 configuration (clear column first, encrypted
# columns second). The sample count comes from FHE_SAMPLE rather than a repeated
# literal, so this timing is comparable with the other FHE runs in the notebook.
y_preds_dual_29 = predict_dual_inputs(
    concrete_model, X_test_clear_1, X_test_enc_29, fhe_sample=FHE_SAMPLE, simulate=False
)

FHE execution time per inference: 2.5s
