In [2]:
import json
import shutil
from pathlib import Path

import numpy
import pandas
from pandas.testing import assert_frame_equal

from concrete.ml.pandas import (
    encrypt_from_pandas,
    get_client_and_eval_keys,
    load_encrypted_dataframe,
)

# Seed NumPy's global random generator so that re-running the notebook is repeatable.
# NOTE(review): no cell visible here draws random numbers directly; presumably this matters for
# library internals - confirm it is still needed.
numpy.random.seed(0)

# Client 1

In [3]:
def delete_all_files_except_one(dir_path, file_name):
    """Delete every file located directly in a directory, except the one with the given name.

    This is used to reset a client directory before running the notebook again, while keeping
    its input CSV file.

    Args:
        dir_path (Path): The directory to clean. If it does not exist or is not a directory,
            nothing is done.
        file_name (str): Name of the only entry that must be kept.
    """
    if not dir_path.is_dir():
        return

    for item in dir_path.iterdir():
        if item.name == file_name:
            continue

        # Path.unlink raises on a real directory (for example a '.ipynb_checkpoints' folder),
        # which would abort the clean-up halfway. Only files and symbolic links are removed,
        # sub-directories are left untouched
        if item.is_file() or item.is_symlink():
            item.unlink()


# Directories holding each client's files (input CSV, keys and encrypted data-frames)
CLIENT_1_DIR = Path("client_1")
CLIENT_2_DIR = Path("client_2")

# Start from a clean state: remove the files left by a previous run, keeping only the input CSVs
delete_all_files_except_one(CLIENT_1_DIR, "df_left.csv")
delete_all_files_except_one(CLIENT_2_DIR, "df_right.csv")

# Pandas kwargs, shared by the encrypted merge and the clear Pandas merge used as reference
HOW = "left"
ON = "id"

In [4]:
# Load client 1's clear data-frame, using the first CSV column as the index
df_left = pandas.read_csv(CLIENT_1_DIR / "df_left.csv", index_col=0)

df_left

Unnamed: 0,id,feat_left_1,feat_left_2,feat_left_3
0,1,13,3,8
1,2,6,5,8
2,3,1,8,9
3,4,4,7,2
4,5,12,9,6
5,6,4,9,10
6,7,8,13,14
7,8,10,11,9
8,9,4,2,10
9,10,6,7,5


In [5]:
# Location of client 1's keys. This path is later copied with shutil.copy2, so it is expected
# to be a single file
client_1_keys_path = CLIENT_1_DIR / "keys"

# Get client 1's client object and evaluation keys
# NOTE(review): presumably the keys are generated and stored at keys_path when the file does not
# exist yet, and loaded from it otherwise - confirm against the Concrete ML documentation
client_1, evaluation_keys_1 = get_client_and_eval_keys(keys_path=client_1_keys_path)

In [6]:
# Build the encrypted version of client 1's data-frame using its client object and evaluation keys
df_left_enc = encrypt_from_pandas(df_left, client_1, evaluation_keys_1)

In [7]:
# Save the encrypted data-frame as a JSON file; the server cell loads it back from this path
df_left_enc_path = CLIENT_1_DIR / "df_left_enc.json"
df_left_enc.to_json(df_left_enc_path)

# Client 2

In [8]:
# Load client 2's clear data-frame, using the first CSV column as the index
df_right = pandas.read_csv(CLIENT_2_DIR / "df_right.csv", index_col=0)

df_right

Unnamed: 0,id,feat_right_1,feat_right_2,feat_right_3,feat_right_4,feat_right_5
0,4,4,6,4,4,4
1,5,1,1,9,14,8
2,6,4,3,2,4,1


Clients need to share private keys

In [9]:
# Location of client 2's keys
client_2_keys_path = CLIENT_2_DIR / "keys"

# Share the keys: copy client 1's key file (and its metadata) into client 2's directory.
# The trailing semicolon hides the returned destination path from the cell output
shutil.copy2(client_1_keys_path, client_2_keys_path);

In [10]:
# Get client 2's client object and evaluation keys from the key file copied from client 1
# NOTE(review): presumably an existing file at keys_path is loaded instead of generating new
# keys - confirm against the Concrete ML documentation
client_2, evaluation_keys_2 = get_client_and_eval_keys(keys_path=client_2_keys_path)

In [11]:
# Build the encrypted version of client 2's data-frame using its client object and evaluation keys
df_right_enc = encrypt_from_pandas(df_right, client_2, evaluation_keys_2)

In [12]:
# Save the encrypted data-frame as a JSON file; the server cell loads it back from this path
df_right_enc_path = CLIENT_2_DIR / "df_right_enc.json"
df_right_enc.to_json(df_right_enc_path)

# Server

In [13]:
# Server side: load both encrypted data-frames from the JSON files written by the clients.
# This rebinds df_left_enc and df_right_enc, which were first created in the client cells
df_left_enc = load_encrypted_dataframe(df_left_enc_path)
df_right_enc = load_encrypted_dataframe(df_right_enc_path)

In [14]:
# Join the two encrypted data-frames with the shared Pandas kwargs (HOW, ON). No client object is
# passed to this call
df_joined_enc_server = df_left_enc.merge(df_right_enc, how=HOW, on=ON)

Both clients are able to decrypt the result

In [15]:
# Save the encrypted joined data-frame as a JSON file in client 1's directory
df_joined_enc_server_path = CLIENT_1_DIR / "df_joined_enc.json"

df_joined_enc_server.to_json(df_joined_enc_server_path)

# Client

In [16]:
# Client side: load the encrypted joined data-frame from the file written by the server
df_joined_enc = load_encrypted_dataframe(df_joined_enc_server_path)

In [17]:
# Decrypt the joined data-frame into a Pandas data-frame using client 1's client object
df_joined_cml = df_joined_enc.decrypt_to_pandas(client_1)

## Concrete ML vs Pandas comparison


In [18]:
def df_are_equal(df_1, df_2):
    """Check that two data-frames hold the same values, NaN positions included.

    NaN values are never equal to one another (i.e., NaN != NaN), so a plain element-wise
    comparison would report a mismatch wherever both data-frames contain NaN. In this notebook,
    the Concrete ML result must match the Pandas one including the positions of the NaN values,
    which is what 'assert_frame_equal' checks. Data types are not compared.

    Args:
        df_1 (pandas.DataFrame): The first data-frame.
        df_2 (pandas.DataFrame): The second data-frame.

    Returns:
        bool: True if both data-frames are identical, False otherwise.
    """
    try:
        assert_frame_equal(df_1, df_2, check_dtype=False)
    except AssertionError:
        # Any mismatch found by Pandas is reported as "not equal" instead of an error
        return False
    else:
        return True

In [19]:
# Compute the reference joined data-frame in the clear using Pandas, with the same kwargs
# (HOW, ON) as the encrypted merge
df_joined_pandas = pandas.merge(df_left, df_right, on=ON, how=HOW)

df_joined_pandas

Unnamed: 0,id,feat_left_1,feat_left_2,feat_left_3,feat_right_1,feat_right_2,feat_right_3,feat_right_4,feat_right_5
0,1,13,3,8,,,,,
1,2,6,5,8,,,,,
2,3,1,8,9,,,,,
3,4,4,7,2,4.0,6.0,4.0,4.0,4.0
4,5,12,9,6,1.0,1.0,9.0,14.0,8.0
5,6,4,9,10,4.0,3.0,2.0,4.0,1.0
6,7,8,13,14,,,,,
7,8,10,11,9,,,,,
8,9,4,2,10,,,,,
9,10,6,7,5,,,,,


In [20]:
# Compare the joined Pandas data-frame to the Concrete ML result
print("Concrete ML result is equal to Pandas:", df_are_equal(df_joined_pandas, df_joined_cml), "\n")

df_joined_cml

Concrete ML result is equal to Pandas: True 



Unnamed: 0,id,feat_left_1,feat_left_2,feat_left_3,feat_right_1,feat_right_2,feat_right_3,feat_right_4,feat_right_5
0,1.0,13.0,3.0,8.0,,,,,
1,2.0,6.0,5.0,8.0,,,,,
2,3.0,1.0,8.0,9.0,,,,,
3,4.0,4.0,7.0,2.0,4.0,6.0,4.0,4.0,4.0
4,5.0,12.0,9.0,6.0,1.0,1.0,9.0,14.0,8.0
5,6.0,4.0,9.0,10.0,4.0,3.0,2.0,4.0,1.0
6,7.0,8.0,13.0,14.0,,,,,
7,8.0,10.0,11.0,9.0,,,,,
8,9.0,4.0,2.0,10.0,,,,,
9,10.0,6.0,7.0,5.0,,,,,
