In [23]:
import shutil
from pathlib import Path

import numpy
import pandas
from pandas.testing import assert_frame_equal

from concrete.ml.pandas import (
    encrypt_from_pandas,
    get_client_and_eval_keys,
    load_encrypted_dataframe,
)

# Seed NumPy's legacy global random generator with a fixed value for repeatable runs
numpy.random.seed(0)

# Client 1

In [24]:
def delete_all_files_except_one(dir_path, file_name):
    """Delete every file directly inside ``dir_path`` except the one named ``file_name``.

    Only the top level of ``dir_path`` is inspected. Sub-directories are left untouched:
    ``Path.unlink`` cannot remove a directory and would raise, aborting the clean-up midway.
    Nothing happens if ``dir_path`` does not exist or is not a directory.

    Args:
        dir_path (Path): Directory to clean.
        file_name (str): Name of the single entry that must be kept.
    """
    if not dir_path.is_dir():
        return

    for item in dir_path.iterdir():
        if item.name == file_name:
            continue

        # Real directories cannot be unlinked; symlinks (even to directories) can, so they are
        # still removed like regular files
        if item.is_dir() and not item.is_symlink():
            continue

        item.unlink()


# Working directories holding each client's files
CLIENT_1_DIR = Path("client_1")
CLIENT_2_DIR = Path("client_2")

# Start from a clean state: remove everything except each client's input CSV (later cells write
# keys and encrypted JSON files into these directories)
delete_all_files_except_one(CLIENT_1_DIR, "df_left.csv")
delete_all_files_except_one(CLIENT_2_DIR, "df_right.csv")

# Pandas kwargs, shared by the encrypted merge and the clear Pandas merge
HOW = "left"
ON = "id"

In [25]:
# Load client 1's clear data-frame (the left operand of the merge) and display it
df_left = pandas.read_csv(CLIENT_1_DIR / "df_left.csv")

df_left

Unnamed: 0,id,feat_left_1,feat_left_2,feat_left_3
0,1,13,31.4,apple
1,2,6,1.9,orange
2,3,1,10.9,apple
3,4,4,-22.2,watermelon
4,5,12,-5.7,watermelon
5,6,4,0.43,cherry
6,7,8,-0.112,apple
7,8,10,45.0,orange
8,9,4,13.1,watermelon
9,10,6,5.55,orange


In [4]:
# Location of client 1's key file; the same file is later copied to client 2
client_1_keys_path = CLIENT_1_DIR / "keys"

# NOTE(review): presumably generates the keys and stores them at `keys_path`, or loads them if
# the file already exists — confirm against the Concrete ML documentation
client_1, evaluation_keys_1 = get_client_and_eval_keys(keys_path=client_1_keys_path)

In [5]:
# Encrypt client 1's data-frame using its client object and evaluation keys
df_left_enc = encrypt_from_pandas(df_left, client_1, evaluation_keys_1)

In [6]:
# Show the per-column encryption scheme (dtype, scale, zero_point, str_to_int)
df_left_enc.print_scheme()

Unnamed: 0,id,feat_left_1,feat_left_2,feat_left_3
dtype,int64,int64,float64,object
scale,,,0.208333,
zero_point,,,-6.0,
str_to_int,,,,"{'apple': 1, 'orange': 2, 'watermelon': 3, 'ch..."


In [7]:
# Serialize client 1's encrypted data-frame to a JSON file that the server loads later
df_left_enc_path = CLIENT_1_DIR / "df_left_enc.json"
df_left_enc.to_json(df_left_enc_path)

# Client 2

In [8]:
# Load client 2's clear data-frame (the right operand of the merge) and display it
df_right = pandas.read_csv(CLIENT_2_DIR / "df_right.csv")

df_right

Unnamed: 0,id,feat_right_1,feat_right_2,feat_right_3,feat_right_4,feat_right_5
0,4,4,2.82,4,4,56.712
1,5,1,9.71,9,14,3.1838
2,6,4,-3.9,2,4,133.1


Both clients need to share the same private keys, so client 2 copies client 1's key file.

In [9]:
client_2_keys_path = CLIENT_2_DIR / "keys"

# Copy client 1's key file so that both clients work with the same keys; the trailing `;`
# keeps the returned path out of the cell output
shutil.copy2(client_1_keys_path, client_2_keys_path);

In [10]:
# Build client 2's client object and evaluation keys from the key file copied from client 1
# NOTE(review): presumably loads the existing keys rather than generating new ones — confirm
client_2, evaluation_keys_2 = get_client_and_eval_keys(keys_path=client_2_keys_path)

In [11]:
# Encrypt client 2's data-frame using its client object and evaluation keys
df_right_enc = encrypt_from_pandas(df_right, client_2, evaluation_keys_2)

In [12]:
# Display the encrypted data-frame: only shortened, opaque values are shown for each cell
df_right_enc

id,feat_right_1,feat_right_2,feat_right_3,feat_right_4,feat_right_5
..fd8a6da5f2..,..585c0e2bec..,..a1cf2f1ec7..,..d26b6ff514..,..1b95bcde4f..,..e9b0a3572a..
..a13dbb7cdf..,..65ee900b63..,..7b9f264234..,..010a271eab..,..43b7991796..,..7cd4802daa..
..7b80af603a..,..f99b59f413..,..500954b096..,..eb23f5ba89..,..b5462fd037..,..e9572a0c09..


In [13]:
# Serialize client 2's encrypted data-frame to a JSON file that the server loads later
df_right_enc_path = CLIENT_2_DIR / "df_right_enc.json"
df_right_enc.to_json(df_right_enc_path)

# Server

In [14]:
# Load both clients' encrypted data-frames from their JSON files
# (note: this rebinds `df_left_enc` / `df_right_enc`, previously set in the client sections)
df_left_enc = load_encrypted_dataframe(df_left_enc_path)
df_right_enc = load_encrypted_dataframe(df_right_enc_path)

In [15]:
# Merge the two encrypted data-frames, using the same `how` / `on` arguments as the clear
# Pandas merge computed later for comparison
df_joined_enc_server = df_left_enc.merge(df_right_enc, how=HOW, on=ON)

Both clients are able to decrypt the result, since they share the same private keys.

In [16]:
# Save the encrypted result as JSON; it is written into client 1's directory, from where it is
# loaded and decrypted in the next section
df_joined_enc_server_path = CLIENT_1_DIR / "df_joined_enc.json"

df_joined_enc_server.to_json(df_joined_enc_server_path)

# Client

In [17]:
# Load the encrypted joined data-frame produced by the server
df_joined_enc = load_encrypted_dataframe(df_joined_enc_server_path)

In [18]:
# Decrypt the joined data-frame into a Pandas data-frame using client 1's client object
df_joined_cml = df_joined_enc.decrypt_to_pandas(client_1)

## Concrete ML vs Pandas comparison


In [19]:
def df_are_equal(df_1, df_2):
    """Tell whether two data-frames hold the same values, treating NaNs as equal.

    NaN never compares equal to itself (NaN != NaN), so a plain element-wise comparison would
    report a mismatch wherever both frames contain NaN. In this notebook the Concrete ML result
    must match the Pandas one including the positions of the NaNs, so the check is delegated to
    ``assert_frame_equal`` (dtype differences are ignored).

    Returns:
        bool: True if ``assert_frame_equal`` accepts the pair, False if it raises AssertionError.
    """
    try:
        assert_frame_equal(df_1, df_2, check_dtype=False)
    except AssertionError:
        frames_match = False
    else:
        frames_match = True

    return frames_match

In [20]:
# Compute the left-joined data-frame using Pandas
df_joined_pandas = pandas.merge(df_left, df_right, on=ON, how=HOW)

df_joined_pandas

Unnamed: 0,id,feat_left_1,feat_left_2,feat_left_3,feat_right_1,feat_right_2,feat_right_3,feat_right_4,feat_right_5
0,1,13,31.4,apple,,,,,
1,2,6,1.9,orange,,,,,
2,3,1,10.9,apple,,,,,
3,4,4,-22.2,watermelon,4.0,2.82,4.0,4.0,56.712
4,5,12,-5.7,watermelon,1.0,9.71,9.0,14.0,3.1838
5,6,4,0.43,cherry,4.0,-3.9,2.0,4.0,133.1
6,7,8,-0.112,apple,,,,,
7,8,10,45.0,orange,,,,,
8,9,4,13.1,watermelon,,,,,
9,10,6,5.55,orange,,,,,


In [21]:
# Compare the joined Pandas data-frame to the Concrete ML result
# NOTE(review): float columns carry a scale / zero_point in the encryption scheme, so they are
# presumably quantized; decrypted floats then only approximate the original values and this
# comparison can report False even when the rows are joined correctly — confirm
print("Concrete ML result is equal to Pandas:", df_are_equal(df_joined_pandas, df_joined_cml), "\n")

df_joined_cml

Concrete ML result is equal to Pandas: False 



Unnamed: 0,id,feat_left_1,feat_left_2,feat_left_3,feat_right_1,feat_right_2,feat_right_3,feat_right_4,feat_right_5
0,1,13,33.6,apple,,,,,
1,2,6,0.0,orange,,,,,
2,3,1,9.6,apple,,,,,
3,4,4,-24.0,watermelon,4.0,2.916429,4.0,4.0,55.678371
4,5,12,-4.8,watermelon,1.0,9.721429,9.0,14.0,0.0
5,6,4,0.0,cherry,4.0,-3.888571,2.0,4.0,129.9162
6,7,8,0.0,apple,,,,,
7,8,10,43.2,orange,,,,,
8,9,4,14.4,watermelon,,,,,
9,10,6,4.8,orange,,,,,
