In [1]:
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory

import numpy
import pandas

from concrete.ml.pandas import ClientEngine, load_encrypted_dataframe
from concrete.ml.pytest.utils import pandas_dataframe_are_equal

# Seed NumPy's global random state for reproducibility
numpy.random.seed(0)

# pylint: disable=pointless-statement, consider-using-with

# Client 1

In [2]:
# Directories holding each client's input CSV file; the temporary working directories used
# in this notebook are also created inside them
CLIENT_1_DIR = Path("client_1")
CLIENT_2_DIR = Path("client_2")

# Keyword arguments of the join, shared by the encrypted merge and the Pandas merge
HOW = "left"  # Left join: the first data-frame is the left operand
ON = "index"  # Name of the column the data-frames are joined on

In [3]:
# Using the "Tips" dataset: https://www.kaggle.com/code/sanjanabasu/tips-dataset/input
# It has been split into two separate files for this notebook; client 1 reads the left part
df_left = pandas.read_csv(CLIENT_1_DIR / "df_left.csv")

# Display the clear (non-encrypted) data-frame
df_left

Unnamed: 0,index,total_bill,tip,sex,smoker
0,1,12.54,2.5,Male,No
1,2,11.17,1.5,Female,No
2,3,20.29,2.75,Female,No
3,4,14.07,2.5,Male,No
4,5,15.69,3.0,Male,Yes
5,6,18.29,3.0,Male,No
6,7,16.93,3.07,Female,No
7,8,24.27,2.03,Male,Yes
8,9,8.77,2.0,Male,No


In [4]:
# Create a temporary working directory inside client 1's directory. The object is kept
# (no `with` block) so that the directory outlives this cell; it is removed explicitly in the
# last cell of the notebook
client_1_temp_dir = TemporaryDirectory(dir=str(CLIENT_1_DIR))
client_1_temp_path = Path(client_1_temp_dir.name)

# Location of client 1's keys; this file is later copied to client 2
client_1_keys_path = client_1_temp_path / "keys"

# NOTE(review): the engine presumably generates the keys and stores them at `keys_path`, since
# the file is copied to client 2 without any explicit save call - confirm with the library docs
client_1 = ClientEngine(keys_path=client_1_keys_path)

In [5]:
# Encrypt client 1's data-frame using client 1's engine
df_left_enc = client_1.encrypt_from_pandas(df_left)

In [6]:
# Display the schema of the encrypted data-frame: for each column, its dtype and, when set,
# its scale, zero_point or str_to_int mapping
df_left_enc.get_schema()

Unnamed: 0,index,total_bill,tip,sex,smoker
dtype,int64,float64,float64,object,object
scale,,0.903226,8.917197,,
zero_point,,6.92129,12.375796,,
str_to_int,,,,"{'Male': 1, 'Female': 2}","{'No': 1, 'Yes': 2}"


In [7]:
# Save the encrypted data-frame in client 1's temporary directory; the server section loads
# it back from this path
df_left_enc_path = client_1_temp_path / "df_left_enc"
df_left_enc.save(df_left_enc_path)

# Client 2

In [8]:
# Client 2 reads the right part of the dataset
df_right = pandas.read_csv(CLIENT_2_DIR / "df_right.csv")

# Display the clear (non-encrypted) data-frame
df_right

Unnamed: 0,index,day,time,size
0,2,Thur,Lunch,2
1,5,Sat,Dinner,3
2,9,Sun,Dinner,2


Both clients need to share the same private keys: client 2 copies client 1's keys.

In [9]:
# Temporary working directory of client 2, created inside client 2's directory and removed
# explicitly in the last cell of the notebook
client_2_temp_dir = TemporaryDirectory(dir=str(CLIENT_2_DIR))
client_2_temp_path = Path(client_2_temp_dir.name)

client_2_keys_path = client_2_temp_path / "keys"

# Copy client 1's keys file into client 2's directory so that both clients use the same keys.
# The trailing semicolon hides the destination path returned by `copy2` from the cell output
shutil.copy2(client_1_keys_path, client_2_keys_path);

In [10]:
# Create client 2's engine from the keys file copied from client 1
client_2 = ClientEngine(keys_path=client_2_keys_path)

In [11]:
# Encrypt client 2's data-frame using client 2's engine
df_right_enc = client_2.encrypt_from_pandas(df_right)

In [12]:
# Display the encrypted data-frame: values are rendered as shortened opaque strings instead of
# the clear values
df_right_enc

index,day,time,size
..48d4814937..,..dd6b288e52..,..497a80e2dd..,..41f496fe3a..
..0a19fbfc58..,..047a92f5bc..,..7f7a6f1167..,..5ca8e5edfc..
..79c726effe..,..6835b68ece..,..4ae3bca370..,..f4eb2bde07..


In [13]:
# Save the encrypted data-frame in client 2's temporary directory; the server section loads
# it back from this path
df_right_enc_path = client_2_temp_path / "df_right_enc"
df_right_enc.save(df_right_enc_path)

# Server

In [14]:
# The server loads both encrypted data-frames from the files saved by the clients. Note that
# this rebinds `df_left_enc` and `df_right_enc`, which were previously set in the client cells
df_left_enc = load_encrypted_dataframe(df_left_enc_path)
df_right_enc = load_encrypted_dataframe(df_right_enc_path)

In [15]:
# Join the two encrypted data-frames with the same `how` and `on` arguments as the Pandas
# reference merge; no client engine is involved in this call
df_joined_enc_server = df_left_enc.merge(df_right_enc, how=HOW, on=ON)

Both clients are able to decrypt the result

In [16]:
# Save the encrypted result so that it can be loaded back on the client side. In this notebook
# the file is written to client 1's temporary directory
df_joined_enc_server_path = client_1_temp_path / "df_joined_enc"

df_joined_enc_server.save(df_joined_enc_server_path)

# Client

In [17]:
# The client loads the encrypted joined data-frame saved by the server
df_joined_enc = load_encrypted_dataframe(df_joined_enc_server_path)

In [18]:
# Decrypt the joined data-frame into a Pandas data-frame using client 1's engine
df_joined_cml = client_1.decrypt_to_pandas(df_joined_enc)

In [19]:
# Display the decrypted result; the float values are close to, but not always exactly equal
# to, the original ones
df_joined_cml

Unnamed: 0,index,total_bill,tip,sex,smoker,day,time,size
0,1,12.091429,2.509286,Male,No,,,
1,2,10.984286,1.5,Female,No,Thur,Lunch,2.0
2,3,19.841429,2.733571,Female,No,,,
3,4,14.305714,2.509286,Male,No,,,
4,5,15.412857,2.957857,Male,Yes,Sat,Dinner,3.0
5,6,18.734286,2.957857,Male,No,,,
6,7,16.52,3.07,Female,No,,,
7,8,24.27,2.060714,Male,Yes,,,
8,9,8.77,1.948571,Male,No,Sun,Dinner,2.0


## Concrete ML vs Pandas comparison


In [20]:
# Compute the left-joined data-frame using Pandas
df_joined_pandas = pandas.merge(df_left, df_right, on=ON, how=HOW)

df_joined_pandas

Unnamed: 0,index,total_bill,tip,sex,smoker,day,time,size
0,1,12.54,2.5,Male,No,,,
1,2,11.17,1.5,Female,No,Thur,Lunch,2.0
2,3,20.29,2.75,Female,No,,,
3,4,14.07,2.5,Male,No,,,
4,5,15.69,3.0,Male,Yes,Sat,Dinner,3.0
5,6,18.29,3.0,Male,No,,,
6,7,16.93,3.07,Female,No,,,
7,8,24.27,2.03,Male,Yes,,,
8,9,8.77,2.0,Male,No,Sun,Dinner,2.0


In [21]:
# Compare the joined Pandas data-frame to the Concrete ML result. Float values are compared
# with a relative tolerance (`float_rtol`), as the decrypted floats displayed earlier slightly
# differ from the original ones
# NOTE(review): `equal_nan=True` presumably makes the NaN values of unmatched rows compare as
# equal - confirm against the helper's implementation
df_are_equal = pandas_dataframe_are_equal(
    df_joined_pandas, df_joined_cml, float_rtol=0.1, equal_nan=True
)

print("Concrete ML data-frame is equal to Pandas data-frame:", df_are_equal, "\n")

Concrete ML data-frame is equal to Pandas data-frame: True 



In [22]:
# Remove both temporary directories, along with the keys and encrypted files saved in them
client_1_temp_dir.cleanup()
client_2_temp_dir.cleanup()