In [None]:
pip install concrete-ml

Collecting concrete-ml
  Downloading concrete_ml-1.7.0-py3-none-any.whl.metadata (17 kB)
Collecting brevitas==0.10.2 (from concrete-ml)
  Downloading brevitas-0.10.2-py3-none-any.whl.metadata (7.6 kB)
Collecting concrete-python==2.8.1 (from concrete-ml)
  Downloading concrete_python-2.8.1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (12 kB)
Collecting hummingbird-ml==0.4.11 (from hummingbird-ml[onnx]==0.4.11->concrete-ml)
  Downloading hummingbird_ml-0.4.11-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting numpy==1.24.2 (from concrete-ml)
  Downloading numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting onnx==1.16.1 (from concrete-ml)
  Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnxoptimizer==0.3.13 (from concrete-ml)
  Downloading onnxoptimizer-0.3.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB)
Collecting onnxruntime==1.18 (from concr

In [None]:
import numpy as np
import pandas as pd

# Every random draw in this cell comes from NumPy's legacy global generator,
# seeded once here. The statements below are order-sensitive: reordering any
# draw would change all subsequent values.
np.random.seed(42)
n_rows = 10000


def _sample_labels(labels):
    """Return n_rows labels picked at random (np.random.choice) from `labels`."""
    return np.random.choice(labels, n_rows)


def _sample_uniform(low, high):
    """Return n_rows draws of np.random.uniform(low, high), rounded to 1 decimal."""
    return np.round(np.random.uniform(low, high, n_rows), 1)


# Raw synthetic patient records. Categorical fields are sampled as text labels
# here and converted to numeric codes further down.
data = {
    "PatientID": np.arange(1, n_rows + 1),
    "Age": np.random.randint(20, 80, n_rows),
    "Gender": _sample_labels(["M", "F"]),
    "BMI": _sample_uniform(18.5, 40.0),
    "SmokingStatus": _sample_labels(["Current", "Former", "Never"]),
    "BloodPressure": _sample_labels(["120/80", "130/85", "140/90", "150/95", "125/82"]),
    "Cholesterol": _sample_labels(["Normal", "Borderline", "High"]),
    "FamilyHistory": _sample_labels(["Yes", "No"]),
    "PhysicalActivity": _sample_labels(["Low", "Moderate", "High"]),
    "AlcoholIntake": _sample_labels(["Low", "Moderate", "High"]),
    "DietQuality": _sample_labels(["Poor", "Moderate", "Good", "Excellent"]),
    "StressLevel": _sample_labels(["Low", "Medium", "High"]),
    "GlucoseLevel": _sample_uniform(70, 180),
    "HbA1c": _sample_uniform(4.5, 8.0),
}

df = pd.DataFrame(data)

# Label -> numeric code for each categorical column.
# "125/82" maps to 1.5, which makes the encoded BloodPressure column float.
mappings = {
    "Gender": {"M": 0, "F": 1},
    "SmokingStatus": {"Current": 2, "Former": 1, "Never": 0},
    "BloodPressure": {
        "120/80": 1,
        "130/85": 2,
        "140/90": 3,
        "150/95": 4,
        "125/82": 1.5,
    },
    "Cholesterol": {"Normal": 1, "Borderline": 2, "High": 3},
    "FamilyHistory": {"Yes": 1, "No": 0},
    "PhysicalActivity": {"Low": 1, "Moderate": 2, "High": 3},
    "AlcoholIntake": {"Low": 0, "Moderate": 1, "High": 2},
    "DietQuality": {"Poor": 1, "Moderate": 2, "Good": 3, "Excellent": 4},
    "StressLevel": {"Low": 1, "Medium": 2, "High": 3},
}

# Encode each categorical column in place with its code table.
for col in mappings:
    mapping = mappings[col]
    df[col] = df[col].map(mapping)

# Diabetes score: weighted mix of the encoded features plus N(0, 2) noise.
# The terms are summed left to right so the floating-point result is stable.
diabetes_raw = (
    0.1 * df["Age"]
    + 0.2 * df["BMI"] ** 1.5
    + 0.25 * np.log1p(df["GlucoseLevel"])
    + 0.15 * df["SmokingStatus"]
    + 0.3 * df["BloodPressure"]
    + 0.25 * df["Cholesterol"] * df["DietQuality"]
    + 0.2 * df["FamilyHistory"]
    + 0.1 * df["PhysicalActivity"]
    + np.random.normal(0, 2, n_rows)
)
# The integer cast truncates toward zero; the score is then limited to [0, 100].
df["DiabetesRiskScore"] = diabetes_raw.astype(int).clip(0, 100)

# Heart-disease score: same recipe with different terms and N(0, 3) noise.
# Its noise is drawn after the diabetes noise, matching the seeded stream order.
heart_raw = (
    0.2 * np.sqrt(df["Age"])
    + 0.15 * df["BMI"]
    + 0.3 * df["BloodPressure"]
    + 0.25 * df["Cholesterol"] ** 2
    + 0.2 * df["StressLevel"]
    + 0.15 * df["PhysicalActivity"] * df["DietQuality"]
    + 0.25 * df["AlcoholIntake"]
    + np.random.normal(0, 3, n_rows)
)
df["HeartDiseaseRiskScore"] = heart_raw.astype(int).clip(0, 100)

# Preview the first rows of the encoded frame.
df.head()


Unnamed: 0,PatientID,Age,Gender,BMI,SmokingStatus,BloodPressure,Cholesterol,FamilyHistory,PhysicalActivity,AlcoholIntake,DietQuality,StressLevel,GlucoseLevel,HbA1c,DiabetesRiskScore,HeartDiseaseRiskScore
0,1,58,0,27.5,0,2.0,1,0,1,1,3,3,84.7,6.4,35,6
1,2,71,0,26.0,0,1.5,3,0,3,1,2,2,141.6,5.1,37,10
2,3,48,0,37.0,2,3.0,3,1,3,0,4,1,177.5,5.5,56,14
3,4,34,0,21.2,2,3.0,1,0,3,0,2,1,119.8,6.3,26,8
4,5,62,0,30.7,1,1.5,3,0,3,2,1,1,86.4,7.9,42,7


In [None]:
from sklearn.model_selection import train_test_split
from concrete.ml.sklearn import LinearRegression

# Rebuild the regression target as a weighted sum of Age, BMI and
# log1p(GlucoseLevel) plus N(0, 5) noise, limited to [0, 100].
# NOTE: this overwrites the DiabetesRiskScore column computed in the previous
# cell, so the df.head() preview shown there no longer reflects this column.
target_noise = np.random.normal(0, 5, n_rows)
linear_signal = 0.1 * df["Age"] + 0.2 * df["BMI"] + 0.3 * np.log1p(df["GlucoseLevel"])
df["DiabetesRiskScore"] = (linear_signal + target_noise).clip(0, 100)

# Only the three columns that enter the target formula are used as features.
feature_columns = ["Age", "BMI", "GlucoseLevel"]
X = df[feature_columns]
y = df["DiabetesRiskScore"]

# 80/20 split with a fixed random_state so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the Concrete ML linear regressor on the training split.
concrete_model = LinearRegression()
concrete_model.fit(X_train, y_train)

# Predictions from the default predict() call (reported as "In clear" below).
y_pred_clear = concrete_model.predict(X_test)

# Compile the fitted model, passing the training features.
# NOTE(review): presumably these act as the representative inputs for building
# the FHE circuit -- confirm against the Concrete ML documentation.
concrete_model.compile(X_train)

# Same test inputs, this time requesting fhe="execute" (reported as "In FHE").
y_pred_fhe = concrete_model.predict(X_test, fhe="execute")

# Show both prediction arrays, then the truncated percentage of test rows
# whose two predictions are exactly equal.
print("In clear  :", y_pred_clear)
print("In FHE    :", y_pred_fhe)
print(f"Similarity: {int((y_pred_fhe == y_pred_clear).mean()*100)}%")


In clear  : [ 9.30646006 11.53048772 16.6264881  ... 10.03628569 11.53048772
  8.51699887]
In FHE    : [ 9.30646006 11.53048772 16.6264881  ... 10.03628569 11.53048772
  8.51699887]
Similarity: 100%
