Commit

chore: conformance
RomanBredehoft committed Apr 30, 2024
1 parent a5666f1 commit ce9fb57
Showing 4 changed files with 90 additions and 59 deletions.
23 changes: 3 additions & 20 deletions docs/advanced_examples/LogisticRegressionTraining.ipynb
@@ -30,8 +30,8 @@
"from matplotlib.lines import Line2D\n",
"from sklearn import datasets\n",
"from sklearn.linear_model import SGDClassifier as SklearnSGDClassifier\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"from concrete.ml.sklearn import SGDClassifier\n",
"\n",
@@ -383,10 +383,10 @@
" model_concrete_partial.partial_fit(x_batch, y_batch, fhe=\"simulate\")\n",
"\n",
" model_concrete_partial.compile(x_train)\n",
" \n",
"\n",
" # Measure accuracy of the model with FHE simulation\n",
" y_pred_partial_fhe = model_concrete_partial.predict(x_test, fhe=\"simulate\")\n",
" \n",
"\n",
" accuracy_partial = accuracy_score(y_test, y_pred_partial_fhe).mean()\n",
" accuracy_scores.append(accuracy_partial)\n",
"\n",
@@ -404,23 +404,6 @@
"metadata": {
"execution": {
"timeout": 10800
},
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
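For readers who want to reproduce the pattern this notebook exercises, here is a minimal, self-contained sketch. The toy dataset, the constructor arguments (fit_encrypted, parameters_range) and the batch size are illustrative assumptions drawn from Concrete ML's documented FHE-training API; partial_fit(..., fhe="simulate"), compile and predict(..., fhe="simulate") are the calls visible in the diff above.

import numpy
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from concrete.ml.sklearn import SGDClassifier

# Toy binary-classification data, scaled to a bounded range as FHE training expects
X, y = datasets.make_classification(n_samples=200, n_features=4, random_state=0)
X = MinMaxScaler(feature_range=(-1.0, 1.0)).fit_transform(X)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

# fit_encrypted and parameters_range are assumed from Concrete ML's FHE-training API
model = SGDClassifier(fit_encrypted=True, parameters_range=(-1.0, 1.0), random_state=0)

# Train batch by batch, simulating the FHE execution of each iteration
batch_size = 8
for start in range(0, len(x_train) - batch_size + 1, batch_size):
    x_batch = x_train[start : start + batch_size]
    y_batch = y_train[start : start + batch_size]
    model.partial_fit(x_batch, y_batch, fhe="simulate")

# Compile, then measure accuracy with FHE simulation
model.compile(x_train)
accuracy = accuracy_score(y_test, model.predict(x_test, fhe="simulate"))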
12 changes: 8 additions & 4 deletions src/concrete/ml/common/utils.py
@@ -565,7 +565,9 @@ def all_values_are_floats(*values: Any) -> bool:
return all(_is_of_dtype(value, SUPPORTED_FLOAT_TYPES) for value in values)


def all_values_are_of_dtype(*values: Any, dtypes: Union[str, List[str]], allow_none: bool = False) -> bool:
def all_values_are_of_dtype(
*values: Any, dtypes: Union[str, List[str]], allow_none: bool = False
) -> bool:
"""Indicate if all unpacked values are of the specified dtype(s).
Args:
@@ -591,10 +593,12 @@ def all_values_are_of_dtype(*values: Any, dtypes: Union[str, List[str]], allow_n

supported_dtypes[dtype] = supported_dtype

# If the values can be None, only check the other values
# If the values can be None, only check the other values
if allow_none:
return all(_is_of_dtype(value, supported_dtypes) if value is not None else True for value in values)

return all(
_is_of_dtype(value, supported_dtypes) if value is not None else True for value in values
)

return all(_is_of_dtype(value, supported_dtypes) for value in values)


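A quick usage sketch of the helper as reformatted above, with the behavior read directly from the code shown (expected return values noted in comments):

import numpy

x = numpy.array([1, 2], dtype=numpy.int64)
y = numpy.array([3], dtype=numpy.int64)

all_values_are_of_dtype(x, y, dtypes="int64")    # True: every value matches
all_values_are_of_dtype(x, y, dtypes="float64")  # False: values are int64

# With allow_none=True, None values are skipped and only the rest are checked
all_values_are_of_dtype(x, None, y, dtypes="int64", allow_none=True)  # True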
39 changes: 32 additions & 7 deletions src/concrete/ml/quantization/quantized_module.py
@@ -559,7 +559,9 @@ def _fhe_forward(
return q_results[0]
return q_results

def quantize_input(self, *x: Optional[numpy.ndarray]) -> Union[numpy.ndarray, Tuple[Optional[numpy.ndarray], ...]]:
def quantize_input(
self, *x: Optional[numpy.ndarray]
) -> Union[numpy.ndarray, Tuple[Optional[numpy.ndarray], ...]]:
"""Take the inputs in fp32 and quantize it using the learned quantization parameters.
Args:
@@ -571,22 +573,38 @@ def quantize_input(self, *x: Optional[numpy.ndarray]) -> Union[numpy.ndarray, Tu
"""
n_inputs = len(self.input_quantizers)
n_values = len(x)

assert_true(
n_values == n_inputs,
f"Got {n_values} inputs, expected {n_inputs}. Either the quantized module has not been "
"properly initialized or the input data has been changed since its initialization.",
ValueError,
)

assert not all(x_i is None for x_i in x), "Please provide at least one input to quantize."

q_x = tuple(self.input_quantizers[idx].quant(x[idx]) if x[idx] is not None else None for idx in range(len(x)))
# Ignore [arg-type] check from mypy as it is not able to see that the input to `quant`
# cannot be None
q_x = tuple(
(
self.input_quantizers[idx].quant(x[idx]) # type: ignore[arg-type]
if x[idx] is not None
else None
)
for idx in range(len(x))
)

# Make sure all inputs are quantized to int64
assert all_values_are_of_dtype(*q_x, dtypes="int64", allow_none=True), "Inputs were not quantized to int64"
assert all_values_are_of_dtype(
*q_x, dtypes="int64", allow_none=True
), "Inputs were not quantized to int64"

return q_x[0] if len(q_x) == 1 else q_x
if len(q_x) == 1:
assert q_x[0] is not None

return q_x[0]

return q_x

def dequantize_output(
self, *q_y_preds: numpy.ndarray
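
The positional-None convention this change formalizes can be summarized in two calls (module stands for a QuantizedModule whose circuit takes (X, y, weight, bias); the variable names are illustrative):

# Quantize only the data: the parameter slots are passed as None
q_X, q_y, _, _ = module.quantize_input(X_batch, y_batch, None, None)

# Quantize only the parameters: the data slots are passed as None
_, _, q_weights, q_bias = module.quantize_input(None, None, weights, bias)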
@@ -732,8 +750,15 @@ def compile(
# Quantize the inputs
q_inputs = self.quantize_input(*inputs)

# Make sure all inputs are quantized to int64 and are not None
assert all_values_are_of_dtype(
*to_tuple(q_inputs), dtypes="int64", allow_none=False
), "Inputs were not quantized to int64"

# Generate the input-set with proper dimensions
inputset = _get_inputset_generator(q_inputs)
# Ignore [arg-type] check from mypy as it is not able to see that no values in `q_inputs`
# is None
inputset = _get_inputset_generator(q_inputs) # type: ignore[arg-type]

# Check that p_error or global_p_error is not set in both the configuration and in the
# direct parameters
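The guard added to compile requires every quantized input to be a non-None int64 array before the input-set is built. _get_inputset_generator is internal to the project; a plausible, simplified sketch of what such a generator could yield, purely as an illustration and not the actual implementation:

def inputset_generator(q_inputs):
    # Yield one tuple of single-sample slices per calibration sample,
    # preserving the leading batch dimension of each array
    n_samples = q_inputs[0].shape[0]
    for i in range(n_samples):
        yield tuple(q_input[[i]] for q_input in q_inputs)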
75 changes: 47 additions & 28 deletions src/concrete/ml/sklearn/linear_model.py
@@ -353,10 +353,10 @@ def _get_training_quantized_module(
iterations=1,
fit_bias=self.fit_intercept,
)

# Enable the underlying FHE circuit to be composed with itself
# This feature is used in order to be able to iterate in the clear n times without having
# to encrypt/decrypt the weight/bias values between each loop
# to encrypt/decrypt the weight/bias values between each loop
configuration = Configuration(composable=True)

# Compile the model using the compile set
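
The comment above is the key design point: composition avoids a decrypt/encrypt round-trip per training iteration. A standalone sketch of the feature using Concrete Python (the concrete.fhe API is assumed here; it is not part of this diff):

from concrete import fhe

@fhe.compiler({"counter": "encrypted"})
def increment(counter):
    return (counter + 1) % 16

# composable=True allows the circuit's encrypted output to be fed back as input
circuit = increment.compile(list(range(16)), composable=True)

state = circuit.encrypt(0)
for _ in range(5):  # iterate in the clear, with no intermediate decryption
    state = circuit.run(state)
assert circuit.decrypt(state) == 5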
@@ -434,11 +434,11 @@ def _fit_encrypted(
"Target values must be 1D, with a shape of (n_samples,), when FHE training is "
f"enabled. Got {y.shape}"
)

n_samples, n_features = X.shape
weight_shape = (1, n_features, 1)
bias_shape = (1,1,1)
bias_shape = (1, 1, 1)

# Build the quantized module
# In case of a partial fit, only do so if it has not been done already (which indicates
# that this is the partial fit's first call)
Expand Down Expand Up @@ -472,9 +472,11 @@ def _fit_encrypted(

y = self.label_encoder.transform(y)

# Mypy
assert self.training_quantized_module.fhe_circuit is not None

# Key generation
if fhe == "execute": # pragma: no cover
assert self.training_quantized_module.fhe_circuit is not None

# Generate the keys only if necessary. This is already done using the `force=False`
# parameter, but here we also avoid printing too much verbose if activated
Expand Down Expand Up @@ -541,8 +543,9 @@ def _fit_encrypted(
# A partial fit is similar to running a fit with a single iteration
max_iter = 1 if is_partial_fit else self.max_iter

# Quantize and encrypt the batches
X_batches_enc, y_batches_enc = [], []
for iteration_step in range(max_iter):
for _ in range(max_iter):

# Sample the batches from X and y in the clear
batch_indexes = self.random_number_generator.choice(
@@ -555,54 +558,66 @@
# Build the batches
X_batch = X[batch_indexes].astype(float).reshape((1, self.batch_size, n_features))
y_batch = y[batch_indexes].reshape((1, self.batch_size, 1)).astype(float)
# The underlying quantized module expects (X, y, weight, bias) as inputs. We thus only

# The underlying quantized module expects (X, y, weight, bias) as inputs. We thus only
# quantize the input and target values using the first and second positional parameter
q_X_batch, q_y_batch, _, _ = self.training_quantized_module.quantize_input(X_batch, y_batch, None, None)

q_X_batch, q_y_batch, _, _ = self.training_quantized_module.quantize_input(
X_batch, y_batch, None, None
)

# If the training is done in FHE, encrypt the input and target values
if fhe == "execute":

# Similarly, the underlying FHE circuit expects (X, y, weight, bias) as inputs, and
# so does the encrypt method
X_batch_enc, y_batch_enc, _, _ = self.training_quantized_module.fhe_circuit.encrypt(q_X_batch, q_y_batch, None, None)

X_batch_enc, y_batch_enc, _, _ = self.training_quantized_module.fhe_circuit.encrypt(
q_X_batch, q_y_batch, None, None
)

else:
X_batch_enc, y_batch_enc = q_X_batch, q_y_batch

X_batches_enc.append(X_batch_enc)
y_batches_enc.append(y_batch_enc)

# Similarly, we only quantize the weight and bias values using the third and fourth
# Similarly, we only quantize the weight and bias values using the third and fourth
# position parameter
_, _, q_weights, q_bias = self.training_quantized_module.quantize_input(None, None, weights, bias)
_, _, q_weights, q_bias = self.training_quantized_module.quantize_input(
None, None, weights, bias
)

# If the training is done in FHE, encrypt the weight and bias values
if fhe == "execute":

# Similarly, we only encrypt using the third and fourth position parameter
_, _, weights_enc, bias_enc = self.training_quantized_module.fhe_circuit.encrypt(
None, None, q_weights, q_bias
)

else:
weights_enc, bias_enc = q_weights, q_bias

# Iterate on the training quantized module in the clear
for iteration_step in range(max_iter):
X_batch_enc_i, y_batch_enc_i = X_batches_enc[iteration_step], y_batches_enc[iteration_step]

X_batch_enc_i, y_batch_enc_i = (
X_batches_enc[iteration_step],
y_batches_enc[iteration_step],
)

# Train the model over one iteration
inference_start = time.time()

# If the training is done in FHE, execute the underlying FHE circuit directly on the
# encrypted values
if fhe == "execute":
weights_enc, bias_enc = self.training_quantized_module.fhe_circuit.run(
X_batch_enc_i, y_batch_enc_i, weights_enc, bias_enc,
X_batch_enc_i,
y_batch_enc_i,
weights_enc,
bias_enc,
)
# Else, use the quantized module on the quantized values (works for both quantized

# Else, use the quantized module on the quantized values (works for both quantized
# clear and FHE simulation modes)
else:
weights_enc, bias_enc = self.training_quantized_module.quantized_forward(
@@ -616,12 +631,16 @@

# If the training is done in FHE, encrypt the weight and bias values
if fhe == "execute":
q_weights, q_bias = self.training_quantized_module.fhe_circuit.decrypt(weights_enc, bias_enc)

q_weights, q_bias = self.training_quantized_module.fhe_circuit.decrypt(
weights_enc, bias_enc
)

else:
q_weights, q_bias = weights_enc, bias_enc

fitted_weights, fitted_bias = self.training_quantized_module.dequantize_output(q_weights, q_bias)
fitted_weights, fitted_bias = self.training_quantized_module.dequantize_output(
q_weights, q_bias
)

# Reshape parameters to fit what scikit-learn expects
fitted_weights, fitted_bias = fitted_weights.squeeze(0), fitted_bias.squeeze(0)
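Putting the reformatted pieces together, one encrypted fit reduces to the following flow. This is a condensed, illustrative sketch: module and fhe_circuit stand for self.training_quantized_module and its fhe_circuit, and a single batch is reused for brevity, whereas the real loop samples a fresh batch per iteration.

# Quantize data and parameters separately, using the positional-None convention
q_X, q_y, _, _ = module.quantize_input(X_batch, y_batch, None, None)
_, _, q_weights, q_bias = module.quantize_input(None, None, weights, bias)

# Encrypt them the same way (fhe == "execute" path)
X_enc, y_enc, _, _ = fhe_circuit.encrypt(q_X, q_y, None, None)
_, _, weights_enc, bias_enc = fhe_circuit.encrypt(None, None, q_weights, q_bias)

# The composable circuit keeps the parameters encrypted across iterations
for _ in range(max_iter):
    weights_enc, bias_enc = fhe_circuit.run(X_enc, y_enc, weights_enc, bias_enc)

# Decrypt and dequantize once, at the very end
q_weights, q_bias = fhe_circuit.decrypt(weights_enc, bias_enc)
fitted_weights, fitted_bias = module.dequantize_output(q_weights, q_bias)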
