From 0d55a9fae74edf990a087463a52b81ef196862a2 Mon Sep 17 00:00:00 2001 From: Naftali Harris Date: Tue, 29 Jul 2014 23:46:30 -0700 Subject: [PATCH] Avoid numerical instability This avoids catastrophic cancellation (effectively computing 1 - 1) when the margin is very negative, for example: >>> from math import exp >>> margin = -40 >>> 1 - 1 / (1 + exp(margin)) 0.0 >>> exp(margin) / (1 + exp(margin)) 4.248354255291589e-18 >>> --- python/pyspark/mllib/classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 9e28dfbb9145d..2bbb9c3fca315 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -66,7 +66,8 @@ def predict(self, x): if margin > 0: prob = 1 / (1 + exp(-margin)) else: - prob = 1 - 1 / (1 + exp(margin)) + exp_margin = exp(margin) + prob = exp_margin / (1 + exp_margin) return 1 if prob > 0.5 else 0