readme and examples update

vc1492a · Oct 20, 2019 · 7e03049 · 7e03049
1 parent 1b902dd
commit 7e03049
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 5 deletions.
diff --git a/examples/numba_speed_diff.py b/examples/numba_speed_diff.py
@@ -0,0 +1,31 @@
+import numpy as np
+from PyNomaly import loop
+import time
+
+# generate a large set of data
+data = np.ones(shape=(10000, 4))
+
+# first time the process without Numba
+# use the progress bar to track progress
+
+t1 = time.time()
+scores_numpy = loop.LocalOutlierProbability(
+    data,
+    n_neighbors=3,
+    use_numba=False,
+    progress_bar=True
+).fit().local_outlier_probabilities
+t2 = time.time()
+seconds_no_numba = t2 - t1
+print("\nComputation took " + str(seconds_no_numba) + " seconds without Numba JIT.")
+
+t3 = time.time()
+scores_numba = loop.LocalOutlierProbability(
+    data,
+    n_neighbors=3,
+    use_numba=True,
+    progress_bar=True
+).fit().local_outlier_probabilities
+t4 = time.time()
+seconds_numba = t4 - t3
+print("\nComputation took " + str(seconds_numba) + " seconds with Numba JIT.")
diff --git a/examples/numpy.py b/examples/numpy.py
@@ -1,6 +1,5 @@
-from PyNomaly import loop
 import numpy as np
-
+from PyNomaly import loop
 
 data = np.array([
     [43.3, 30.2, 90.2],
@@ -11,6 +10,9 @@
     [421.5, 90.3, 50.0]
 ])
 
+scores = loop.LocalOutlierProbability(
+    data,
+    n_neighbors=3,
+).fit().local_outlier_probabilities
 
-scores = loop.LocalOutlierProbability(data, n_neighbors=3).fit().local_outlier_probabilities
 print(scores)
diff --git a/readme.md b/readme.md
@@ -107,7 +107,8 @@ print(scores)
 
 Numba must be installed for the above to use JIT compilation and improve the 
 speed of multiple calls to `LocalOutlierProbability()`, and PyNomaly has been 
-tested with Numba version 0.45.1. 
+tested with Numba version 0.45.1. An example of the speed difference that can 
+be realized by using Numba is available in `examples/numba_speed_diff.py`. 
 
 You may also choose to print progress bars _with or without_ the use of Numba 
 by passing `progress_bar=True` to the `LocalOutlierProbability()` method as above.
@@ -279,6 +280,8 @@ distance metric can be used (a neighbor index matrix must also be provided).
 This can be useful when wanting to use a distance other than the euclidean.
 
 ```python
+from sklearn.neighbors import NearestNeighbors
+
 data = np.array([
     [43.3, 30.2, 90.2],
     [62.9, 58.3, 49.3],
@@ -310,7 +313,7 @@ First, the standard LoOP algorithm is used on "training" data, with certain attr
 stored from the original LoOP approach. Then, as new points are considered, these fitted attributes are
 called when calculating the score of the incoming streaming data due to the use of averages from the initial
 fit, such as the use of a global value for the expected value of the probabilistic distance. Despite the potential
-for increased error when compared to the standard approach, but it may be effective in streaming applications where
+for increased error when compared to the standard approach, it may be effective in streaming applications where
 refitting the standard approach over all points could be computationally expensive.
 
 While the iris dataset is not streaming data, we'll use it in this example by taking the first 120 observations