Re-adds python code to latest version

usnistgov · Mar 5, 2018 · 38790fa · 38790fa
1 parent 978e768
commit 38790fa
Show file tree

Hide file tree

Showing 10 changed files with 129 additions and 54 deletions.
diff --git a/python/IG-how-to.docx b/python/IG-how-to.docx
diff --git a/python/LRS.py b/python/LRS.py
@@ -118,6 +118,7 @@ def LRS_estimate(s, verbose=False):
                 numerator += math.factorial(c)/2/math.factorial(c-2)
         denom = (L-W+1)*(L-W) / 2 
         P.append(math.pow(float(numerator)/denom, 1.0/W))
+
 
     # The entropy estimate is calculated as -log_2 max(Pmax)
     return max(P), -math.log(max(P),2)

diff --git a/python/README.md b/python/README.md
@@ -0,0 +1,75 @@
+# SP800-90B_EntropyAssessment (DRAFT)
+Cryptographic random bit generators (RBGs), also known as random number generators (RNGs), require a noise source that produces digital outputs with some level of unpredictability, expressed as min-entropy. 
+The SP800-90B_EntropyAssessment Python package implements the min-entropy assessment methods included in the 2016 draft of Special Publication 800-90B.
+
+Please note that this code package was published to assist in the evaluation of the entropy estimation methods provided in the draft publication. As such, it is written to resemble the pseudocode in the draft, and is not optimized for performance. After SP 800-90B is finalized, the code will be ported to another language and performance improvements will be made.
+
+## Disclaimer
+NIST-developed software is provided by NIST as a public service. You may use, copy and distribute copies of the software in any medium, provided that you keep intact this entire notice. You may improve, modify and create derivative works of the software or any portion of the software, and you may copy and distribute such modifications or works. Modified works should carry a notice stating that you changed the software and should note the date and nature of any such change. Please explicitly acknowledge the National Institute of Standards and Technology as the source of the software.
+
+NIST-developed software is expressly provided "AS IS." NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
+
+You are solely responsible for determining the appropriateness of using and distributing the software and you assume all risks associated with its use, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and the unavailability or interruption of operation. This software is not intended to be used in any situation where a failure could cause risk of injury or damage to property. The software developed by NIST employees is not subject to copyright protection within the United States.
+
+The identification of any commercial product or trade name does not imply endorsement or recommendation by the National Institute of Standards and Technology, nor is it intended to imply that the materials or equipment identified are necessarily the best available for the purpose.
+
+## Requirements
+
+This code package requires 64-bit Python 2.7 or Python 3.
+
+## Summary of Changes
+1. Updated for the second draft of SP 800-90B (January 2016).
+
+## Basic Usage
+
+There are three main files in this code package: iid_main.py, noniid_main.py, and restart.py. Brief usage descriptions are listed below. For further details, please refer to the user guide.
+
+## Using iid_main.py
+The file iid_main.py calls all of the tests that determine whether or not the input file appears to contain independent and identically distributed (IID) samples, and if so, gives an entropy assessment. 
+The program takes two arguments:
+
+1. 	datafile: a binary file containing the samples to be tested.
+2. 	bits_per_symbol: the number of bits required to represent the largest output symbol from the noise source. E.g., if the largest value is 12, this would be 4.
+
+### Example
+	> python iid_main.py truerand_1bit.bin 1
+	reading 1000000 bytes of data
+	IID = True
+	min-entropy = 0.995043
+
+	Don't forget to run the sanity check on a restart dataset using H_I = 0.995043
+
+## Using noniid_main.py
+The file noniid_main.py calls all of the min-entropy estimation methods. The program requires two arguments:
+
+1. 	datafile: a binary file containing the samples to be tested.
+2. 	bits_per_symbol: the number of bits required to represent the largest output symbol from the noise source. E.g., if the largest value is 12, this would be 4.
+
+### Example
+Non-IID estimators applied to same data as above:
+
+	> python noniid_main.py truerand_4bit.bin 4
+	reading 1000000 bytes of data
+	min-entropy = 3.70057
+
+	Don't forget to run the sanity check on a restart dataset using H_I = 3.70057
+
+## Using restart.py
+The file restart.py performs the sanity checks on the restart dataset. The program requires three arguments:
+
+1. 	datafile: a binary file containing the samples to be tested.
+2. 	bits_per_symbol: the number of bits required to represent the largest output symbol from the noise source. E.g., if the largest value is 12, this would be 4.
+3.	H_I: initial entropy estimate obtained via iid_main.py or noniid_main.py.
+
+### Example
+	> python restart.py truerand_4bit.bin 4 3.70057
+	reading 1000000 bytes of data
+	Passed the restart tests
+	*** Final entropy estimate: 3.700570
+
+## More Information
+For more information on using this code, such as optional arguments, see the user guide in this repository.
+For more information on the estimation methods, see [SP 800-90B second draft](http://csrc.nist.gov/publications/drafts/800-90/sp800-90b_second_draft.pdf).
+
+### Contact Information
+This code is currently maintained by Kerry McKay and John Kelsey.
diff --git a/python/SP90Bv2_predictors.py b/python/SP90Bv2_predictors.py
@@ -243,7 +243,7 @@ def Lag(S, verbose=False):
     Pavg = calcPavg(C, N)
 
     #step 6
-    Prun = calcRun(correct)
+    Prun = calcRun(correct)\
 
     #step 7
     minH = -math.log(max(Pavg, Prun),2)
@@ -326,7 +326,6 @@ def MultiMMC(S, verbose=False):
 
     #step 5
     C = sum(correct)
-    print("Correct: %d" % C)
 
     #step 6
     Pavg = calcPavg(C, N)
@@ -386,7 +385,7 @@ def LZ78Y(S, verbose=False):
         predict = None
         for j in range(B,0,-1):
             prev = tuple(S[i-j-1:i-1])
-            if D.get(prev,0) > 0:
+            if prev in D:
                 for y in sorted(D[prev].keys(),reverse=True):
                     if D[prev][y] > maxcount:
                         predict = y
@@ -397,7 +396,6 @@ def LZ78Y(S, verbose=False):
 
     #step 4
     C = sum(correct)
-    print("\n Correct: %d\n" % C)
     Pavg = calcPavg(C, N)
 
     #step 5

diff --git a/python/chi_square_tests.py b/python/chi_square_tests.py
@@ -17,6 +17,7 @@
 from collections import OrderedDict, Counter
 from operator import itemgetter
 import itertools
+import sys
 
 # does the dataset pass the chi-square tests?
 def pass_chi_square_tests(dataset, verbose=False):
@@ -94,7 +95,10 @@ def chi_square_independence(s):
     more = True
     while more:
         try:
-            pair = pair_iterator.next()
+            if sys.version_info >= (3,0):
+                pair = pair_iterator.__next__()
+            else:
+                pair = pair_iterator.next()
             e[pair] = p[pair[0]]*p[pair[1]]*(L-1)
         except:
             more = False
@@ -123,7 +127,7 @@ def chi_square_independence(s):
         bins[q][0] = bins[q][0] + str(pair)
         bins[q][1] = bins[q][1] + pair_counts.get(pair,0)
         bins[q][2] = bins[q][2] + e[pair]
-
+        
         if e[pair] >= 5:
             q += 1
             bins.append(['',0,0])
@@ -140,9 +144,10 @@ def chi_square_independence(s):
         bins.pop()
         q -= 1
 
+
     # calculate the test statistic, T
     T = 0
-    for i in range(q):
+    for i in range(q+1):
         T += float((bins[i][1] - bins[i][2])**2)/bins[i][2]
 
     # return statistic with q-1 df (since our indices start at 0, df is q)

diff --git a/python/iid_main.py b/python/iid_main.py
@@ -35,8 +35,6 @@
     datafile = args.datafile
     bits_per_symbol = int(args.bits_per_symbol)
     verbose = bool(args.verbose)
-    max_processes = int(args.processes)
-
 
     with open(datafile, 'rb') as file:
         # Read in raw bytes and convert to list of output symbols
@@ -47,14 +45,13 @@
             # print file and dataset details
             print ("Read in file %s, %d bytes long." % (datafile, len(bytes_in)))
             print ("Dataset: %d %d-bit symbols, %d symbols in alphabet." % (len(dataset), bits_per_symbol, k))
-            print ("Output symbol values: min = %d, max = %d." % (min(dataset), max(dataset)))
-            print ("Max processes allowed: %d.\n" % max_processes)
+            print ("Output symbol values: min = %d, max = %d\n" % (min(dataset), max(dataset)))
 
         #######################################
         # STEP 1: Determine if Dataset is IID #
         #######################################
         # determine if dataset is IID using shuffle and Chi-square tests
-        passed_permutation_tests = permutation_test(dataset, max_processes, verbose)
+        passed_permutation_tests = permutation_test(list(dataset), verbose)
 
         if passed_permutation_tests:
             if verbose:
@@ -63,7 +60,6 @@
             if verbose:
                 print ("** Failed IID permutation tests")
             print ("IID = False")
-            print ("min-entropy = 0.0")
             sys.exit(0)
 
         # run chi-square tests on dataset

diff --git a/python/noniid_main.py b/python/noniid_main.py
@@ -101,6 +101,7 @@
             print("- Compression Estimate: p(max) = %g, min-entropy = %g" % (pmax, minH))
         minEntropy = min(minH, minEntropy)
 
+
         # Section 6.3.5 The t-Tuple Estimate
         pmax, minH = t_tuple(dataset)
         if verbose:
@@ -129,11 +130,13 @@
             print("Lag Prediction Estimate: p(max) = %g, min-entropy = %g\n" % (pmax, minH))
         minEntropy = min(minH, minEntropy)
 
+
         # Section 6.3.9 MultiMMC prediction estimate
         pmax, minH = MultiMMC(dataset, verbose)
         if verbose:
             print("MultiMMC Prediction Estimate: p(max) = %g, min-entropy = %g\n" % (pmax, minH))
         minEntropy = min(minH, minEntropy)
+
 
         # Section 6.3.10 LZ78Y prediction estimate
         pmax, minH = LZ78Y(dataset, verbose)