-
Notifications
You must be signed in to change notification settings - Fork 1
/
entropy.py
executable file
·85 lines (71 loc) · 2.48 KB
/
entropy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
# Module: entropy.py
# Purpose: compute Shannon entropy of a block of bytes
# Date: N/A
# Notes:
# 1) This is based on code from several threads on StackOverflow
# and may be used to compute the Shannon entropy in the range
# of 0..1 for a block of bytes using Python. For example, this
# may be used to compute where events occur in a stream or file.
# 2) Ref:
# http://stackoverflow.com/questions/990477/how-to-calculate-the-entropy-of-a-file
# Other notes: http://libdisorder.freshdefense.net/
# and: http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html
#
"""Compute Shannon Entropy of blocks of bytes"""
import math
import random
# compute the Shannon entropy of a buffer
def compute_entropy(in_buf):
    """Compute, display and return the Shannon entropy of a buffer.

    The entropy is taken with logarithm base 256, so the result lies in
    the range 0..1: 0.0 when every byte is identical (or the buffer is
    empty) and 1.0 when all 256 byte values are equally frequent.

    Args:
        in_buf: anything accepted by ``bytearray()`` -- e.g. ``bytes``,
            a ``bytearray`` or an iterable of ints in ``range(256)``.

    Returns:
        The normalized entropy as a float.  The value is also printed,
        preceded by two progress messages.
    """
    byte_buff = bytearray(in_buf)
    total = len(byte_buff)

    # Histogram of byte values, built in a single pass over the buffer
    # instead of rescanning the whole buffer once per possible value.
    print("Compute counts for each byte value")
    byte_count = [0] * 256
    for byte in byte_buff:
        byte_count[byte] += 1

    print("Compute probability and Shannon entropy")
    ent = 0.0
    for cnt in byte_count:
        if cnt == 0:
            # Absent values contribute nothing to the sum; skipping them
            # also avoids calling math.log(0) and, for an empty buffer,
            # dividing by a total of zero.
            continue
        prob = (1.0 * cnt) / total
        ent -= prob * math.log(prob, 256)

    # A single pre-formatted argument prints the same text whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print("Entropy is  {0}".format(ent))
    return ent
# run the entropy test
def run_entropy():
    """Run compute_entropy() on three sample buffers.

    The samples are, in order:
      1. 1024 bytes derived from random.random() after seeding the
         generator with 1234, so the run is repeatable;
      2. the ten ASCII digits b'0123456789';
      3. 1024 bytes drawn with random.triangular(0, 100, 50).

    A one-line description of each buffer is printed before it is handed
    to compute_entropy().  Nothing is returned.
    """
    # Every print below takes one pre-formatted argument so the text is
    # the same under the Python 2 statement and the Python 3 function.
    print("entropy.py: Computing entropy of a byte buffers")

    tot = 1024
    print("Creating buffer with random data, len={0}".format(tot))
    # Seed first so the generated bytes are identical on every run.
    random.seed(1234)
    byte_buff = bytearray(int(random.random() * 256) for _ in range(tot))
    compute_entropy(byte_buff)

    byte_buff = b'0123456789'
    tot = len(byte_buff)
    print("Creating buffer with ASCII 0..9, len={0}".format(tot))
    compute_entropy(byte_buff)

    # The generator is deliberately not re-seeded here; it continues from
    # the state left by the first buffer.
    byte_buff = bytearray(int(random.triangular(0, 100, 50))
                          for _ in range(1024))
    tot = len(byte_buff)
    print("Creating buffer with triangular distribution, len={0}".format(tot))
    compute_entropy(byte_buff)
# Script entry point: run the entropy demonstration only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    run_entropy()