/
shanon-calc.py
96 lines (71 loc) · 2.16 KB
/
shanon-calc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import argparse
import math
import matplotlib.pyplot as plt
import numpy as np
import sys
# Number of bytes in each window used for the chunk-wise entropy computation.
CHUNK_SIZE = 256
def main():
    """Parse the command line, then report and plot the entropy of a file.

    The file named by the ``-p`` option is read in binary mode. If it
    contains any data, its overall entropy is printed in bits per byte and
    the bytes are handed to ``chunk_wise_entropy`` for plotting. When the
    file is empty nothing beyond the file name is printed.

    Exits through ``sys.exit`` with an error message when ``-p`` was not
    supplied. Errors raised by ``open`` propagate unchanged.
    """
    # Get the args
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", help="Specify the file path")
    file_path = parser.parse_args().p

    # argparse leaves the option as None when it was not given; compare by
    # identity, as recommended for singletons.
    if file_path is None:
        sys.exit("[#] Please enter a file path.\n exiting")

    print("[*] File: ", file_path)
    with open(file_path, 'rb') as f:
        data = f.read()

    if data:
        file_entropy = information_entropy(data)
        print("[*] The entropy is", file_entropy, "bits per byte")
        chunk_wise_entropy(data)
"""
This function is responsible for plotting the entropy for each region
"""
def plot_entropy(entropy_chunk, offset_chunk):
    """Draw the per-chunk entropy values as a line graph and display it.

    entropy_chunk supplies the y values and offset_chunk the matching x
    values. The figure is shown through ``plt.show()``; nothing is returned.
    """
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1, autoscale_on=True)

    # Yellow line, two points wide: entropy (y) against offset (x).
    axes.plot(offset_chunk, entropy_chunk, 'y', lw=2)
    axes.set(title="Entropy Graph", xlabel="Offset", ylabel="Entropy")

    plt.show()
"""
This function divides the entire file into chunks and computes the entropy
for each chunk. The entropy values are then passed to another function to
be plotted.
"""
def chunk_wise_entropy(file_data):
    """Compute the entropy of each CHUNK_SIZE-byte window and plot the result.

    file_data is split into consecutive windows of CHUNK_SIZE bytes (the
    last window may be shorter). The entropy of every window and the offset
    at which it starts are collected and passed to ``plot_entropy``. Empty
    input yields two empty lists.
    """
    # Stepping with range() stops before len(file_data), so no empty
    # trailing window is produced when the length is an exact multiple of
    # CHUNK_SIZE (the former `<=` loop added a bogus 0.0 point there).
    offset_chunk = list(range(0, len(file_data), CHUNK_SIZE))
    entropy_chunk = [
        information_entropy(file_data[start:start + CHUNK_SIZE])
        for start in offset_chunk
    ]
    plot_entropy(entropy_chunk, offset_chunk)
"""
This function is responsible for computing the Shannon entropy of the data
passed to it.
"""
def information_entropy(data):
    """Return the Shannon entropy of data, in bits per byte.

    data is iterated byte by byte (e.g. a ``bytes`` object); each item must
    be an integer in the range 0-255. Empty input gives 0.0.
    """
    # One tally per possible byte value, keyed by its character form and
    # created in ascending byte order.
    histogram = {chr(value): 0 for value in range(256)}
    for octet in data:
        histogram[chr(octet)] += 1

    total = len(data)
    entropy = 0.0
    for occurrences in histogram.values():
        # Byte values that never occur contribute nothing to the sum.
        if occurrences:
            # p is the probability of seeing this byte in the data
            p = float(occurrences / total)
            entropy -= p * math.log(p, 2)
    return entropy
# Run the command-line entry point only when this file is executed directly.
if __name__ == "__main__":
    main()