/
input.py
154 lines (104 loc) · 3.42 KB
/
input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
Input module
Handle different input file types and digitize sequences
Written by Marshall Beddoe <mbeddoe@baselineresearch.net>
Copyright (c) 2004 Baseline Research
Licensed under the LGPL
"""
from pcapy import *
from socket import *
# Names exported by a star import of this module.
__all__ = [
    "Input",
    "Pcap",
    "ASCII",
]
class Input:
    """Implementation of base input class.

    Stores a list of sequence entries and exposes it through the container
    protocols (iteration, ``len()``, indexing, ``repr()``).  The base class
    reads nothing itself; the ``Pcap`` and ``ASCII`` subclasses fill
    ``sequences`` from their respective file formats.
    """

    def __init__(self, filename):
        """Create empty storage.

        ``filename`` is accepted so that all input classes share one
        constructor shape; it is not used by the base class.
        """
        # Raw items already seen; subclasses consult it to drop duplicates.
        self.set = set()
        # Entries appended by subclasses, in the order they were read.
        self.sequences = []
        # Cursor for the iterator protocol.
        self.index = 0

    def __iter__(self):
        """Rewind the cursor and return ``self`` as the iterator.

        The cursor is shared, so two simultaneous iterations over the same
        instance interfere with each other.
        """
        self.index = 0
        return self

    def next(self):
        """Return the next entry; raise ``StopIteration`` when exhausted."""
        # ">=" rather than "==" so an over-advanced cursor ends iteration
        # instead of raising IndexError.
        if self.index >= len(self.sequences):
            raise StopIteration
        entry = self.sequences[self.index]
        self.index += 1
        return entry

    # Python 3 looks up __next__; keep next() for Python 2 callers.
    __next__ = next

    def __len__(self):
        """Return the number of stored entries."""
        return len(self.sequences)

    def __repr__(self):
        """Return the string form of the underlying list of entries."""
        return "%s" % (self.sequences,)

    def __getitem__(self, index):
        """Return the entry (or slice of entries) at ``index``."""
        return self.sequences[index]
class Pcap(Input):
    """Handle the pcap file format.

    Each packet of the capture is passed to ``handler``, which skips the
    link-layer, IP and (for TCP/UDP) transport headers and stores the
    remaining payload as a list of byte values.  Duplicate payloads are
    stored only once.
    """

    def __init__(self, filename, offset=14):
        """Read every packet from the capture file ``filename``.

        ``offset`` is the number of link-layer bytes that precede the IP
        header (the default of 14 corresponds to an Ethernet header).

        Raises ``IOError`` if the capture file cannot be opened.
        """
        Input.__init__(self, filename)
        self.pktNumber = 0
        self.offset = offset

        try:
            pd = open_offline(filename)
        except Exception:
            # Callers catch IOError; keep that type but say which file failed.
            raise IOError("unable to open pcap file: %s" % (filename,))

        # A count of -1 asks pcap to process the whole capture.
        pd.dispatch(-1, self.handler)

    def handler(self, hdr, pkt):
        """Per-packet callback: extract and store the application payload.

        Appends ``(packet_number, [byte values])`` to ``self.sequences``
        unless the packet is empty, truncated, carries no payload, or its
        payload has already been seen.

        NOTE(review): there is no check that the frame actually carries
        IPv4; every packet is parsed as if it did -- confirm captures are
        pre-filtered.
        """
        if hdr.getlen() <= 0:
            return

        # Increment packet counter
        self.pktNumber += 1

        # Indexing a bytearray yields integers whether pkt is a Python 2
        # str or a Python 3 bytes object.
        data = bytearray(pkt)

        # Ethernet is a safe assumption
        link_len = self.offset
        offset = link_len

        # Parse IP header; fields up to the protocol byte (index 9) are
        # required, so skip packets too short to contain them.
        if len(data) < offset + 10:
            return
        ip_hl = data[offset] & 0x0f                           # header length
        ip_len = (data[offset + 2] << 8) | data[offset + 3]   # total length
        ip_p = data[offset + 9]                               # protocol type
        offset += ip_hl * 4

        # Parse TCP if applicable
        if ip_p == 6:
            # The data-offset nibble lives in byte 12 of the TCP header.
            if len(data) < offset + 13:
                return
            th_off = data[offset + 12] >> 4                   # tcp offset
            offset += th_off * 4
        # Parse UDP if applicable
        elif ip_p == 17:
            offset += 8

        # Parse out application layer.  The IP total length excludes the
        # link-layer header, so add back the configured link-layer size
        # (not a hard-coded 14) before subtracting the absolute offset.
        seq_len = (ip_len - offset) + link_len
        if seq_len <= 0:
            return

        # Bound the slice by the IP total length so that any bytes after
        # the IP datagram (e.g. link-layer padding) are not treated as
        # payload.
        seq = pkt[offset:offset + seq_len]
        if not seq:
            # Capture was cut short before the payload started.
            return

        # Store each distinct payload only once.
        if seq in self.set:
            return
        self.set.add(seq)

        # Digitize sequence
        digitalSeq = list(data[offset:offset + seq_len])
        self.sequences.append((self.pktNumber, digitalSeq))
class ASCII(Input):
    """Handle newline delimited ASCII input files.

    Every distinct line of the file becomes one entry of the form
    ``(line_number, [character codes])``.  Line numbers are 1-based and
    count duplicate lines too.  The line terminator is kept as part of the
    sequence.
    """

    def __init__(self, filename):
        """Read and digitize every distinct line of ``filename``.

        Raises ``IOError`` if the file cannot be opened.
        """
        Input.__init__(self, filename)

        try:
            fd = open(filename, "r")
        except IOError:
            # Already the advertised type; keep its errno and message.
            raise
        except Exception:
            # Any other failure to open is still reported as IOError.
            raise IOError("unable to open input file: %s" % (filename,))

        # The context manager closes the file even if digitizing fails.
        with fd:
            for lineno, line in enumerate(fd, 1):
                # Store each distinct line only once.
                if line in self.set:
                    continue
                self.set.add(line)

                # Digitize sequence
                digitalSeq = [ord(c) for c in line]
                self.sequences.append((lineno, digitalSeq))