@@ -60,14 +60,31 @@ type Message struct {
6060 // cursor positions
6161 cur , start , end int
6262
63- // should the next token be a value?
64- nextisValue bool
65-
66- // how far from the = is the value, immediate following is 0
67- valueDistance int
63+ hexState int // Current hex string state
64+ hexStart bool // Is the first char a :?
65+ hexColons int // Total number of colons
66+ hexSuccColons int // The current number of successive colons
67+ hexMaxSuccColons int // Maximum number of successive colons
68+ hexSuccColonsSeries int // Number of successive colon series
6869 }
6970}
7071
// Hex scanner states, "hs" flavour. The values are consecutive integers
// starting at zero.
//
// NOTE(review): the only member visibly referenced is hsStart, which
// resetHexStates stores into state.hexState. hexStep switches on the hex*
// constants instead, so this group looks like an earlier draft of that set --
// confirm before relying on hsChar1, hsChar2 or hsColon. Beware that hsColon
// (3) does not have the same value as hexColon.
const (
	hsStart  = iota // 0: initial state
	hsChar1         // 1
	hsChar2         // 2
	hsColon         // 3
)
78+
// States of the state machine driven by hexStep. The current state is kept in
// state.hexState.
//
// hexChar1 through hexChar4 must stay consecutive and in this order: hexStep
// moves from one to the next with a plain increment for every additional hex
// digit it reads in the same group.
const (
	hexStart = iota // nothing has been consumed yet
	hexChar1        // one hex digit read in the current group
	hexChar2        // two hex digits read in the current group
	hexChar3        // three hex digits read in the current group
	hexChar4        // four hex digits read; one more hex digit stops the scan
	hexColon        // the previous character was a ':'
)
87+
7188func (this * Message ) SetData (s string ) {
7289 this .data = s
7390
@@ -191,12 +208,14 @@ func (this *Message) Scan() (Token, error) {
191208 }
192209
193210 // remove any trailing spaces
211+ s := 0 // trail space count
194212 for this .data [this .state .start + l - 1 ] == ' ' && l > 0 {
195213 l --
214+ s ++
196215 }
197216
198217 v := this .data [this .state .start : this .state .start + l ]
199- this .state .start += l
218+ this .state .start += l + s
200219
201220 token := Token {Type : t , Value : v , Field : FieldUnknown }
202221
@@ -234,21 +253,20 @@ func (this *Message) skipSpace(data string) int {
234253
235254func (this * Message ) scanToken (data string ) (int , TokenType , error ) {
236255 var (
237- tnode * timeNode = timeFsmRoot
238- timeStop , macStop , macType bool
239- timeLen , tokenLen int
240- l = len (data )
256+ tnode * timeNode = timeFsmRoot
257+ timeStop , hexStop , hexValid bool
258+ timeLen , hexLen , tokenLen int
259+ l = len (data )
241260 )
242261
243262 this .state .dots = 0
244263 this .state .tokenType = TokenUnknown
245264 this .state .tokenStop = false
265+ this .resetHexStates ()
246266
247- // short circuit the mac check
248- // positions 2,5,8,11,14 must be ':'
249- if l < 17 || data [2 ] != ':' || data [14 ] != ':' {
250- macStop = true
251- macType = false
267+ // short circuit the hex check
268+ if l < 3 {
269+ hexStop = true
252270 }
253271
254272 // short circuit the time check
@@ -265,11 +283,11 @@ func (this *Message) scanToken(data string) (int, TokenType, error) {
265283 }
266284 }
267285
268- if ! macStop {
269- macType , macStop = this .macStep (i , r )
286+ if ! hexStop {
287+ hexValid , hexStop = this .hexStep (i , r )
270288
271- if macType && macStop {
272- return i + 1 , TokenMac , nil
289+ if hexValid {
290+ hexLen = i + 1
273291 }
274292 }
275293
@@ -287,7 +305,23 @@ func (this *Message) scanToken(data string) (int, TokenType, error) {
287305 }
288306 }
289307
290- if this .state .tokenStop && timeStop && macStop {
308+ //glog.Debugf("i=%d, r=%c, tokenStop=%t, timeStop=%t, hexStop=%t", i, r, this.state.tokenStop, timeStop, hexStop)
309+ // This means either we found something, or we have exhausted the string
310+ if (this .state .tokenStop && timeStop && hexStop ) || i == l - 1 {
311+ if timeLen > 0 {
312+ return timeLen , TokenTime , nil
313+ } else if hexValid && this .state .hexColons > 1 {
314+ if this .state .hexColons == 5 && this .state .hexMaxSuccColons == 1 {
315+ return hexLen , TokenMac , nil
316+ } else if this .state .hexSuccColonsSeries == 1 ||
317+ (this .state .hexColons == 7 && this .state .hexSuccColonsSeries == 0 ) {
318+
319+ return hexLen , TokenIPv6 , nil
320+ } else {
321+ return hexLen , TokenLiteral , nil
322+ }
323+ }
324+
291325 // If token length is 0, it means we didn't find time, nor did we find
292326 // a word, it cannot be space since we skipped all space. This means it
293327 // is a single character literal, so return that.
@@ -299,10 +333,6 @@ func (this *Message) scanToken(data string) (int, TokenType, error) {
299333 }
300334 }
301335
302- if timeLen > 0 {
303- return timeLen , TokenTime , nil
304- }
305-
306336 return len (data ), this .state .tokenType , nil
307337}
308338
@@ -497,59 +527,119 @@ func (this *Message) tokenStep(index int, r rune) {
497527 }
498528}
499529
500- // Returns bool, bool, first one is true if the it's a mac type, second is whether to stop scanning
501- func (this * Message ) macStep (index int , r rune ) (bool , bool ) {
502- switch {
503- case index == 0 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
504- return true , false
530+ // hexStep steps through a string and tries to match a hex string in one of these formats:
531+ // - dead:beef:1234:5678:223:32ff:feb1:2e50 (ipv6 address)
532+ // - de:ad:be:ef:74:a6:bb:45:45:52:71:de:b2:12:34:56 (mac address)
533+ // - 0:09:36 (literal)
534+ // - f0f0:f::1 (ipv6)
535+ // - and a few others in the scanner_test.go/hextests list
536+ //
537+ // The ipv6 rules are:
538+ // (http://computernetworkingnotes.com/ipv6-features-concepts-and-configurations/ipv6-address-types-and-formats.html)
539+ // - Whereas IPv4 addresses use a dotted-decimal format, where each byte ranges from
540+ // 0 to 255.
541+ // - IPv6 addresses use eight sets of four hexadecimal digits (16 bits in each set),
542+ // separated by a colon (:), like this: xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
543+ // (x would be a hexadecimal value). This notation is commonly called string notation.
544+ // - Hexadecimal values can be displayed in either lower- or upper-case for the numbers
545+ // A–F.
546+ // - A leading zero in a set of numbers can be omitted; for example, you could either
547+ // enter 0012 or 12 in one of the eight fields—both are correct.
548+ // - If you have successive fields of zeroes in an IPv6 address, you can represent
549+ // them as two colons (::). For example, 0:0:0:0:0:0:0:5 could be represented as ::5;
550+ // and ABC:567:0:0:8888:9999:1111:0 could be represented as ABC:567::8888:9999:1111:0.
551+ // However, you can only do this once in the address: ABC::567::891::00 would be
552+ // invalid since :: appears more than once in the address. The reason for this
553+ // limitation is that if you had two or more repetitions, you wouldn’t know how many
554+ // sets of zeroes were being omitted from each part. An unspecified address is
555+ // represented as ::, since it contains all zeroes.
556+ //
557+ // first return value indicates whether this is a valid hex string
558+ // second return value indicates whether to stop scanning
559+ func (this * Message ) hexStep (i int , r rune ) (bool , bool ) {
560+ switch this .state .hexState {
561+ case hexStart :
562+ switch {
563+ case isHex (r ):
564+ this .state .hexState = hexChar1
565+
566+ case r == ':' :
567+ this .state .hexState = hexColon
568+ this .state .hexColons ++
569+ this .state .hexSuccColons ++
570+ this .state .hexStart = true
571+ this .state .hexState = hexColon
572+
573+ if this .state .hexSuccColons > this .state .hexMaxSuccColons {
574+ this .state .hexMaxSuccColons = this .state .hexSuccColons
575+ }
505576
506- case index == 1 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
507- return true , false
577+ default :
578+ return false , true
579+ }
508580
509- case index == 2 && r == ':' :
510- return true , false
581+ return false , false
511582
512- case index == 3 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
513- return true , false
583+ case hexColon :
584+ switch {
585+ case isHex (r ):
586+ this .state .hexState = hexChar1
587+ this .state .hexSuccColons = 0
514588
515- case index == 4 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
516- return true , false
589+ if this .state .hexColons > 0 {
590+ return true , false
591+ }
517592
518- case index == 5 && r == ':' :
519- return true , false
593+ case r == ':' :
594+ this .state .hexSuccColons ++
595+ this .state .hexColons ++
520596
521- case index == 6 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
522- return true , false
597+ if this .state .hexSuccColons == 2 {
598+ this .state .hexSuccColonsSeries ++
599+ }
523600
524- case index == 7 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
525- return true , false
601+ if this .state .hexSuccColons > this .state .hexMaxSuccColons {
602+ this .state .hexMaxSuccColons = this .state .hexSuccColons
603+ }
526604
527- case index == 8 && r == ':' :
528- return true , false
605+ this .state .hexState = hexColon
529606
530- case index == 9 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
531- return true , false
607+ default :
608+ if this .state .hexColons > 0 && unicode .IsSpace (r ) {
609+ return true , true
610+ }
611+ return false , true
612+ }
532613
533- case index == 10 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
534- return true , false
614+ return false , false
535615
536- case index == 11 && r == ':' :
537- return true , false
616+ case hexChar1 , hexChar2 , hexChar3 , hexChar4 :
617+ switch {
618+ case this .state .hexState != hexChar4 && isHex (r ):
619+ this .state .hexState ++
620+ this .state .hexSuccColons = 0
538621
539- case index == 12 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
540- return true , false
622+ case r == ':' :
623+ this .state .hexState = hexColon
624+ this .state .hexColons ++
625+ this .state .hexSuccColons ++
541626
542- case index == 13 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
543- return true , false
627+ if this .state .hexSuccColons > this .state .hexMaxSuccColons {
628+ this .state .hexMaxSuccColons = this .state .hexSuccColons
629+ }
544630
545- case index == 14 && r == ':' :
546- return true , false
631+ default :
632+ if this .state .hexColons > 0 && unicode .IsSpace (r ) {
633+ return true , true
634+ }
635+ return false , true
636+ }
547637
548- case index == 15 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
549- return true , false
638+ if this .state .hexColons > 0 {
639+ return true , false
640+ }
550641
551- case index == 16 && (r >= 'a' && r <= 'f' || r >= 'A' && r <= 'F' || r >= '0' && r <= '9' ):
552- return true , true
642+ return false , false
553643 }
554644
555645 return false , true
@@ -566,12 +656,27 @@ func (this *Message) reset() {
566656 this .state .start = 0
567657 this .state .end = len (this .data )
568658 this .state .cur = 0
659+
660+ this .resetHexStates ()
569661}
570662
571- func isLetter (ch rune ) bool {
572- return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode .IsLetter (ch )
663+ func (this * Message ) resetHexStates () {
664+ this .state .hexState = hsStart
665+ this .state .hexStart = false
666+ this .state .hexColons = 0
667+ this .state .hexSuccColons = 0
668+ this .state .hexMaxSuccColons = 0
669+ this .state .hexSuccColonsSeries = 0
670+ }
671+
// isLetter reports whether r can be part of a word: an ASCII letter, an
// underscore, or any non-ASCII rune that Unicode classifies as a letter.
func isLetter(r rune) bool {
	switch {
	case r == '_':
		return true
	case r < 0x80:
		// ASCII range: decide with plain comparisons, no table lookup.
		return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
	default:
		return unicode.IsLetter(r)
	}
}
574675
// isLiteral reports whether r may appear in a literal token: an ASCII letter
// or one of the punctuation characters + - _ # \ % * @ $ ?
func isLiteral(r rune) bool {
	switch r {
	case '+', '-', '_', '#', '\\', '%', '*', '@', '$', '?':
		return true
	}

	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
}
679+
// isHex reports whether r is an ASCII hexadecimal digit: 0-9, a-f or A-F.
func isHex(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f', 'A' <= r && r <= 'F':
		return true
	}

	return false
}
0 commit comments