Skip to content

Commit

Permalink
Extend filtering to sequences of vowels/consonants (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
vearutop committed Mar 23, 2023
1 parent b320d02 commit 99bb6f3
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ linters-settings:
check-type-assertions: true
check-blank: true
gocyclo:
min-complexity: 20
min-complexity: 40
dupl:
threshold: 100
misspell:
Expand Down
10 changes: 5 additions & 5 deletions filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"testing"
)

const logLine = `foo-bar-16 i2 2022/09/02 09:48:29.199655 baz handler failed to get cox: failed to get cox type: unknown cox type string: {quux} [R method:GET path:/abcd5s8 ra:2022-09-02T09:48:29 form:'map[bar:[11_2022-09-02] lox_id:[] cucumber:[RedCat1509_2022-09-02] cucumber_id:[132072] faux_id:[afExxSDFKHgBJcwxDgIxDETR1vEAWVVHqXo6PcBjfoaDF29f_I8jYTZZVyKeiXzPlP9O9k3SrZtY3IeqA] cox_alarm:[{payout}}] cox_carrot:[OCD] cox_type:[{marks}] creative:[62_203206_123ebd32047fe640] foo_boo_99diks:[https://peebee.jeeass-foo.site/pushforw?lockid=sdd32432dUR1vEAWVVHqXo6PcBjfoaDF29f_Ik3SrZtY3FzXvq0fP1IeqA] goal:[99diks] gps_pos:[1231230D-0BE9-41EF-B146-123123123B9BC7] baz:[123123124-0BE9-41EF-B146-123123123123] ip_address:[1.2.333.4] labelle:[72_206706_125329e2047fe640] poob_id:[62_206706_550eb9e2047fe640] baz_limit:[1234]]' header:'map[User-Agent:[Mozilla/5.0 (iPhone; CPU iPhone OS 14_4_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148] X-Boo-Id:[12312380d-0be9-41ef-b146-3452352323] X-Forwarded-For:[111.332.555.333] X-Forwarded-Proto:[https] X-Forwarded-For:[123.321.123.321]]' foocksorized gees_valeed deeedre_lee:0]`
const logLine = `foo-bar-16 i2 2022/09/02 09:48:29.199655 baz cdfgzaa aouios handler failed to get cox: failed to get cox type: unknown cox type string: {quux} [R method:GET path:/abcd5s8 ra:2022-09-02T09:48:29 form:'map[bar:[11_2022-09-02] lox_id:[] cucumber:[RedCat1509_2022-09-02] cucumber_id:[132072] faux_id:[afExxSDFKHgBJcwxDgIxDETR1vEAWVVHqXo6PcBjfoaDF29f_I8jYTZZVyKeiXzPlP9O9k3SrZtY3IeqA] cox_alarm:[{payout}}] cox_carrot:[OCD] cox_type:[{marks}] creative:[62_203206_123ebd32047fe640] foo_boo_99diks:[https://peebee.jeeass-foo.site/pushforw?lockid=sdd32432dUR1vEAWVVHqXo6PcBjfoaDF29f_Ik3SrZtY3FzXvq0fP1IeqA] goal:[99diks] gps_pos:[1231230D-0BE9-41EF-B146-123123123B9BC7] baz:[123123124-0BE9-41EF-B146-123123123123] ip_address:[1.2.333.4] labelle:[72_206706_125329e2047fe640] poob_id:[62_206706_550eb9e2047fe640] baz_limit:[1234]]' header:'map[User-Agent:[Mozilla/5.0 (iPhone; CPU iPhone OS 14_4_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148] X-Boo-Id:[12312380d-0be9-41ef-b146-3452352323] X-Forwarded-For:[111.332.555.333] X-Forwarded-Proto:[https] X-Forwarded-For:[123.321.123.321]]' foocksorized gees_valeed deeedre_lee:0]`

// BenchmarkFilterAlphanumeric checks bespoke implementation.
// BenchmarkFilterAlphanumeric-12 1912300 614.7 ns/op 0 B/op 0 allocs/op.
Expand All @@ -14,14 +14,14 @@ func BenchmarkFilterAlphanumeric(b *testing.B) {

for i := 0; i < b.N; i++ {
s := []byte(logLine)
a := filterAlphanumeric(s, 150)
a := filterDynamic(s, 150)
_ = a
}
}

func TestAlphanum(t *testing.T) {
filtered := string(filterAlphanumeric([]byte(logLine), 150))
expected := `X X X/X/X X:X:X.X baz handler failed to get cox: failed to get cox type: unknown cox type string: {quux} [R method:GET path:/X ra:X:X:X form:'map[bar:`
filtered := string(filterDynamic([]byte(logLine), 150))
expected := `X X X/X/X X:X:X.X baz X X handler failed to get cox: failed to get cox type: unknown cox type string: {quux} [R method:GET path:/X ra:X:X:X form:'map[`

if expected != filtered {
t.Fatalf("unexpected filtered: %s", filtered)
Expand All @@ -31,7 +31,7 @@ func TestAlphanum(t *testing.T) {
func TestShortLine(t *testing.T) {
line := "foo-bar-12 i3 2022/09/15 11:24:10.689412 baz 0-275 foo bar"
expected := "X X X/X/X X:X:X.X baz X foo bar"
filtered := string(filterAlphanumeric([]byte(line), 120))
filtered := string(filterDynamic([]byte(line), 120))

if expected != filtered {
t.Fatalf("unexpected filtered: %s", filtered)
Expand Down
39 changes: 35 additions & 4 deletions fllter.go
Original file line number Diff line number Diff line change
@@ -1,24 +1,42 @@
package main

// filterAlphanumeric replaces a-zA-Z_-% sequences that have at least one digit with X.
// filterDynamic replaces a-zA-Z_-% sequences that have at least one digit or 5+ consecutive consonants/vowels with X.
// Does not allocate, uses original slice.
func filterAlphanumeric(data []byte, l int) []byte {
func filterDynamic(data []byte, l int) []byte {
hasDigit := false
wordStart := -1
maxConsecutive := 0
consecutive := 0

res := data[:0]

var i int
var (
i int
prevCharType byte
)

for i = 0; i < len(data); i++ {
c := data[i]

isAlpha := false
charType := byte(0)

switch {
case c >= 'a' && c <= 'z':
if c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u' || c == 'y' || c == 'w' {
charType = 'v' // Vowel.
} else {
charType = 'c' // Consonant.
}

isAlpha = true
case c >= 'A' && c <= 'Z':
if c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U' || c == 'Y' || c == 'W' {
charType = 'v'
} else {
charType = 'c'
}

isAlpha = true
case c >= '0' && c <= '9':
isAlpha = true
Expand All @@ -27,16 +45,29 @@ func filterAlphanumeric(data []byte, l int) []byte {
isAlpha = true
}

if charType == prevCharType {
consecutive++
} else {
if consecutive > maxConsecutive {
maxConsecutive = consecutive
}

prevCharType = charType
consecutive = 1
}

// Finish current word.
if wordStart >= 0 && !isAlpha {
if hasDigit {
if hasDigit || maxConsecutive > 4 {
res = append(res, 'X')
} else {
res = append(res, data[wordStart:i]...)
}

wordStart = -1
hasDigit = false
maxConsecutive = 0
consecutive = 0
}

if wordStart == -1 {
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func main() {
}

if top > 0 {
filtered := filterAlphanumeric(line, length)
filtered := filterDynamic(line, length)

d.Reset()

Expand Down

0 comments on commit 99bb6f3

Please sign in to comment.