// SplitTextStream reads text from reader line by line and distributes the
// lines over writers obtained from writerProvider, placing at most
// elementCount lines into each writer.
//
// Lines that share a writer are separated by a single '\n'; no trailing
// newline is written after the last line of a chunk. A writer is requested
// lazily, only once there is a line to put into it, and it is closed as soon
// as it has received elementCount lines or the input ends. Empty input
// therefore never calls writerProvider.
//
// An elementCount of 0 is treated as 1. A negative elementCount never matches
// the line counter, so every line ends up in one single writer.
//
// The first error encountered is returned: a write error, a close error, or
// the scanner error (a read failure of reader, or bufio.ErrTooLong when a line
// exceeds the scanner's token limit). The writer that is open when an error
// occurs is still closed so that it is not leaked.
func SplitTextStream(reader io.Reader, writerProvider func() io.WriteCloser, elementCount int) error {
	if elementCount == 0 {
		elementCount = 1
	}

	scanner := bufio.NewScanner(reader)
	var writer io.WriteCloser
	counter := 0 // number of lines already written to the current writer

	for scanner.Scan() {
		if writer == nil {
			writer = writerProvider()
		}

		var err error
		if counter > 0 {
			// Separate from the previous line; the first line of a chunk has no prefix.
			_, err = writer.Write([]byte{'\n'})
		}
		if err == nil {
			_, err = writer.Write(scanner.Bytes())
		}
		if err != nil {
			// Best-effort close to avoid leaking the writer; the write error
			// is the one worth reporting.
			_ = writer.Close()
			return err
		}

		counter++
		if counter == elementCount {
			if err := writer.Close(); err != nil {
				return err
			}
			counter = 0
			writer = nil
		}
	}

	// Scan returning false means either EOF or a failure; only Err tells them
	// apart, and it is only meaningful once the loop has finished.
	scanErr := scanner.Err()
	if writer != nil {
		if err := writer.Close(); err != nil && scanErr == nil {
			return err
		}
	}
	return scanErr
}
import ( + "bytes" "fmt" "github.com/stretchr/testify/assert" + "io" "strings" "testing" ) @@ -163,6 +165,88 @@ func TestTerminatedSplitN(t *testing.T) { } } +type testWriter struct { + *bytes.Buffer + data *[]string +} + +func (t *testWriter) Close() error { + *t.data = append(*t.data, t.String()) + return nil +} + +func newTestWriter(data *[]string) io.WriteCloser { + return &testWriter{ + data: data, + Buffer: new(bytes.Buffer), + } +} + +func TestSplitTextStream(t *testing.T) { + + var data = make([]byte, 0) + for i := 0; i < 9; i++ { + data = append(data, []byte(fmt.Sprintf("%v %v\n", strings.Repeat("x", 2), i))...) + } + text := string(data) + + useCases := []struct { + description string + elements int + expect []string + }{ + { + description: "no more then 4 lines case", + elements: 4, + expect: []string{ + "xx 0\nxx 1\nxx 2\nxx 3", "xx 4\nxx 5\nxx 6\nxx 7", "xx 8", + }, + }, + { + description: "3 elements each", + elements: 3, + expect: []string{ + "xx 0\nxx 1\nxx 2", "xx 3\nxx 4\nxx 5", "xx 6\nxx 7\nxx 8", + }, + }, + { + description: "9 elements", + elements: 1, + expect: []string{ + "xx 0", "xx 1", "xx 2", "xx 3", "xx 4", "xx 5", "xx 6", "xx 7", "xx 8", + }, + }, + { + description: "9 elements", + elements: 0, + expect: []string{ + "xx 0", "xx 1", "xx 2", "xx 3", "xx 4", "xx 5", "xx 6", "xx 7", "xx 8", + }, + }, + { + description: "1 elements", + elements: 10, + expect: []string{ + "xx 0\nxx 1\nxx 2\nxx 3\nxx 4\nxx 5\nxx 6\nxx 7\nxx 8", + }, + }, + { + description: "1 elements", + elements: 9, + expect: []string{ + "xx 0\nxx 1\nxx 2\nxx 3\nxx 4\nxx 5\nxx 6\nxx 7\nxx 8", + }, + }, + } + + for _, useCase := range useCases { + var data = make([]string, 0) + err := SplitTextStream(strings.NewReader(text), func() io.WriteCloser { return newTestWriter(&data) }, useCase.elements) + assert.Nil(t, err) + assert.EqualValues(t, useCase.expect, data, useCase.description) + } +} + func Test_CaseFormat(t *testing.T) { var useCases = []struct { description string