Skip to content

Commit

Permalink
csv: Add more from/to options
Browse files Browse the repository at this point in the history
Add header option, on by default
Disable comments by default
Rename comma to delimiter
Add quote_char option
Add skip initial space option

Uses a forked version of std csv to support custom quote character

See top of csv.go for TODOs
  • Loading branch information
wader committed Jan 3, 2023
1 parent 63b5828 commit e3e2581
Show file tree
Hide file tree
Showing 7 changed files with 1,608 additions and 32 deletions.
178 changes: 148 additions & 30 deletions format/csv/csv.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
package csv

// TODO: error, throw error always? no decode value with gap etc? -d csv from_csv
// TODO: header row field count mismatch error, csv reader takes care of check atm. can use FieldsPerRecord -1
// TODO: row object keys mismatch writer
// TODO: lazy quotes?
// TODO: comment in writer? string elements?
// TODO: to_csv objects
// TODO: to_csv opts help
// TODO: go maps are random order, now sorts headers
// TODO: option aliases?
// TODO: snake_case option?

import (
"bytes"
"embed"
"encoding/csv"
"errors"
"fmt"
"io"
"sort"

"github.com/wader/fq/format"
"github.com/wader/fq/internal/csvex"
"github.com/wader/fq/internal/gojqex"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/decode"
Expand All @@ -27,8 +39,11 @@ func init() {
ProbeOrder: format.ProbeOrderTextFuzzy,
DecodeFn: decodeCSV,
DecodeInArg: format.CSVLIn{
Comma: ",",
Comment: "#",
Delimiter: ",",
Comment: "",
QuoteChar: `"`,
Header: true,
SkipInitialSpace: false,
},
Functions: []string{"_todisplay"},
})
Expand All @@ -39,29 +54,59 @@ func init() {
func decodeCSV(d *decode.D, in any) any {
ci, _ := in.(format.CSVLIn)

var rvs []any
br := d.RawLen(d.Len())
r := csv.NewReader(bitio.NewIOReader(br))
r.TrimLeadingSpace = true
r := csvex.NewReader(bitio.NewIOReader(br))
r.LazyQuotes = true
if ci.Comma != "" {
if ci.Delimiter != "" {
r.Comma = rune(ci.Delimiter[0])
} else if ci.Comma != "" {
r.Comma = rune(ci.Comma[0])
}
if ci.Comment != "" {
r.Comment = rune(ci.Comment[0])
} else {
r.Comment = 0
}
if ci.QuoteChar != "" {
r.Quote = rune(ci.QuoteChar[0])
} else {
r.Quote = '"'
}
r.TrimLeadingSpace = ci.SkipInitialSpace

row := 1
var rvs []any

var headers []string
for {
r, err := r.Read()
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return err
}
var vs []any
for _, s := range r {
vs = append(vs, s)

if ci.Header {
if headers == nil {
// TODO: duplicate headers?
headers = append(headers, r...)
} else {
obj := map[string]any{}
for i, s := range r {
h := headers[i]
obj[h] = s
}
rvs = append(rvs, obj)
}
} else {
var vs []any
for _, s := range r {
vs = append(vs, s)
}
rvs = append(rvs, vs)
}
rvs = append(rvs, vs)

row++
}

d.Value.V = &scalar.Any{Actual: rvs}
Expand All @@ -71,35 +116,108 @@ func decodeCSV(d *decode.D, in any) any {
}

type ToCSVOpts struct {
Comma string
Comma string // alias for Delimiter
Delimiter string
QuoteChar string
Header bool
}

func toCSV(_ *interp.Interp, c []any, opts ToCSVOpts) any {
b := &bytes.Buffer{}
w := csv.NewWriter(b)
if opts.Comma != "" {
w := csvex.NewWriter(b)
if opts.Delimiter != "" {
w.Comma = rune(opts.Delimiter[0])
} else if opts.Comma != "" {
w.Comma = rune(opts.Comma[0])
}
if opts.QuoteChar != "" {
w.Quote = rune(opts.QuoteChar[0])
} else {
w.Quote = '"'
}

seenObject := 0
seenArrays := 0
var headers []string

for _, row := range c {
rs, ok := gojqex.Cast[[]any](row)
if !ok {
return fmt.Errorf("expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
}
vs, ok := gojqex.NormalizeToStrings(rs).([]any)
if !ok {
panic("not array")
}
var ss []string
for _, v := range vs {
s, ok := v.(string)
switch row.(type) {
case []any:
if seenObject > 0 {
return fmt.Errorf("mixed row types, expected row to be an object, got %s", gojqex.TypeErrorPreview(row))
}

rs, ok := gojqex.Cast[[]any](row)
if !ok {
return fmt.Errorf("expected row record to be scalars, got %s", gojqex.TypeErrorPreview(v))
return fmt.Errorf("expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
}
ss = append(ss, s)
}
if err := w.Write(ss); err != nil {
return err
vs, ok := gojqex.NormalizeToStrings(rs).([]any)
if !ok {
panic("not array")
}
var ss []string
for _, v := range vs {
s, ok := v.(string)
if !ok {
return fmt.Errorf("expected row record to be scalars, got %s", gojqex.TypeErrorPreview(v))
}
ss = append(ss, s)
}
if err := w.Write(ss); err != nil {
return err
}

seenArrays++
case map[string]any:
if seenArrays > 0 {
return fmt.Errorf("mixed row types, expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
}

rm, ok := gojqex.Cast[map[string]any](row)
if !ok {
return fmt.Errorf("expected row to be an object, got %s", gojqex.TypeErrorPreview(row))
}
vm, ok := gojqex.NormalizeToStrings(rm).(map[string]any)
if !ok {
panic("not object")
}

if headers == nil {
// TODO: maps are random order in go
for k := range vm {
headers = append(headers, k)
}
sort.Strings(headers)

if err := w.Write(headers); err != nil {
return err
}
}

var ss []string
keysFound := 0
for _, k := range headers {
s, ok := vm[k].(string)
if !ok {
return fmt.Errorf("expected row object to have a %q key, %s", k, gojqex.TypeErrorPreview(row))
}
ss = append(ss, s)
keysFound++
}
// TODO: what keys are extra/missing
if keysFound < len(headers) {
return fmt.Errorf("expected row object has missing keys %s", gojqex.TypeErrorPreview(row))
} else if keysFound > len(headers) {
return fmt.Errorf("expected row object has extra keys %s", gojqex.TypeErrorPreview(row))
}

if err := w.Write(ss); err != nil {
return err
}

seenObject++
}

}
w.Flush()

Expand Down
8 changes: 6 additions & 2 deletions format/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,12 @@ type HTMLIn struct {
}

type CSVLIn struct {
Comma string `doc:"Separator character"`
Comment string `doc:"Comment line character"`
Comma string `doc:"Alias for Delimiter"`
Delimiter string `doc:"Field delimiter character"`
Comment string `doc:"Comment line character"`
QuoteChar string `doc:"Quote character"`
Header bool `doc:"Convert to objects based on header"`
SkipInitialSpace bool `doc:"Don't include leading whitespace"`
}

type BitCoinBlockIn struct {
Expand Down
1 change: 1 addition & 0 deletions internal/csvex/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Modified version of Go std encoding/csv Reader/Writer to support a different quote character
Loading

0 comments on commit e3e2581

Please sign in to comment.