diff --git a/README.md b/README.md index cc57244..334fb43 100644 --- a/README.md +++ b/README.md @@ -3,4 +3,4 @@ [![Build Status](https://travis-ci.org/smartystreets/scanners.svg?branch=master)](https://travis-ci.org/smartystreets/scanners) [![Code Coverage](https://codecov.io/gh/smartystreets/scanners/branch/master/graph/badge.svg)](https://codecov.io/gh/smartystreets/scanners) [![Go Report Card](https://goreportcard.com/badge/github.com/smartystreets/scanners)](https://goreportcard.com/report/github.com/smartystreets/scanners) -[![GoDoc](https://godoc.org/github.com/smartystreets/scanners?status.svg)](http://godoc.org/github.com/smartystreets/scanners) +[![GoDoc](https://pkg.go.dev/badge/github.com/smartystreets/scanners)](https://pkg.go.dev/github.com/smartystreets/scanners) diff --git a/csv/column_scanner.go b/csv/column_scanner.go index 1f55511..9f37643 100644 --- a/csv/column_scanner.go +++ b/csv/column_scanner.go @@ -6,12 +6,24 @@ import ( "log" ) +// ColumnScanner provides access to the fields of CSV-encoded +// data by column name. The scanner assumes the first +// record in the data to be the header with column names. If +// duplicate names exist in the header, the last column for the +// duplicate name will be used. +// +// All configurations of the underlying *csv.Reader are available +// through an [Option]. type ColumnScanner struct { *Scanner headerRecord []string columnIndex map[string]int } +// NewColumnScanner returns a ColumnScanner that reads from reader, +// configured with the provided options, and assumes the first record +// to be the header. It calls Scan once to read the header; subsequent +// calls to Scan will return the remaining records. func NewColumnScanner(reader io.Reader, options ...Option) (*ColumnScanner, error) { inner := NewScanner(reader, append(options, FieldsPerRecord(0))...) if !inner.Scan() { @@ -32,10 +44,14 @@ func (this *ColumnScanner) readHeader() { } } +// Header returns the header record. 
func (this *ColumnScanner) Header() []string { return this.headerRecord } +// ColumnErr returns the value for column name of the most recent +// record generated by a call to Scan as a string. It returns an +// error if column was not present in the header record. func (this *ColumnScanner) ColumnErr(column string) (string, error) { index, ok := this.columnIndex[column] if !ok { @@ -44,6 +60,8 @@ func (this *ColumnScanner) ColumnErr(column string) (string, error) { return this.Record()[index], nil } +// Column wraps [ColumnScanner.ColumnErr], but panics if the name was not present +// in the header record. func (this *ColumnScanner) Column(column string) string { value, err := this.ColumnErr(column) if err != nil { diff --git a/csv/column_scanner_test.go b/csv/column_scanner_test.go index 945ae8b..9541069 100644 --- a/csv/column_scanner_test.go +++ b/csv/column_scanner_test.go @@ -69,6 +69,20 @@ func (this *ColumnScannerFixture) TestColumnNotFound_Panic() { this.So(func() { this.scanner.Column("nope") }, should.Panic) } +// TestDuplicateColumnNames confirms that duplicated/repeated +// column names result in the last repeated column being +// added to the map and used to retrieve values for that name. +func (this *ColumnScannerFixture) TestDuplicateColumnNames() { + scanner, err := NewColumnScanner(reader([]string{ + "Col1,Col2,Col2", + "foo,bar,baz", + })) + this.So(err, should.BeNil) + this.So(scanner.Header(), should.Resemble, []string{"Col1", "Col2", "Col2"}) + scanner.Scan() + this.So(scanner.Column("Col2"), should.Equal, "baz") +} + type User struct { FirstName string LastName string diff --git a/csv/example_test.go b/csv/example_test.go index 1e68629..a453b39 100644 --- a/csv/example_test.go +++ b/csv/example_test.go @@ -59,3 +59,61 @@ func ExampleScanner_options() { // [Ken Thompson ken] // [Robert Griesemer gri] } + +// A ColumnScanner maps field values in each row to column +// names. 
The column name is taken from the first row, which +// is assumed to be the header row. +func ExampleColumnScanner() { + in := strings.Join([]string{ + `first_name,last_name,username`, + `"Rob","Pike",rob`, + `Ken,Thompson,ken`, + `"Robert","Griesemer","gri"`, + }, "\n") + scanner, _ := csv.NewColumnScanner(strings.NewReader(in)) + + for scanner.Scan() { + fmt.Println(scanner.Column("last_name"), scanner.Column("first_name")) + } + + if err := scanner.Error(); err != nil { + log.Panic(err) + } + + // Output: + // Pike Rob + // Thompson Ken + // Griesemer Robert +} + +func ExampleStructScanner() { + type person struct { + Firstname string `csv:"first_name"` + Lastname string `csv:"last_name"` + Username string `csv:"username"` + } + + in := strings.Join([]string{ + `first_name,last_name,username`, + `"Rob","Pike",rob`, + `Ken,Thompson,ken`, + `"Robert","Griesemer","gri"`, + }, "\n") + + scanner, _ := csv.NewStructScanner(strings.NewReader(in)) + + for scanner.Scan() { + var p person + scanner.Populate(&p) + fmt.Printf("%+v\n", p) + } + + if err := scanner.Error(); err != nil { + log.Panic(err) + } + + // Output: + // {Firstname:Rob Lastname:Pike Username:rob} + // {Firstname:Ken Lastname:Thompson Username:ken} + // {Firstname:Robert Lastname:Griesemer Username:gri} +} diff --git a/csv/options.go b/csv/options.go index c07fbae..c6989a7 100644 --- a/csv/options.go +++ b/csv/options.go @@ -8,23 +8,37 @@ type Option func(*Scanner) // If true is passed, continue scanning until io.EOF is reached. // If false is passed (default), any error encountered during scanning // will result in the next call to Scan returning false and -// the Scanner may be considered dead. See Scanner.Error() for the exact error -// (before the next call to Scanner.Scan()). -// See https://golang.org/pkg/encoding/csv/#pkg-variables -// and https://golang.org/pkg/encoding/csv/#ParseError -// for more information regarding possible error values. +// the Scanner may be considered dead. 
Check [Scanner.Error] for the exact error +// (before the next call to [Scanner.Scan]). +// +// See the error variables starting at [csv.ErrBareQuote], and the +// [csv.ParseError] type, for more information regarding possible +// error values. func ContinueOnError(continue_ bool) Option { return func(s *Scanner) { s.continueOnError = continue_ } } -func Comma(comma rune) Option { return func(s *Scanner) { s.reader.Comma = comma } } + +// See the [csv.Reader.Comma] field. +func Comma(comma rune) Option { return func(s *Scanner) { s.reader.Comma = comma } } + +// See the [csv.Reader.Comment] field. func Comment(comment rune) Option { return func(s *Scanner) { s.reader.Comment = comment } } + +// See the [csv.Reader.FieldsPerRecord] field. func FieldsPerRecord(fields int) Option { return func(s *Scanner) { s.reader.FieldsPerRecord = fields } } -func LazyQuotes(lazy bool) Option { return func(s *Scanner) { s.reader.LazyQuotes = lazy } } -func ReuseRecord(reuse bool) Option { return func(s *Scanner) { s.reader.ReuseRecord = reuse } } + +// See the [csv.Reader.LazyQuotes] field. +func LazyQuotes(lazy bool) Option { return func(s *Scanner) { s.reader.LazyQuotes = lazy } } + +// See the [csv.Reader.ReuseRecord] field. +func ReuseRecord(reuse bool) Option { return func(s *Scanner) { s.reader.ReuseRecord = reuse } } + +// See the [csv.Reader.TrimLeadingSpace] field. func TrimLeadingSpace(trim bool) Option { return func(s *Scanner) { s.reader.TrimLeadingSpace = trim } } -func SkipHeaderRecord() Option { return SkipRecords(1) } + +func SkipHeaderRecord() Option { return SkipRecords(1) } func SkipRecords(count int) Option { return func(s *Scanner) { for x := 0; x < count; x++ { diff --git a/csv/scanner.go b/csv/scanner.go index b02e336..b6391f2 100644 --- a/csv/scanner.go +++ b/csv/scanner.go @@ -1,3 +1,14 @@ +// Package csv scans CSV files, provides easy access to individual +// columns, and can also read field values into a struct (analogous +// to unmarshaling JSON or XML). 
+// +// It thinly wraps the standard library's [csv.Reader] and exposes +// most of its configuration "knobs" and behavior. Knowledge of +// the csv.Reader will help in configuring and running these +// scanners. +// +// Advance the scanners with the Scan method and check errors with +// the Error method (unlike fields and fixedwidth, which use Err). package csv import ( @@ -5,7 +16,10 @@ import ( "io" ) -// Scanner wraps a csv.Reader via an API similar to that of bufio.Scanner. +// Scanner provides access to the fields of CSV-encoded data. +// +// All configurations of the underlying *csv.Reader are available +// through an [Option]. type Scanner struct { reader *csv.Reader record []string @@ -14,7 +28,8 @@ type Scanner struct { continueOnError bool } -// NewScanner returns a scanner configured with the provided options. +// NewScanner returns a Scanner that reads from reader, configured +// with the provided options. func NewScanner(reader io.Reader, options ...Option) *Scanner { return new(Scanner).initialize(reader).configure(options) } @@ -30,9 +45,9 @@ func (this *Scanner) configure(options []Option) *Scanner { } // Scan advances the Scanner to the next record, which will then be available -// through the Record method. It returns false when the scan stops, either by +// through the [Scanner.Record] method. It returns false when the scan stops, either by // reaching the end of the input or an error. After Scan returns false, the -// Error method will return any error that occurred during scanning, except +// [Scanner.Error] method will return any error that occurred during scanning, except // that if it was io.EOF, Error will return nil. func (this *Scanner) Scan() bool { if this.eof() { @@ -53,8 +68,10 @@ func (this *Scanner) eof() bool { } // Record returns the most recent record generated by a call to Scan as a -// []string. 
See *csv.Reader.ReuseRecord for details on the strategy for -// reusing the underlying array: https://golang.org/pkg/encoding/csv/#Reader +// []string. +// +// See the [ReuseRecord] Option and follow the link to the standard library +// for details on the strategy for reusing the underlying array. func (this *Scanner) Record() []string { return this.record } diff --git a/csv/struct_scanner.go b/csv/struct_scanner.go index ad991b1..e787aa5 100644 --- a/csv/struct_scanner.go +++ b/csv/struct_scanner.go @@ -6,10 +6,21 @@ import ( "reflect" ) +// StructScanner provides access to the fields of CSV-encoded +// data through a struct's fields. +// +// Like unmarshaling with the standard JSON or XML decoders, the +// fields of the struct must be exported and tagged with a `"csv:"` +// prefix. +// +// All configurations of the underlying *csv.Reader are available +// through an [Option]. type StructScanner struct { *ColumnScanner } +// NewStructScanner returns a StructScanner that reads from reader, +// configured with the provided options. func NewStructScanner(reader io.Reader, options ...Option) (*StructScanner, error) { inner, err := NewColumnScanner(reader, options...) if err != nil { @@ -18,6 +29,9 @@ func NewStructScanner(reader io.Reader, options ...Option) (*StructScanner, erro return &StructScanner{ColumnScanner: inner}, nil } +// Populate gets the most recent record generated by a call to Scan +// and stores the values for tagged fields in the value pointed to +// by v. func (this *StructScanner) Populate(v interface{}) error { type_ := reflect.TypeOf(v) if type_.Kind() != reflect.Ptr { diff --git a/fields/example_test.go b/fields/example_test.go new file mode 100644 index 0000000..aa0703f --- /dev/null +++ b/fields/example_test.go @@ -0,0 +1,33 @@ +package fields_test + +import ( + "fmt" + "log" + "strings" + + "github.com/smartystreets/scanners/fields" +) + +// Justification of fields should not affect the scanned values. 
+func ExampleScanner() { + in := strings.Join([]string{ + " a\t 1 foo i ", + " b\t 10 bar ii ", + " c\t100 bazzle iii", + }, "\n") + + scanner := fields.NewScanner(strings.NewReader(in)) + + for scanner.Scan() { + fmt.Println(scanner.Fields()) + } + + if err := scanner.Err(); err != nil { + log.Panic(err) + } + + // Output: + // [a 1 foo i] + // [b 10 bar ii] + // [c 100 bazzle iii] +} diff --git a/fields/scanner.go b/fields/scanner.go index d249d9d..c33e627 100644 --- a/fields/scanner.go +++ b/fields/scanner.go @@ -1,3 +1,8 @@ +// Package fields scans fields, splitting on whitespace—fields +// themselves cannot contain whitespace. +// +// Advance the scanner with the Scan method and check errors with +// the Err method, both from the underlying bufio.Scanner. package fields import ( @@ -6,14 +11,22 @@ import ( "strings" ) +// Scanner provides access to the whitespace-separated fields of +// data. Field values cannot contain any whitespace. +// +// For a file that follows the encoding scheme of a so-called TSV, use [github.com/smartystreets/scanners/csv.Scanner] +// and configure it for tabs with [github.com/smartystreets/scanners/csv.Comma]. type Scanner struct { *bufio.Scanner } +// NewScanner returns a fields scanner. func NewScanner(reader io.Reader) *Scanner { return &Scanner{Scanner: bufio.NewScanner(reader)} } +// Fields returns the most recent fields generated by a call to Scan as a +// []string. 
func (this *Scanner) Fields() []string { return strings.Fields(this.Text()) } diff --git a/fixedwidth/example_test.go b/fixedwidth/example_test.go new file mode 100644 index 0000000..c44a922 --- /dev/null +++ b/fixedwidth/example_test.go @@ -0,0 +1,71 @@ +package fixedwidth_test + +import ( + "fmt" + "log" + "strings" + + fw "github.com/smartystreets/scanners/fixedwidth" +) + +func ExampleScanner() { + in := strings.Join([]string{ + "name username", + "Rob Pike rob ", + "Ken Thompson ken ", + "Robert Griesemer gri ", + }, "\n") + + scanner := fw.NewScanner(strings.NewReader(in)) + + for scanner.Scan() { + var ( + name = scanner.Field(fw.Field(0, 16)) + username = scanner.Field(fw.Field(17, 8)) + ) + + fmt.Printf("* % s* %s *\n", name, username) + } + + if err := scanner.Err(); err != nil { + log.Panic(err) + } + + // Output: + // * name * username * + // * Rob Pike * rob * + // * Ken Thompson * ken * + // * Robert Griesemer* gri * +} + +var ( + namef fw.Substring = func(x string) string { return x[0:16] } + usernamef fw.Substring = func(x string) string { return x[17:25] } +) + +// Define custom [Substring] functions with particular index +// ranges. +func ExampleScanner_substring() { + in := strings.Join([]string{ + "name username", + "Rob Pike rob ", + "Ken Thompson ken ", + "Robert Griesemer gri ", + }, "\n") + + scanner := fw.NewScanner(strings.NewReader(in)) + + for scanner.Scan() { + fmt.Printf("* % s* %s *\n", scanner.Field(namef), scanner.Field(usernamef)) + } + + if err := scanner.Err(); err != nil { + log.Panic(err) + } + + // Output: + // * name * username * + // * Rob Pike * rob * + // * Ken Thompson * ken * + // * Robert Griesemer* gri * +} diff --git a/fixedwidth/scanner.go b/fixedwidth/scanner.go index 1d08776..2991421 100644 --- a/fixedwidth/scanner.go +++ b/fixedwidth/scanner.go @@ -1,3 +1,8 @@ +// Package fixedwidth scans fixed-width files and provides easy +// access to individual columns. 
+// +// Advance the scanner with the Scan method and check errors with +// the Err method, both from the underlying bufio.Scanner. package fixedwidth import ( @@ -13,18 +18,24 @@ func Field(index, width int) Substring { } } +// A Scanner reads records from a fixed-width-encoded file. type Scanner struct { *bufio.Scanner } +// NewScanner returns a Scanner that reads from reader. func NewScanner(reader io.Reader) *Scanner { return &Scanner{Scanner: bufio.NewScanner(reader)} } +// Field returns the specified Substring from the most recent +// record generated by a call to Scanner.Scan as a string. func (this *Scanner) Field(field Substring) string { return field(this.Text()) } +// Fields returns the specified Substrings from the most recent +// record generated by a call to Scanner.Scan as a []string. func (this *Scanner) Fields(fields ...Substring) (values []string) { for _, field := range fields { values = append(values, this.Field(field)) diff --git a/scanners.go b/scanners.go index c669121..60c2cb3 100644 --- a/scanners.go +++ b/scanners.go @@ -1 +1,18 @@ +// Package scanners provides scanners for text files that encode +// data as CSV, space-delimited fields, or fixed-width columns. +// +// All three scanners either emulate or wrap a bufio.Scanner, +// and incorporate the bufio.Scanner style of defining a scan-loop, +// looping, and then checking for errors after the scan-loop has +// completed: +// +// scanner := SomeNewScanner() +// +// for scanner.Scan() { +// scanner.GetSomeValues() +// } +// +// if err := scanner.Err(); err != nil { +// log.Fatal(err) +// } package scanners