Skip to content

Commit

Permalink
Merge pull request #7 from zacharysyoung/MoreDocumentation
Browse files Browse the repository at this point in the history
added more documentation
  • Loading branch information
mdwhatcott authored Apr 24, 2024
2 parents a8eec48 + 87c0597 commit 7590197
Show file tree
Hide file tree
Showing 12 changed files with 296 additions and 16 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
[![Build Status](https://travis-ci.org/smartystreets/scanners.svg?branch=master)](https://travis-ci.org/smartystreets/scanners)
[![Code Coverage](https://codecov.io/gh/smartystreets/scanners/branch/master/graph/badge.svg)](https://codecov.io/gh/smartystreets/scanners)
[![Go Report Card](https://goreportcard.com/badge/github.com/smartystreets/scanners)](https://goreportcard.com/report/github.com/smartystreets/scanners)
[![GoDoc](https://godoc.org/github.com/smartystreets/scanners?status.svg)](http://godoc.org/github.com/smartystreets/scanners)
[![GoDoc](https://pkg.go.dev/badge/github.com/smartystreets/scanners)](https://pkg.go.dev/github.com/smartystreets/scanners)
18 changes: 18 additions & 0 deletions csv/column_scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,24 @@ import (
"log"
)

// ColumnScanner provides access to the fields of CSV-encoded
// data by column name. The scanner assumes the first
// record in the data to be the header with column names. If
// duplicate names exist in the header, the last column for the
// duplicate name will be used.
//
// All configurations of the underlying *csv.Reader are available
// through an [Option].
type ColumnScanner struct {
// Embedded Scanner; ColumnErr reads the current row through its Record method.
*Scanner
// headerRecord is the slice returned by Header.
headerRecord []string
// columnIndex maps a column name to the index used to pick that column's
// value out of the current record.
columnIndex map[string]int
}

// NewColumnScanner returns a ColumnScanner that reads from reader,
// configured with the provided options, and assumes the first record
// to be the header. It calls Scan once to read the header; subsequent
// calls to Scan will return the remaining records.
func NewColumnScanner(reader io.Reader, options ...Option) (*ColumnScanner, error) {
inner := NewScanner(reader, append(options, FieldsPerRecord(0))...)
if !inner.Scan() {
Expand All @@ -32,10 +44,14 @@ func (this *ColumnScanner) readHeader() {
}
}

// Header returns the header record stored on the scanner. The slice is
// handed back as-is; no copy is made.
func (this *ColumnScanner) Header() []string {
	header := this.headerRecord
	return header
}

// ColumnErr returns the value for column name of the most recent
// record generated by a call to Scan as a string. It returns an
// error if column was not present in the header record.
func (this *ColumnScanner) ColumnErr(column string) (string, error) {
index, ok := this.columnIndex[column]
if !ok {
Expand All @@ -44,6 +60,8 @@ func (this *ColumnScanner) ColumnErr(column string) (string, error) {
return this.Record()[index], nil
}

// Column wraps [ColumnScanner.ColumnErr], but panics if the name was not present
// in the header record.
func (this *ColumnScanner) Column(column string) string {
value, err := this.ColumnErr(column)
if err != nil {
Expand Down
14 changes: 14 additions & 0 deletions csv/column_scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ func (this *ColumnScannerFixture) TestColumnNotFound_Panic() {
this.So(func() { this.scanner.Column("nope") }, should.Panic)
}

// TestDuplicateColumnNames confirms that when a column name is
// repeated in the header, the last column carrying that name is
// the one used to retrieve values for it.
func (this *ColumnScannerFixture) TestDuplicateColumnNames() {
	scanner, err := NewColumnScanner(reader([]string{
		"Col1,Col2,Col2",
		"foo,bar,baz",
	}))
	this.So(err, should.BeNil)
	this.So(scanner.Header(), should.Resemble, []string{"Col1", "Col2", "Col2"})
	// Assert that the data row was actually read, so a failed scan is reported
	// here instead of surfacing as a confusing Column failure below.
	this.So(scanner.Scan(), should.BeTrue)
	this.So(scanner.Column("Col2"), should.Equal, "baz")
}

type User struct {
FirstName string
LastName string
Expand Down
58 changes: 58 additions & 0 deletions csv/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,61 @@ func ExampleScanner_options() {
// [Ken Thompson ken]
// [Robert Griesemer gri]
}

// A ColumnScanner maps field values in each row to column
// names. The column names are taken from the first row, which
// is assumed to be the header row.
func ExampleColumnScanner() {
	in := strings.Join([]string{
		`first_name,last_name,username`,
		`"Rob","Pike",rob`,
		`Ken,Thompson,ken`,
		`"Robert","Griesemer","gri"`,
	}, "\n")

	// The constructor reads the header row and can fail; stop here rather
	// than continue with an unusable scanner.
	scanner, err := csv.NewColumnScanner(strings.NewReader(in))
	if err != nil {
		log.Panic(err)
	}

	for scanner.Scan() {
		fmt.Println(scanner.Column("last_name"), scanner.Column("first_name"))
	}

	if err := scanner.Error(); err != nil {
		log.Panic(err)
	}

	// Output:
	// Pike Rob
	// Thompson Ken
	// Griesemer Robert
}

// A StructScanner copies the values of each row into the fields of a
// struct, matching columns to fields through the `csv:"..."` struct tags.
func ExampleStructScanner() {
	type person struct {
		Firstname string `csv:"first_name"`
		Lastname  string `csv:"last_name"`
		Username  string `csv:"username"`
	}

	in := strings.Join([]string{
		`first_name,last_name,username`,
		`"Rob","Pike",rob`,
		`Ken,Thompson,ken`,
		`"Robert","Griesemer","gri"`,
	}, "\n")

	// The constructor reads the header row and can fail; stop here rather
	// than continue with an unusable scanner.
	scanner, err := csv.NewStructScanner(strings.NewReader(in))
	if err != nil {
		log.Panic(err)
	}

	for scanner.Scan() {
		var p person
		// Populate reports problems through its error result; do not print a
		// struct that may not have been filled in.
		if err := scanner.Populate(&p); err != nil {
			log.Panic(err)
		}
		fmt.Printf("%+v\n", p)
	}

	if err := scanner.Error(); err != nil {
		log.Panic(err)
	}

	// Output:
	// {Firstname:Rob Lastname:Pike Username:rob}
	// {Firstname:Ken Lastname:Thompson Username:ken}
	// {Firstname:Robert Lastname:Griesemer Username:gri}
}
32 changes: 23 additions & 9 deletions csv/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,37 @@ type Option func(*Scanner)
// If true is passed, continue scanning until io.EOF is reached.
// If false is passed (default), any error encountered during scanning
// will result in the next call to Scan returning false and
// the Scanner may be considered dead. See Scanner.Error() for the exact error
// (before the next call to Scanner.Scan()).
// See https://golang.org/pkg/encoding/csv/#pkg-variables
// and https://golang.org/pkg/encoding/csv/#ParseError
// for more information regarding possible error values.
// the Scanner may be considered dead. Check [Scanner.Error] for the exact error
// (before the next call to [Scanner.Scan]).
//
// See the error variables starting at [csv.ErrBareQuote], and the
// [csv.ParseError] type, for more information regarding possible
// error values.
func ContinueOnError(continue_ bool) Option {
	// The returned Option only records the flag on the Scanner it is applied to.
	return func(scanner *Scanner) {
		scanner.continueOnError = continue_
	}
}
func Comma(comma rune) Option { return func(s *Scanner) { s.reader.Comma = comma } }

// Comma returns an Option that assigns comma to the Comma field of the
// Scanner's underlying reader. See the [csv.Reader.Comma] field.
func Comma(comma rune) Option {
	return func(scanner *Scanner) {
		scanner.reader.Comma = comma
	}
}

// Comment returns an Option that assigns comment to the Comment field of the
// Scanner's underlying reader. See the [csv.Reader.Comment] field.
func Comment(comment rune) Option {
	return func(scanner *Scanner) {
		scanner.reader.Comment = comment
	}
}

// FieldsPerRecord returns an Option that assigns fields to the
// FieldsPerRecord field of the Scanner's underlying reader.
// See the [csv.Reader.FieldsPerRecord] field.
func FieldsPerRecord(fields int) Option {
	apply := func(scanner *Scanner) {
		scanner.reader.FieldsPerRecord = fields
	}
	return apply
}
func LazyQuotes(lazy bool) Option { return func(s *Scanner) { s.reader.LazyQuotes = lazy } }
func ReuseRecord(reuse bool) Option { return func(s *Scanner) { s.reader.ReuseRecord = reuse } }

// LazyQuotes returns an Option that assigns lazy to the LazyQuotes field of
// the Scanner's underlying reader. See the [csv.Reader.LazyQuotes] field.
func LazyQuotes(lazy bool) Option {
	return func(scanner *Scanner) {
		scanner.reader.LazyQuotes = lazy
	}
}

// ReuseRecord returns an Option that assigns reuse to the ReuseRecord field
// of the Scanner's underlying reader. See the [csv.Reader.ReuseRecord] field.
func ReuseRecord(reuse bool) Option {
	return func(scanner *Scanner) {
		scanner.reader.ReuseRecord = reuse
	}
}

// TrimLeadingSpace returns an Option that assigns trim to the
// TrimLeadingSpace field of the Scanner's underlying reader.
// See the [csv.Reader.TrimLeadingSpace] field.
func TrimLeadingSpace(trim bool) Option {
	return func(scanner *Scanner) {
		scanner.reader.TrimLeadingSpace = trim
	}
}
func SkipHeaderRecord() Option { return SkipRecords(1) }

// SkipHeaderRecord returns the same Option as SkipRecords(1).
func SkipHeaderRecord() Option {
	const headerRecords = 1
	return SkipRecords(headerRecords)
}
func SkipRecords(count int) Option {
return func(s *Scanner) {
for x := 0; x < count; x++ {
Expand Down
29 changes: 23 additions & 6 deletions csv/scanner.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
// Package csv scans CSV files, provides easy access to individual
// columns, and can also read field values into a struct (analogous
// to unmarshaling JSON or XML).
//
// It thinly wraps the standard library's [csv.Reader] and exposes
// most of its configuration "knobs" and behavior. Knowledge of
// the csv.Reader will help in configuring and running these
// scanners.
//
// Advance the scanners with the Scan method and check errors with
// the Error method (unlike fields and fixedwidth, which use Err).
package csv

import (
"encoding/csv"
"io"
)

// Scanner wraps a csv.Reader via an API similar to that of bufio.Scanner.
// Scanner provides access to the fields of CSV-encoded data.
//
// All configurations of the underlying *csv.Reader are available
// through an [Option].
type Scanner struct {
reader *csv.Reader
record []string
Expand All @@ -14,7 +28,8 @@ type Scanner struct {
continueOnError bool
}

// NewScanner returns a scanner configured with the provided options.
// NewScanner returns a Scanner that reads from reader, configured
// with the provided options.
func NewScanner(reader io.Reader, options ...Option) *Scanner {
	// Bind a fresh Scanner to reader first, then hand the options to
	// configure and return whatever it yields.
	scanner := new(Scanner).initialize(reader)
	return scanner.configure(options)
}
Expand All @@ -30,9 +45,9 @@ func (this *Scanner) configure(options []Option) *Scanner {
}

// Scan advances the Scanner to the next record, which will then be available
// through the Record method. It returns false when the scan stops, either by
// through the [Scanner.Record] method. It returns false when the scan stops, either by
// reaching the end of the input or an error. After Scan returns false, the
// Error method will return any error that occurred during scanning, except
// [Scanner.Error] method will return any error that occurred during scanning, except
// that if it was io.EOF, Error will return nil.
func (this *Scanner) Scan() bool {
if this.eof() {
Expand All @@ -53,8 +68,10 @@ func (this *Scanner) eof() bool {
}

// Record returns the most recent record generated by a call to Scan as a
// []string. See *csv.Reader.ReuseRecord for details on the strategy for
// reusing the underlying array: https://golang.org/pkg/encoding/csv/#Reader
// []string.
//
// See the [ReuseRecord] Option and follow the link to the standard library
// for details on the strategy for reusing the underlying array.
func (this *Scanner) Record() []string {
	// The stored slice is returned directly; no copy is made here.
	current := this.record
	return current
}
Expand Down
14 changes: 14 additions & 0 deletions csv/struct_scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,21 @@ import (
"reflect"
)

// StructScanner provides access to the fields of CSV-encoded
// data through a struct's fields.
//
// Like unmarshaling with the standard JSON or XML decoders, the
// fields of the struct must be exported and tagged, here with a
// `csv:"column_name"` struct tag.
//
// All configurations of the underlying *csv.Reader are available
// through an [Option].
type StructScanner struct {
// Embedded ColumnScanner, as built by NewColumnScanner in NewStructScanner.
*ColumnScanner
}

// NewStructScanner returns a StructScanner that reads from reader,
// configured with the provided options.
func NewStructScanner(reader io.Reader, options ...Option) (*StructScanner, error) {
inner, err := NewColumnScanner(reader, options...)
if err != nil {
Expand All @@ -18,6 +29,9 @@ func NewStructScanner(reader io.Reader, options ...Option) (*StructScanner, erro
return &StructScanner{ColumnScanner: inner}, nil
}

// Populate gets the most recent record generated by a call to Scan
// and stores the values for tagged fields in the value pointed to
// by v.
func (this *StructScanner) Populate(v interface{}) error {
type_ := reflect.TypeOf(v)
if type_.Kind() != reflect.Ptr {
Expand Down
33 changes: 33 additions & 0 deletions fields/example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package fields_test

import (
"fmt"
"log"
"strings"

"github.com/smartystreets/scanners/fields"
)

// Leading and trailing whitespace, and the mix of tabs and spaces between
// fields, have no effect on the scanned values.
func ExampleScanner() {
	lines := []string{
		" a\t 1 foo i ",
		" b\t 10 bar ii ",
		" c\t100 bazzle iii",
	}
	input := strings.NewReader(strings.Join(lines, "\n"))

	s := fields.NewScanner(input)
	for s.Scan() {
		fmt.Println(s.Fields())
	}

	err := s.Err()
	if err != nil {
		log.Panic(err)
	}

	// Output:
	// [a 1 foo i]
	// [b 10 bar ii]
	// [c 100 bazzle iii]
}
13 changes: 13 additions & 0 deletions fields/scanner.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
// Package fields scans fields, splitting on whitespace—fields
// themselves cannot contain whitespace.
//
// Advance the scanner with the Scan method and check errors with
// the Err method, both from the underlying bufio.Scanner.
package fields

import (
Expand All @@ -6,14 +11,22 @@ import (
"strings"
)

// Scanner exposes the whitespace-separated fields of its input.
// A field value can never contain whitespace itself.
//
// For a file that follows the encoding scheme of a so-called TSV, use
// [github.com/smartystreets/scanners/csv.Scanner] and configure it for
// tabs with [github.com/smartystreets/scanners/csv.Comma].
type Scanner struct {
	// The embedded bufio.Scanner supplies Scan, Err and Text.
	*bufio.Scanner
}

// NewScanner returns a Scanner whose embedded bufio.Scanner reads from reader.
func NewScanner(reader io.Reader) *Scanner {
	inner := bufio.NewScanner(reader)
	return &Scanner{Scanner: inner}
}

// Fields splits the text of the most recent token produced by a call to
// Scan on whitespace and returns the pieces as a []string.
func (this *Scanner) Fields() []string {
	text := this.Text()
	return strings.Fields(text)
}
Loading

0 comments on commit 7590197

Please sign in to comment.