Merge pull request #7 from zacharysyoung/MoreDocumentation

added more documentation
smartystreets · Apr 24, 2024 · 7590197 · 7590197
2 parents a8eec48 + 87c0597
commit 7590197
Show file tree

Hide file tree

Showing 12 changed files with 296 additions and 16 deletions.
diff --git a/README.md b/README.md
@@ -3,4 +3,4 @@
 [![Build Status](https://travis-ci.org/smartystreets/scanners.svg?branch=master)](https://travis-ci.org/smartystreets/scanners)
 [![Code Coverage](https://codecov.io/gh/smartystreets/scanners/branch/master/graph/badge.svg)](https://codecov.io/gh/smartystreets/scanners)
 [![Go Report Card](https://goreportcard.com/badge/github.com/smartystreets/scanners)](https://goreportcard.com/report/github.com/smartystreets/scanners)
-[![GoDoc](https://godoc.org/github.com/smartystreets/scanners?status.svg)](http://godoc.org/github.com/smartystreets/scanners)
+[![GoDoc](https://pkg.go.dev/badge/github.com/smartystreets/scanners)](https://pkg.go.dev/github.com/smartystreets/scanners)
diff --git a/csv/column_scanner.go b/csv/column_scanner.go
@@ -6,12 +6,24 @@ import (
 	"log"
 )
 
+// ColumnScanner provides access to the fields of CSV-encoded
+// data by column name.  The scanner assumes the first
+// record in the data to be the header with column names.  If
+// duplicate names exist in the header, the last column for the
+// duplicate name will be used.
+//
+// All configurations of the underlying *csv.Reader are available
+// through an [Option].
 type ColumnScanner struct {
 	*Scanner
 	headerRecord []string
 	columnIndex  map[string]int
 }
 
+// NewColumnScanner returns a ColumnScanner that reads from reader,
+// configured with the provided options, and assumes the first record
+// to be the header.  It calls Scan once to read the header; subsequent
+// calls to Scan advance through the remaining records.
 func NewColumnScanner(reader io.Reader, options ...Option) (*ColumnScanner, error) {
 	inner := NewScanner(reader, append(options, FieldsPerRecord(0))...)
 	if !inner.Scan() {
@@ -32,10 +44,14 @@ func (this *ColumnScanner) readHeader() {
 	}
 }
 
+// Header returns the header record.
 func (this *ColumnScanner) Header() []string {
 	return this.headerRecord
 }
 
+// ColumnErr returns, as a string, the value in the named column of
+// the most recent record generated by a call to Scan.  It returns an
+// error if column was not present in the header record.
 func (this *ColumnScanner) ColumnErr(column string) (string, error) {
 	index, ok := this.columnIndex[column]
 	if !ok {
@@ -44,6 +60,8 @@ func (this *ColumnScanner) ColumnErr(column string) (string, error) {
 	return this.Record()[index], nil
 }
 
+// Column wraps [ColumnScanner.ColumnErr], but panics if the name was not present
+// in the header record.
 func (this *ColumnScanner) Column(column string) string {
 	value, err := this.ColumnErr(column)
 	if err != nil {

diff --git a/csv/column_scanner_test.go b/csv/column_scanner_test.go
@@ -69,6 +69,20 @@ func (this *ColumnScannerFixture) TestColumnNotFound_Panic() {
 	this.So(func() { this.scanner.Column("nope") }, should.Panic)
 }
 
+// TestDuplicateColumnNames confirms that duplicated/repeated
+// column names result in the last repeated column being
+// added to the map and used to retrieve values for that name.
+func (this *ColumnScannerFixture) TestDuplicateColumnNames() {
+	scanner, err := NewColumnScanner(reader([]string{
+		"Col1,Col2,Col2",
+		"foo,bar,baz",
+	}))
+	this.So(err, should.BeNil)
+	this.So(scanner.Header(), should.Resemble, []string{"Col1", "Col2", "Col2"})
+	scanner.Scan()
+	this.So(scanner.Column("Col2"), should.Equal, "baz")
+}
+
 type User struct {
 	FirstName string
 	LastName  string

diff --git a/csv/example_test.go b/csv/example_test.go
@@ -59,3 +59,61 @@ func ExampleScanner_options() {
 	// [Ken Thompson ken]
 	// [Robert Griesemer gri]
 }
+
+// A ColumnScanner maps field values in each row to column
+// names.  The column names are taken from the first row, which
+// is assumed to be the header row.
+func ExampleColumnScanner() {
+	in := strings.Join([]string{
+		`first_name,last_name,username`,
+		`"Rob","Pike",rob`,
+		`Ken,Thompson,ken`,
+		`"Robert","Griesemer","gri"`,
+	}, "\n")
+	scanner, _ := csv.NewColumnScanner(strings.NewReader(in))
+
+	for scanner.Scan() {
+		fmt.Println(scanner.Column("last_name"), scanner.Column("first_name"))
+	}
+
+	if err := scanner.Error(); err != nil {
+		log.Panic(err)
+	}
+
+	// Output:
+	// Pike Rob
+	// Thompson Ken
+	// Griesemer Robert
+}
+
+func ExampleStructScanner() {
+	type person struct {
+		Firstname string `csv:"first_name"`
+		Lastname  string `csv:"last_name"`
+		Username  string `csv:"username"`
+	}
+
+	in := strings.Join([]string{
+		`first_name,last_name,username`,
+		`"Rob","Pike",rob`,
+		`Ken,Thompson,ken`,
+		`"Robert","Griesemer","gri"`,
+	}, "\n")
+
+	scanner, _ := csv.NewStructScanner(strings.NewReader(in))
+
+	for scanner.Scan() {
+		var p person
+		scanner.Populate(&p)
+		fmt.Printf("%+v\n", p)
+	}
+
+	if err := scanner.Error(); err != nil {
+		log.Panic(err)
+	}
+
+	// Output:
+	// {Firstname:Rob Lastname:Pike Username:rob}
+	// {Firstname:Ken Lastname:Thompson Username:ken}
+	// {Firstname:Robert Lastname:Griesemer Username:gri}
+}
diff --git a/csv/options.go b/csv/options.go
@@ -8,23 +8,37 @@ type Option func(*Scanner)
 // If true is passed, continue scanning until io.EOF is reached.
 // If false is passed (default), any error encountered during scanning
 // will result in the next call to Scan returning false and
-// the Scanner may be considered dead. See Scanner.Error() for the exact error
-// (before the next call to Scanner.Scan()).
-// See https://golang.org/pkg/encoding/csv/#pkg-variables
-// and https://golang.org/pkg/encoding/csv/#ParseError
-// for more information regarding possible error values.
+// the Scanner may be considered dead. Check [Scanner.Error] for the exact error
+// (before the next call to [Scanner.Scan]).
+//
+// See the error variables starting at [csv.ErrBareQuote], and the
+// [csv.ParseError] type, for more information regarding possible
+// error values.
 func ContinueOnError(continue_ bool) Option {
 	return func(s *Scanner) { s.continueOnError = continue_ }
 }
-func Comma(comma rune) Option     { return func(s *Scanner) { s.reader.Comma = comma } }
+
+// Comma sets the [csv.Reader.Comma] field of the underlying reader.
+func Comma(comma rune) Option { return func(s *Scanner) { s.reader.Comma = comma } }
+
+// Comment sets the [csv.Reader.Comment] field of the underlying reader.
 func Comment(comment rune) Option { return func(s *Scanner) { s.reader.Comment = comment } }
+
+// FieldsPerRecord sets the [csv.Reader.FieldsPerRecord] field of the underlying reader.
 func FieldsPerRecord(fields int) Option {
 	return func(s *Scanner) { s.reader.FieldsPerRecord = fields }
 }
-func LazyQuotes(lazy bool) Option       { return func(s *Scanner) { s.reader.LazyQuotes = lazy } }
-func ReuseRecord(reuse bool) Option     { return func(s *Scanner) { s.reader.ReuseRecord = reuse } }
+
+// LazyQuotes sets the [csv.Reader.LazyQuotes] field of the underlying reader.
+func LazyQuotes(lazy bool) Option { return func(s *Scanner) { s.reader.LazyQuotes = lazy } }
+
+// ReuseRecord sets the [csv.Reader.ReuseRecord] field of the underlying reader.
+func ReuseRecord(reuse bool) Option { return func(s *Scanner) { s.reader.ReuseRecord = reuse } }
+
+// TrimLeadingSpace sets the [csv.Reader.TrimLeadingSpace] field of the underlying reader.
 func TrimLeadingSpace(trim bool) Option { return func(s *Scanner) { s.reader.TrimLeadingSpace = trim } }
-func SkipHeaderRecord() Option          { return SkipRecords(1) }
+
+func SkipHeaderRecord() Option { return SkipRecords(1) }
 func SkipRecords(count int) Option {
 	return func(s *Scanner) {
 		for x := 0; x < count; x++ {

diff --git a/csv/scanner.go b/csv/scanner.go
@@ -1,11 +1,25 @@
+// Package csv scans CSV files, provides easy access to individual
+// columns, and can also read field values into a struct (analogous
+// to unmarshaling JSON or XML).
+//
+// It thinly wraps the standard library's [csv.Reader] and exposes
+// most of its configuration "knobs" and behavior.  Knowledge of
+// the csv.Reader will help in configuring and running these
+// scanners.
+//
+// Advance the scanners with the Scan method and check errors with
+// the Error method (unlike fields and fixedwidth, which use Err).
 package csv
 
 import (
 	"encoding/csv"
 	"io"
 )
 
-// Scanner wraps a csv.Reader via an API similar to that of bufio.Scanner.
+// Scanner provides access to the fields of CSV-encoded data.
+//
+// All configurations of the underlying *csv.Reader are available
+// through an [Option].
 type Scanner struct {
 	reader *csv.Reader
 	record []string
@@ -14,7 +28,8 @@ type Scanner struct {
 	continueOnError bool
 }
 
-// NewScanner returns a scanner configured with the provided options.
+// NewScanner returns a Scanner that reads from reader, configured
+// with the provided options.
 func NewScanner(reader io.Reader, options ...Option) *Scanner {
 	return new(Scanner).initialize(reader).configure(options)
 }
@@ -30,9 +45,9 @@ func (this *Scanner) configure(options []Option) *Scanner {
 }
 
 // Scan advances the Scanner to the next record, which will then be available
-// through the Record method. It returns false when the scan stops, either by
+// through the [Scanner.Record] method. It returns false when the scan stops, either by
 // reaching the end of the input or an error. After Scan returns false, the
-// Error method will return any error that occurred during scanning, except
+// [Scanner.Error] method will return any error that occurred during scanning, except
 // that if it was io.EOF, Error will return nil.
 func (this *Scanner) Scan() bool {
 	if this.eof() {
@@ -53,8 +68,10 @@ func (this *Scanner) eof() bool {
 }
 
 // Record returns the most recent record generated by a call to Scan as a
-// []string. See *csv.Reader.ReuseRecord for details on the strategy for
-// reusing the underlying array: https://golang.org/pkg/encoding/csv/#Reader
+// []string.
+//
+// See the [ReuseRecord] Option and follow the link to the standard library
+// for details on the strategy for reusing the underlying array.
 func (this *Scanner) Record() []string {
 	return this.record
 }

diff --git a/csv/struct_scanner.go b/csv/struct_scanner.go
@@ -6,10 +6,21 @@ import (
 	"reflect"
 )
 
+// StructScanner provides access to the fields of CSV-encoded
+// data through a struct's fields.
+//
+// Like unmarshaling with the standard JSON or XML decoders, the
+// fields of the struct must be exported and tagged with a
+// `csv:"column_name"` struct tag.
+//
+// All configurations of the underlying *csv.Reader are available
+// through an [Option].
 type StructScanner struct {
 	*ColumnScanner
 }
 
+// NewStructScanner returns a StructScanner that reads from reader,
+// configured with the provided options.
 func NewStructScanner(reader io.Reader, options ...Option) (*StructScanner, error) {
 	inner, err := NewColumnScanner(reader, options...)
 	if err != nil {
@@ -18,6 +29,9 @@ func NewStructScanner(reader io.Reader, options ...Option) (*StructScanner, erro
 	return &StructScanner{ColumnScanner: inner}, nil
 }
 
+// Populate gets the most recent record generated by a call to Scan
+// and stores the values for tagged fields in the value pointed to
+// by v.
 func (this *StructScanner) Populate(v interface{}) error {
 	type_ := reflect.TypeOf(v)
 	if type_.Kind() != reflect.Ptr {

diff --git a/fields/example_test.go b/fields/example_test.go
@@ -0,0 +1,33 @@
+package fields_test
+
+import (
+	"fmt"
+	"log"
+	"strings"
+
+	"github.com/smartystreets/scanners/fields"
+)
+
+// Justification of fields should not affect the scanned values.
+func ExampleScanner() {
+	in := strings.Join([]string{
+		"  a\t  1   foo    i  ",
+		"  b\t 10   bar    ii ",
+		"  c\t100  bazzle  iii",
+	}, "\n")
+
+	scanner := fields.NewScanner(strings.NewReader(in))
+
+	for scanner.Scan() {
+		fmt.Println(scanner.Fields())
+	}
+
+	if err := scanner.Err(); err != nil {
+		log.Panic(err)
+	}
+
+	// Output:
+	// [a 1 foo i]
+	// [b 10 bar ii]
+	// [c 100 bazzle iii]
+}
diff --git a/fields/scanner.go b/fields/scanner.go
@@ -1,3 +1,8 @@
+// Package fields scans fields, splitting on whitespace—fields
+// themselves cannot contain whitespace.
+//
+// Advance the scanner with the Scan method and check errors with
+// the Err method, both from the underlying bufio.Scanner.
 package fields
 
 import (
@@ -6,14 +11,22 @@ import (
 	"strings"
 )
 
+// Scanner provides access to the whitespace-separated fields of
+// data.  Field values cannot contain any whitespace.
+//
+// For a file that follows the encoding scheme of a so-called TSV, use [github.com/smartystreets/scanners/csv.Scanner]
+// and configure it for tabs with [github.com/smartystreets/scanners/csv.Comma].
 type Scanner struct {
 	*bufio.Scanner
 }
 
+// NewScanner returns a Scanner that reads whitespace-separated fields from reader.
 func NewScanner(reader io.Reader) *Scanner {
 	return &Scanner{Scanner: bufio.NewScanner(reader)}
 }
 
+// Fields returns the most recent fields generated by a call to Scan as a
+// []string.
 func (this *Scanner) Fields() []string {
 	return strings.Fields(this.Text())
 }