From 75bf10ba9c0ee97337a149ea60571a3800420343 Mon Sep 17 00:00:00 2001
From: Zach Young <zacharysyoung@gmail.com>
Date: Wed, 24 Apr 2024 14:46:13 -0700
Subject: [PATCH 1/4] added more documentation

---
 README.md                  |  2 ++
 csv/column_scanner.go      | 16 +++++++++
 csv/column_scanner_test.go | 14 ++++++++
 csv/example_test.go        | 58 +++++++++++++++++++++++++++++++
 csv/options.go             | 32 ++++++++++++-----
 csv/scanner.go             | 29 ++++++++++++----
 csv/struct_scanner.go      | 14 ++++++++
 fields/example_test.go     | 33 ++++++++++++++++++
 fields/scanner.go          | 13 +++++++
 fixedwidth/example_test.go | 71 ++++++++++++++++++++++++++++++++++++++
 fixedwidth/scanner.go      | 11 ++++++
 scanners.go                | 17 +++++++++
 12 files changed, 295 insertions(+), 15 deletions(-)
 create mode 100644 fields/example_test.go
 create mode 100644 fixedwidth/example_test.go

diff --git a/README.md b/README.md
index cc57244..cc7e004 100644
--- a/README.md
+++ b/README.md
@@ -4,3 +4,5 @@
 [![Code Coverage](https://codecov.io/gh/smartystreets/scanners/branch/master/graph/badge.svg)](https://codecov.io/gh/smartystreets/scanners)
 [![Go Report Card](https://goreportcard.com/badge/github.com/smartystreets/scanners)](https://goreportcard.com/report/github.com/smartystreets/scanners)
 [![GoDoc](https://godoc.org/github.com/smartystreets/scanners?status.svg)](http://godoc.org/github.com/smartystreets/scanners)
+
+Explore [the package](https://pkg.go.dev/github.com/smartystreets/scanners) on pkg.go.dev.
diff --git a/csv/column_scanner.go b/csv/column_scanner.go
index 1f55511..5a818c4 100644
--- a/csv/column_scanner.go
+++ b/csv/column_scanner.go
@@ -6,12 +6,22 @@ import (
 	"log"
 )
 
+// ColumnScanner provides access to the fields of CSV-encoded
+// data by column name.  The scanner assumes the first
+// record in the data to be the header with column names.
+//
+// All configurations of the underlying *csv.Reader are available
+// through an [Option].
 type ColumnScanner struct {
 	*Scanner
 	headerRecord []string
 	columnIndex  map[string]int
 }
 
+// NewColumnScanner returns a ColumnScanner that reads from reader,
+// configured with the provided options, and assumes the first record
+// to be the header.  It calls Scan once to read the header; subsequent
+// calls to Scan will return the remaining records.
 func NewColumnScanner(reader io.Reader, options ...Option) (*ColumnScanner, error) {
 	inner := NewScanner(reader, append(options, FieldsPerRecord(0))...)
 	if !inner.Scan() {
@@ -32,10 +42,14 @@ func (this *ColumnScanner) readHeader() {
 	}
 }
 
+// Header returns the header record.
 func (this *ColumnScanner) Header() []string {
 	return this.headerRecord
 }
 
+// ColumnErr returns the value for column name of the most recent
+// record generated by a call to Scan as a string.  It returns an
+// error if column was not present in the header record.
 func (this *ColumnScanner) ColumnErr(column string) (string, error) {
 	index, ok := this.columnIndex[column]
 	if !ok {
@@ -44,6 +58,8 @@ func (this *ColumnScanner) ColumnErr(column string) (string, error) {
 	return this.Record()[index], nil
 }
 
+// Column wraps [ColumnScanner.ColumnErr], but panics if the name was not present
+// in the header record.
 func (this *ColumnScanner) Column(column string) string {
 	value, err := this.ColumnErr(column)
 	if err != nil {
diff --git a/csv/column_scanner_test.go b/csv/column_scanner_test.go
index 945ae8b..9541069 100644
--- a/csv/column_scanner_test.go
+++ b/csv/column_scanner_test.go
@@ -69,6 +69,20 @@ func (this *ColumnScannerFixture) TestColumnNotFound_Panic() {
 	this.So(func() { this.scanner.Column("nope") }, should.Panic)
 }
 
+// TestDuplicateColumnNames confirms that when column names are
+// duplicated/repeated, the last repeated column is the one
+// added to the map and used to retrieve values for that name.
+func (this *ColumnScannerFixture) TestDuplicateColumnNames() {
+	scanner, err := NewColumnScanner(reader([]string{
+		"Col1,Col2,Col2",
+		"foo,bar,baz",
+	}))
+	this.So(err, should.BeNil)
+	this.So(scanner.Header(), should.Resemble, []string{"Col1", "Col2", "Col2"})
+	scanner.Scan()
+	this.So(scanner.Column("Col2"), should.Equal, "baz")
+}
+
 type User struct {
 	FirstName string
 	LastName  string
diff --git a/csv/example_test.go b/csv/example_test.go
index 1e68629..a453b39 100644
--- a/csv/example_test.go
+++ b/csv/example_test.go
@@ -59,3 +59,61 @@ func ExampleScanner_options() {
 	// [Ken Thompson ken]
 	// [Robert Griesemer gri]
 }
+
+// A ColumnScanner maps field values in each row to column
+// names.  The column names are taken from the first row, which
+// is assumed to be the header row.
+func ExampleColumnScanner() {
+	in := strings.Join([]string{
+		`first_name,last_name,username`,
+		`"Rob","Pike",rob`,
+		`Ken,Thompson,ken`,
+		`"Robert","Griesemer","gri"`,
+	}, "\n")
+	scanner, _ := csv.NewColumnScanner(strings.NewReader(in))
+
+	for scanner.Scan() {
+		fmt.Println(scanner.Column("last_name"), scanner.Column("first_name"))
+	}
+
+	if err := scanner.Error(); err != nil {
+		log.Panic(err)
+	}
+
+	// Output:
+	// Pike Rob
+	// Thompson Ken
+	// Griesemer Robert
+}
+
+func ExampleStructScanner() {
+	type person struct {
+		Firstname string `csv:"first_name"`
+		Lastname  string `csv:"last_name"`
+		Username  string `csv:"username"`
+	}
+
+	in := strings.Join([]string{
+		`first_name,last_name,username`,
+		`"Rob","Pike",rob`,
+		`Ken,Thompson,ken`,
+		`"Robert","Griesemer","gri"`,
+	}, "\n")
+
+	scanner, _ := csv.NewStructScanner(strings.NewReader(in))
+
+	for scanner.Scan() {
+		var p person
+		scanner.Populate(&p)
+		fmt.Printf("%+v\n", p)
+	}
+
+	if err := scanner.Error(); err != nil {
+		log.Panic(err)
+	}
+
+	// Output:
+	// {Firstname:Rob Lastname:Pike Username:rob}
+	// {Firstname:Ken Lastname:Thompson Username:ken}
+	// {Firstname:Robert Lastname:Griesemer Username:gri}
+}
diff --git a/csv/options.go b/csv/options.go
index c07fbae..c6989a7 100644
--- a/csv/options.go
+++ b/csv/options.go
@@ -8,23 +8,37 @@ type Option func(*Scanner)
 // If true is passed, continue scanning until io.EOF is reached.
 // If false is passed (default), any error encountered during scanning
 // will result in the next call to Scan returning false and
-// the Scanner may be considered dead. See Scanner.Error() for the exact error
-// (before the next call to Scanner.Scan()).
-// See https://golang.org/pkg/encoding/csv/#pkg-variables
-// and https://golang.org/pkg/encoding/csv/#ParseError
-// for more information regarding possible error values.
+// the Scanner may be considered dead. Check [Scanner.Error] for the exact error
+// (before the next call to [Scanner.Scan]).
+//
+// See the error variables starting at [csv.ErrBareQuote], and the
+// [csv.ParseError] type, for more information regarding possible
+// error values.
 func ContinueOnError(continue_ bool) Option {
 	return func(s *Scanner) { s.continueOnError = continue_ }
 }
-func Comma(comma rune) Option     { return func(s *Scanner) { s.reader.Comma = comma } }
+
+// Comma sets the [csv.Reader.Comma] field of the underlying reader.
+func Comma(comma rune) Option { return func(s *Scanner) { s.reader.Comma = comma } }
+
+// Comment sets the [csv.Reader.Comment] field of the underlying reader.
 func Comment(comment rune) Option { return func(s *Scanner) { s.reader.Comment = comment } }
+
+// FieldsPerRecord sets the [csv.Reader.FieldsPerRecord] field of the underlying reader.
 func FieldsPerRecord(fields int) Option {
 	return func(s *Scanner) { s.reader.FieldsPerRecord = fields }
 }
-func LazyQuotes(lazy bool) Option       { return func(s *Scanner) { s.reader.LazyQuotes = lazy } }
-func ReuseRecord(reuse bool) Option     { return func(s *Scanner) { s.reader.ReuseRecord = reuse } }
+
+// LazyQuotes sets the [csv.Reader.LazyQuotes] field of the underlying reader.
+func LazyQuotes(lazy bool) Option { return func(s *Scanner) { s.reader.LazyQuotes = lazy } }
+
+// ReuseRecord sets the [csv.Reader.ReuseRecord] field of the underlying reader.
+func ReuseRecord(reuse bool) Option { return func(s *Scanner) { s.reader.ReuseRecord = reuse } }
+
+// TrimLeadingSpace sets the [csv.Reader.TrimLeadingSpace] field of the underlying reader.
 func TrimLeadingSpace(trim bool) Option { return func(s *Scanner) { s.reader.TrimLeadingSpace = trim } }
-func SkipHeaderRecord() Option          { return SkipRecords(1) }
+
+func SkipHeaderRecord() Option { return SkipRecords(1) }
 func SkipRecords(count int) Option {
 	return func(s *Scanner) {
 		for x := 0; x < count; x++ {
diff --git a/csv/scanner.go b/csv/scanner.go
index b02e336..b6391f2 100644
--- a/csv/scanner.go
+++ b/csv/scanner.go
@@ -1,3 +1,14 @@
+// Package csv scans CSV files, provides easy access to individual
+// columns, and can also read field values into a struct (analogous
+// to unmarshaling JSON or XML).
+//
+// It thinly wraps the standard library's [csv.Reader] and exposes
+// most of its configuration "knobs" and behavior.  Knowledge of
+// the csv.Reader will help in configuring and running these
+// scanners.
+//
+// Advance the scanners with the Scan method and check errors with
+// the Error method (unlike fields and fixedwidth, which use Err).
 package csv
 
 import (
@@ -5,7 +16,10 @@ import (
 	"io"
 )
 
-// Scanner wraps a csv.Reader via an API similar to that of bufio.Scanner.
+// Scanner provides access to the fields of CSV-encoded data.
+//
+// All configurations of the underlying *csv.Reader are available
+// through an [Option].
 type Scanner struct {
 	reader *csv.Reader
 	record []string
@@ -14,7 +28,8 @@ type Scanner struct {
 	continueOnError bool
 }
 
-// NewScanner returns a scanner configured with the provided options.
+// NewScanner returns a Scanner that reads from reader, configured
+// with the provided options.
 func NewScanner(reader io.Reader, options ...Option) *Scanner {
 	return new(Scanner).initialize(reader).configure(options)
 }
@@ -30,9 +45,9 @@ func (this *Scanner) configure(options []Option) *Scanner {
 }
 
 // Scan advances the Scanner to the next record, which will then be available
-// through the Record method. It returns false when the scan stops, either by
+// through the [Scanner.Record] method. It returns false when the scan stops, either by
 // reaching the end of the input or an error. After Scan returns false, the
-// Error method will return any error that occurred during scanning, except
+// [Scanner.Error] method will return any error that occurred during scanning, except
 // that if it was io.EOF, Error will return nil.
 func (this *Scanner) Scan() bool {
 	if this.eof() {
@@ -53,8 +68,10 @@ func (this *Scanner) eof() bool {
 }
 
 // Record returns the most recent record generated by a call to Scan as a
-// []string. See *csv.Reader.ReuseRecord for details on the strategy for
-// reusing the underlying array: https://golang.org/pkg/encoding/csv/#Reader
+// []string.
+//
+// See the [ReuseRecord] Option and follow the link to the standard library
+// for details on the strategy for reusing the underlying array.
 func (this *Scanner) Record() []string {
 	return this.record
 }
diff --git a/csv/struct_scanner.go b/csv/struct_scanner.go
index ad991b1..e787aa5 100644
--- a/csv/struct_scanner.go
+++ b/csv/struct_scanner.go
@@ -6,10 +6,21 @@ import (
 	"reflect"
 )
 
+// StructScanner provides access to the fields of CSV-encoded
+// data through a struct's fields.
+//
+// Like unmarshaling with the standard JSON or XML decoders, the
+// fields of the struct must be exported and tagged with a `"csv:"`
+// prefix.
+//
+// All configurations of the underlying *csv.Reader are available
+// through an [Option].
 type StructScanner struct {
 	*ColumnScanner
 }
 
+// NewStructScanner returns a StructScanner that reads from reader,
+// configured with the provided options.
 func NewStructScanner(reader io.Reader, options ...Option) (*StructScanner, error) {
 	inner, err := NewColumnScanner(reader, options...)
 	if err != nil {
@@ -18,6 +29,9 @@ func NewStructScanner(reader io.Reader, options ...Option) (*StructScanner, erro
 	return &StructScanner{ColumnScanner: inner}, nil
 }
 
+// Populate gets the most recent record generated by a call to Scan
+// and stores the values for tagged fields in the value pointed to
+// by v.
 func (this *StructScanner) Populate(v interface{}) error {
 	type_ := reflect.TypeOf(v)
 	if type_.Kind() != reflect.Ptr {
diff --git a/fields/example_test.go b/fields/example_test.go
new file mode 100644
index 0000000..aa0703f
--- /dev/null
+++ b/fields/example_test.go
@@ -0,0 +1,33 @@
+package fields_test
+
+import (
+	"fmt"
+	"log"
+	"strings"
+
+	"github.com/smartystreets/scanners/fields"
+)
+
+// Justification of fields should not affect the scanned values.
+func ExampleScanner() {
+	in := strings.Join([]string{
+		"  a\t  1   foo    i  ",
+		"  b\t 10   bar    ii ",
+		"  c\t100  bazzle  iii",
+	}, "\n")
+
+	scanner := fields.NewScanner(strings.NewReader(in))
+
+	for scanner.Scan() {
+		fmt.Println(scanner.Fields())
+	}
+
+	if err := scanner.Err(); err != nil {
+		log.Panic(err)
+	}
+
+	// Output:
+	// [a 1 foo i]
+	// [b 10 bar ii]
+	// [c 100 bazzle iii]
+}
diff --git a/fields/scanner.go b/fields/scanner.go
index d249d9d..c33e627 100644
--- a/fields/scanner.go
+++ b/fields/scanner.go
@@ -1,3 +1,8 @@
+// Package fields scans fields, splitting on whitespace—fields
+// themselves cannot contain whitespace.
+//
+// Advance the scanner with the Scan method and check errors with
+// the Err method, both from the underlying bufio.Scanner.
 package fields
 
 import (
@@ -6,14 +11,22 @@ import (
 	"strings"
 )
 
+// Scanner provides access to the whitespace-separated fields of
+// data.  Field values cannot contain any whitespace.
+//
+// For a file that follows the encoding scheme of a so-called TSV, use [github.com/smartystreets/scanners/csv.Scanner]
+// and configure it for tabs with [github.com/smartystreets/scanners/csv.Comma].
 type Scanner struct {
 	*bufio.Scanner
 }
 
+// NewScanner returns a fields scanner.
 func NewScanner(reader io.Reader) *Scanner {
 	return &Scanner{Scanner: bufio.NewScanner(reader)}
 }
 
+// Fields returns the most recent fields generated by a call to Scan as a
+// []string.
 func (this *Scanner) Fields() []string {
 	return strings.Fields(this.Text())
 }
diff --git a/fixedwidth/example_test.go b/fixedwidth/example_test.go
new file mode 100644
index 0000000..c44a922
--- /dev/null
+++ b/fixedwidth/example_test.go
@@ -0,0 +1,71 @@
+package fixedwidth_test
+
+import (
+	"fmt"
+	"log"
+	"strings"
+
+	fw "github.com/smartystreets/scanners/fixedwidth"
+)
+
+func ExampleScanner() {
+	in := strings.Join([]string{
+		"name             username",
+		"Rob Pike         rob     ",
+		"Ken Thompson     ken     ",
+		"Robert Griesemer gri     ",
+	}, "\n")
+
+	scanner := fw.NewScanner(strings.NewReader(in))
+
+	for scanner.Scan() {
+		var (
+			name     = scanner.Field(fw.Field(0, 16))
+			username = scanner.Field(fw.Field(17, 8))
+		)
+
+		fmt.Printf("* % s* %s *\n", name, username)
+	}
+
+	if err := scanner.Err(); err != nil {
+		log.Panic(err)
+	}
+
+	// Output:
+	// * name            * username *
+	// * Rob Pike        * rob      *
+	// * Ken Thompson    * ken      *
+	// * Robert Griesemer* gri      *
+}
+
+var (
+	namef     fw.Substring = func(x string) string { return x[0:16] }
+	usernamef fw.Substring = func(x string) string { return x[17:25] }
+)
+
+// Define custom [Substring] functions with particular index
+// ranges.
+func ExampleScanner_substring() {
+	in := strings.Join([]string{
+		"name             username",
+		"Rob Pike         rob     ",
+		"Ken Thompson     ken     ",
+		"Robert Griesemer gri     ",
+	}, "\n")
+
+	scanner := fw.NewScanner(strings.NewReader(in))
+
+	for scanner.Scan() {
+		fmt.Printf("* % s* %s *\n", scanner.Field(namef), scanner.Field(usernamef))
+	}
+
+	if err := scanner.Err(); err != nil {
+		log.Panic(err)
+	}
+
+	// Output:
+	// * name            * username *
+	// * Rob Pike        * rob      *
+	// * Ken Thompson    * ken      *
+	// * Robert Griesemer* gri      *
+}
diff --git a/fixedwidth/scanner.go b/fixedwidth/scanner.go
index 1d08776..2991421 100644
--- a/fixedwidth/scanner.go
+++ b/fixedwidth/scanner.go
@@ -1,3 +1,8 @@
+// Package fixedwidth scans fixed-width files and provides easy
+// access to individual columns.
+//
+// Advance the scanner with the Scan method and check errors with
+// the Err method, both from the underlying bufio.Scanner.
 package fixedwidth
 
 import (
@@ -13,18 +18,24 @@ func Field(index, width int) Substring {
 	}
 }
 
+// A Scanner reads records from a fixed-width-encoded file.
 type Scanner struct {
 	*bufio.Scanner
 }
 
+// NewScanner returns a Scanner that reads from reader.
 func NewScanner(reader io.Reader) *Scanner {
 	return &Scanner{Scanner: bufio.NewScanner(reader)}
 }
 
+// Field returns the specified Substring from the most recent
+// record generated by a call to Scanner.Scan as a string.
 func (this *Scanner) Field(field Substring) string {
 	return field(this.Text())
 }
 
+// Fields returns the specified Substrings from the most recent
+// record generated by a call to Scanner.Scan as a []string.
 func (this *Scanner) Fields(fields ...Substring) (values []string) {
 	for _, field := range fields {
 		values = append(values, this.Field(field))
diff --git a/scanners.go b/scanners.go
index c669121..60c2cb3 100644
--- a/scanners.go
+++ b/scanners.go
@@ -1 +1,18 @@
+// Package scanners provides scanners for text files that encode
+// data as CSV, space-delimited fields, or fixed-width columns.
+//
+// All three scanners either emulate or wrap a bufio.Scanner,
+// and follow the bufio.Scanner style of looping on Scan and then
+// checking for errors once the scan-loop has completed (the csv
+// scanners name the error method Error rather than Err):
+//
+//	scanner := SomeNewScanner()
+//
+//	for scanner.Scan() {
+//	    scanner.GetSomeValues()
+//	}
+//
+//	if err := scanner.Err(); err != nil {
+//	    log.Fatal(err)
+//	}
 package scanners

From 2d2282869e5f3553489327441d9d30c6012b1fd4 Mon Sep 17 00:00:00 2001
From: Zach Young <zacharysyoung@gmail.com>
Date: Wed, 24 Apr 2024 15:22:30 -0700
Subject: [PATCH 2/4] added blurb about duplicate header behavior

---
 csv/column_scanner.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/csv/column_scanner.go b/csv/column_scanner.go
index 5a818c4..9f37643 100644
--- a/csv/column_scanner.go
+++ b/csv/column_scanner.go
@@ -8,7 +8,9 @@ import (
 
 // ColumnScanner provides access to the fields of CSV-encoded
 // data by column name.  The scanner assumes the first
-// record in the data to be the header with column names.
+// record in the data to be the header with column names.  If
+// duplicate names exist in the header, the last column for the
+// duplicate name will be used.
 //
 // All configurations of the underlying *csv.Reader are available
 // through an [Option].

From 98bc9b88d13217e5257a1897822e1c144e20bfe5 Mon Sep 17 00:00:00 2001
From: Zach Young <zacharysyoung@gmail.com>
Date: Wed, 24 Apr 2024 15:26:37 -0700
Subject: [PATCH 3/4] updated GoDoc badge link; removed superfluous link

---
 README.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/README.md b/README.md
index cc7e004..295867e 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,4 @@
 [![Build Status](https://travis-ci.org/smartystreets/scanners.svg?branch=master)](https://travis-ci.org/smartystreets/scanners)
 [![Code Coverage](https://codecov.io/gh/smartystreets/scanners/branch/master/graph/badge.svg)](https://codecov.io/gh/smartystreets/scanners)
 [![Go Report Card](https://goreportcard.com/badge/github.com/smartystreets/scanners)](https://goreportcard.com/report/github.com/smartystreets/scanners)
-[![GoDoc](https://godoc.org/github.com/smartystreets/scanners?status.svg)](http://godoc.org/github.com/smartystreets/scanners)
-
-Explore [the package](https://pkg.go.dev/github.com/smartystreets/scanners) on pkg.go.dev.
+[![GoDoc](https://pkg.go.dev/badge/github.com/smartystreets/scanners)](https://pkg.go.dev/badge/github.com/smartystreets/scanners)

From 87c0597b05017d532ee81e8768f7d383b4e37a92 Mon Sep 17 00:00:00 2001
From: Zach Young <zacharysyoung@gmail.com>
Date: Wed, 24 Apr 2024 15:27:32 -0700
Subject: [PATCH 4/4] updated GoDoc badge link; removed superfluous link

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 295867e..334fb43 100644
--- a/README.md
+++ b/README.md
@@ -3,4 +3,4 @@
 [![Build Status](https://travis-ci.org/smartystreets/scanners.svg?branch=master)](https://travis-ci.org/smartystreets/scanners)
 [![Code Coverage](https://codecov.io/gh/smartystreets/scanners/branch/master/graph/badge.svg)](https://codecov.io/gh/smartystreets/scanners)
 [![Go Report Card](https://goreportcard.com/badge/github.com/smartystreets/scanners)](https://goreportcard.com/report/github.com/smartystreets/scanners)
-[![GoDoc](https://pkg.go.dev/badge/github.com/smartystreets/scanners)](https://pkg.go.dev/badge/github.com/smartystreets/scanners)
+[![GoDoc](https://pkg.go.dev/badge/github.com/smartystreets/scanners)](https://pkg.go.dev/github.com/smartystreets/scanners)