From 726e7915c5012c33689d9b179cbdc1583efe10cd Mon Sep 17 00:00:00 2001 From: Noboru Saito Date: Sat, 26 Oct 2024 11:03:41 +0900 Subject: [PATCH] Add Text Format. Add Text Format, which has 1 Column per line. --- cmd/cmd.go | 8 +++- input_text.go | 65 +++++++++++++++++++++++++++++ input_text_test.go | 101 +++++++++++++++++++++++++++++++++++++++++++++ reader.go | 3 ++ trdsql.go | 3 ++ trdsql_test.go | 28 +++++++++++++ 6 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 input_text.go create mode 100644 input_text_test.go diff --git a/cmd/cmd.go b/cmd/cmd.go index 0eba1eb..128e90b 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -124,6 +124,7 @@ func (cli Cli) Run(args []string) int { flags.IntVar(&inLimitRead, "ilr", 0, "limited number of rows to read.") flags.StringVar(&inJQuery, "ijq", "", "jq expression string for input(JSON/JSONL only).") flags.Var(&inNull, "inull", "value(string) to convert to null on input.") + flags.BoolVar(&inRowNumber, "inum", false, "add row number column.") flags.BoolVar(&inFlag.CSV, "icsv", false, "CSV format for input.") flags.BoolVar(&inFlag.LTSV, "iltsv", false, "LTSV format for input.") @@ -131,7 +132,7 @@ func (cli Cli) Run(args []string) int { flags.BoolVar(&inFlag.YAML, "iyaml", false, "YAML format for input.") flags.BoolVar(&inFlag.TBLN, "itbln", false, "TBLN format for input.") flags.BoolVar(&inFlag.WIDTH, "iwidth", false, "width specification format for input.") - flags.BoolVar(&inRowNumber, "inum", false, "add row number column.") + flags.BoolVar(&inFlag.TEXT, "itext", false, "text format for input.") flags.StringVar(&outFile, "out", "", "output file name.") flags.BoolVar(&outWithoutGuess, "out-without-guess", false, "output without guessing (when using -out).") @@ -501,6 +502,7 @@ type inputFlag struct { YAML bool TBLN bool WIDTH bool + TEXT bool } // inputFormat returns format from flag. 
@@ -518,6 +520,8 @@ func inputFormat(i inputFlag) trdsql.Format { return trdsql.TBLN case i.WIDTH: return trdsql.WIDTH + case i.TEXT: + return trdsql.TEXT default: return trdsql.GUESS } @@ -525,7 +529,7 @@ func inputFormat(i inputFlag) trdsql.Format { func isInFormat(name string) bool { switch name { - case "ig", "icsv", "iltsv", "ijson", "iyaml", "itbln", "iwidth": + case "ig", "icsv", "iltsv", "ijson", "iyaml", "itbln", "iwidth", "itext": return true } return false diff --git a/input_text.go b/input_text.go new file mode 100644 index 0000000..71596ac --- /dev/null +++ b/input_text.go @@ -0,0 +1,65 @@ +package trdsql + +import ( + "bufio" + "io" + "strings" +) + +// TextReader provides a reader for text format. +type TextReader struct { + reader *bufio.Reader + num int + maxNum int +} + +// NewTextReader returns a new TextReader. +func NewTextReader(reader io.Reader, opts *ReadOpts) (*TextReader, error) { + r := &TextReader{ + reader: bufio.NewReader(reader), + } + + if opts.InSkip > 0 { + skipRead(r, opts.InSkip) + } + + if opts.InLimitRead { + r.maxNum = opts.InPreRead + } + return r, nil +} + +// Names returns column names. +func (r *TextReader) Names() ([]string, error) { + return []string{"text"}, nil +} + +// Types returns column types. +func (r *TextReader) Types() ([]string, error) { + return []string{"text"}, nil +} + +// PreReadRow returns pre-read rows. +func (r *TextReader) PreReadRow() [][]any { + return nil +} + +// ReadRow reads a row. 
+func (r *TextReader) ReadRow([]any) ([]any, error) { + var builder strings.Builder + for { + if r.maxNum > 0 && r.num >= r.maxNum { + return []any{""}, io.EOF + } + line, isPrefix, err := r.reader.ReadLine() + if err != nil { + return []any{""}, err + } + builder.Write(line) + if isPrefix { + continue + } + r.num++ + return []any{builder.String()}, nil + } +} diff --git a/input_text_test.go b/input_text_test.go new file mode 100644 index 0000000..d1e7a9e --- /dev/null +++ b/input_text_test.go @@ -0,0 +1,101 @@ +package trdsql + +import ( + "io" + "path/filepath" + "reflect" + "strings" + "testing" +) + +func TestNewTextReader(t *testing.T) { + type args struct { + reader io.Reader + opts *ReadOpts + } + tests := []struct { + name string + args args + }{ + { + name: "test1", + args: args{ + reader: strings.NewReader("a\nb\nc\n"), + opts: NewReadOpts(), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := NewTextReader(tt.args.reader, tt.args.opts) + if err != nil { + t.Fatal(err) + } + names, err := got.Names() + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(names, []string{"text"}) { + t.Errorf("TextReader.Names() != text %v", names) + } + types, err := got.Types() + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(types, []string{"text"}) { + t.Errorf("TextReader.Types() != text %v", types) + } + }) + } +} + +func TestTextReaderFile(t *testing.T) { + tests := []struct { + name string + fileName string + opts *ReadOpts + want []any + wantErr bool + }{ + { + name: "test.csv", + fileName: "test.csv", + opts: NewReadOpts(), + want: []any{"1,Orange"}, + wantErr: false, + }, + { + name: "test.csv2", + fileName: "test.csv", + opts: &ReadOpts{InSkip: 1}, + want: []any{"2,Melon"}, + wantErr: false, + }, + { + name: "test.csv3", + fileName: "test.csv", + opts: &ReadOpts{InLimitRead: true, InPreRead: 1}, + want: []any{"1,Orange"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + 
+ file, err := singleFileOpen(filepath.Join(dataDir, tt.fileName)) + if err != nil { + t.Fatal(err) + } + r, err := NewTextReader(file, tt.opts) + if err != nil { + t.Fatal(err) + } + got, err := r.ReadRow(nil) + if (err != nil) != tt.wantErr { + t.Errorf("TextReader.ReadRow() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("TextReader.ReadRow() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/reader.go b/reader.go index 321dfea..bc6b6b7 100644 --- a/reader.go +++ b/reader.go @@ -49,6 +49,9 @@ var readerFuncs = map[Format]ReaderFunc{ WIDTH: func(reader io.Reader, opts *ReadOpts) (Reader, error) { return NewGWReader(reader, opts) }, + TEXT: func(reader io.Reader, opts *ReadOpts) (Reader, error) { + return NewTextReader(reader, opts) + }, } var ( diff --git a/trdsql.go b/trdsql.go index bc168c2..2e328b2 100644 --- a/trdsql.go +++ b/trdsql.go @@ -77,6 +77,9 @@ const ( // Format using guesswidth library. WIDTH + // import + TEXT + // export // Output as it is. // Multiple characters can be selected as delimiter. 
diff --git a/trdsql_test.go b/trdsql_test.go index ac77996..deb2856 100644 --- a/trdsql_test.go +++ b/trdsql_test.go @@ -554,6 +554,34 @@ func TestTBLNRun(t *testing.T) { } } +func TestTextRun(t *testing.T) { + testText := [][]string{ + {"test.csv", `1,"1,Orange" +2,"2,Melon" +3,"3,Apple" +`}, + {"aiu.csv", "1,あ\n2,い\n3,う\n"}, + } + outStream := new(bytes.Buffer) + importer := NewImporter( + InFormat(TEXT), + InRowNumber(true), + ) + exporter := NewExporter(NewWriter(OutStream(outStream))) + trd := NewTRDSQL(importer, exporter) + for _, c := range testText { + sqlQuery := "SELECT * FROM " + filepath.Join(dataDir, c[0]) + err := trd.Exec(sqlQuery) + if err != nil { + t.Errorf("trdsql error %s", err) + } + if outStream.String() != c[1] { + t.Fatalf("trdsql error %s:%s:%s", c[0], c[1], outStream) + } + outStream.Reset() + } +} + func setOutFormatTRDSQL(outFormat Format, outStream io.Writer) *TRDSQL { importer := NewImporter( InFormat(GUESS),