diff --git a/.gitignore b/.gitignore index 4ca7abc..a1f5f0d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,8 +14,8 @@ *.out -examples/people/parquet.go -examples/people/people.parquet -examples/people/read.py -examples/via_parquet/generated_struct.go -examples/via_parquet/parquet.go \ No newline at end of file +_examples/people/parquet.go +_examples/people/people.parquet +_examples/people/read.py +_examples/via_parquet/generated_struct.go +_examples/via_parquet/parquet.go \ No newline at end of file diff --git a/examples/people/README.md b/_examples/people/README.md similarity index 100% rename from examples/people/README.md rename to _examples/people/README.md diff --git a/examples/people/main.go b/_examples/people/main.go similarity index 100% rename from examples/people/main.go rename to _examples/people/main.go diff --git a/examples/people/people.go b/_examples/people/people.go similarity index 100% rename from examples/people/people.go rename to _examples/people/people.go diff --git a/examples/via_parquet/README.md b/_examples/via_parquet/README.md similarity index 100% rename from examples/via_parquet/README.md rename to _examples/via_parquet/README.md diff --git a/examples/via_parquet/main.go b/_examples/via_parquet/main.go similarity index 100% rename from examples/via_parquet/main.go rename to _examples/via_parquet/main.go diff --git a/examples/via_parquet/people.parquet b/_examples/via_parquet/people.parquet similarity index 100% rename from examples/via_parquet/people.parquet rename to _examples/via_parquet/people.parquet diff --git a/internal/cases/cases.go b/cmd/parquetgen/cases/cases.go similarity index 100% rename from internal/cases/cases.go rename to cmd/parquetgen/cases/cases.go diff --git a/internal/cases/cases_test.go b/cmd/parquetgen/cases/cases_test.go similarity index 93% rename from internal/cases/cases_test.go rename to cmd/parquetgen/cases/cases_test.go index 316d9f7..54e629e 100644 --- a/internal/cases/cases_test.go +++ 
b/cmd/parquetgen/cases/cases_test.go @@ -4,7 +4,7 @@ import ( "fmt" "testing" - "github.com/parsyl/parquet/internal/cases" + "github.com/parsyl/parquet/cmd/parquetgen/cases" "github.com/stretchr/testify/assert" ) diff --git a/internal/dremel/dremel.go b/cmd/parquetgen/dremel/dremel.go similarity index 58% rename from internal/dremel/dremel.go rename to cmd/parquetgen/dremel/dremel.go index 3c2d8c2..69f0bf3 100644 --- a/internal/dremel/dremel.go +++ b/cmd/parquetgen/dremel/dremel.go @@ -1,7 +1,10 @@ package dremel import ( - "github.com/parsyl/parquet/internal/fields" + "fmt" + "strings" + + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) // Package dremel generates code that parquetgen @@ -10,14 +13,13 @@ import ( // Write generates the code for initializing a struct // with data from a parquet file. -func Write(i int, fields []fields.Field) string { - f := fields[i] +func Write(f fields.Field) string { if f.Repeated() { - return writeRepeated(i, fields) + return writeRepeated(f) } if f.Optional() { - return writeOptional(i, fields) + return writeOptional(f) } return writeRequired(f) @@ -36,3 +38,9 @@ func Read(f fields.Field) string { return readRequired(f) } + +func writeRequired(f fields.Field) string { + return fmt.Sprintf(`func %s(x *%s, vals []%s) { + x.%s = vals[0] +}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), f.StructType(), f.TypeName(), strings.Join(f.FieldNames(), ".")) +} diff --git a/cmd/parquetgen/dremel/dremel_test.go b/cmd/parquetgen/dremel/dremel_test.go new file mode 100644 index 0000000..18b6f2d --- /dev/null +++ b/cmd/parquetgen/dremel/dremel_test.go @@ -0,0 +1,279 @@ +package dremel_test + +import ( + "bytes" + "testing" + + "github.com/parsyl/parquet/cmd/parquetgen/dremel/testcases/doc" + "github.com/parsyl/parquet/cmd/parquetgen/dremel/testcases/person" + "github.com/parsyl/parquet/cmd/parquetgen/dremel/testcases/repetition" + "github.com/stretchr/testify/assert" +) + +var ( + dremelDocs = []doc.Document{ + { + DocID: 10, 
+ Links: &doc.Link{ + Forward: []int64{20, 40, 60}, + }, + Names: []doc.Name{ + { + Languages: []doc.Language{ + {Code: "en-us", Country: pstring("us")}, + {Code: "en"}, + }, + URL: pstring("http://A"), + }, + { + URL: pstring("http://B"), + }, + { + Languages: []doc.Language{ + {Code: "en-gb", Country: pstring("gb")}, + }, + }, + }, + }, + { + DocID: 20, + Links: &doc.Link{ + Backward: []int64{10, 30}, + Forward: []int64{80}, + }, + Names: []doc.Name{ + { + URL: pstring("http://C"), + }, + }, + }, + } +) + +// TestLevels verifies that the example from the dremel paper +// results in the correct definition and repetition levels. +func TestLevels(t *testing.T) { + var buf bytes.Buffer + pw, err := doc.NewParquetWriter(&buf) + if err != nil { + assert.NoError(t, err) + } + + for _, doc := range dremelDocs { + pw.Add(doc) + } + + if err := pw.Write(); err != nil { + assert.NoError(t, err) + } + + pw.Close() + + pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + assert.NoError(t, err) + } + + expected := []doc.Levels{ + {Name: "docid"}, + {Name: "link.backward", Defs: []uint8{1, 2, 2}, Reps: []uint8{0, 0, 1}}, + {Name: "link.forward", Defs: []uint8{2, 2, 2, 2}, Reps: []uint8{0, 1, 1, 0}}, + {Name: "names.languages.code", Defs: []uint8{2, 2, 1, 2, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, + {Name: "names.languages.country", Defs: []uint8{3, 2, 1, 3, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, + {Name: "names.url", Defs: []uint8{2, 2, 1, 2}, Reps: []uint8{0, 1, 1, 0}}, + } + + assert.Equal(t, expected, pr.Levels()) +} + +var ( + people = []person.Person{ + { + Name: "peep", + Hobby: &person.Hobby{ + Name: "napping", + Difficulty: pint32(10), + Skills: []person.Skill{ + {Name: "meditation", Difficulty: "very"}, + {Name: "calmness", Difficulty: "so-so"}, + }, + }, + }, + } +) + +func TestPersonLevels(t *testing.T) { + var buf bytes.Buffer + pw, err := person.NewParquetWriter(&buf) + if err != nil { + assert.NoError(t, err) + } + + for _, p := range people { 
+ pw.Add(p) + } + + if err := pw.Write(); err != nil { + assert.NoError(t, err) + } + + pw.Close() + + pr, err := person.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + assert.NoError(t, err) + } + + expected := []person.Levels{ + {Name: "name"}, + {Name: "hobby.name", Defs: []uint8{1}}, + {Name: "hobby.difficulty", Defs: []uint8{2}}, + {Name: "hobby.skills.name", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, + {Name: "hobby.skills.difficulty", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, + } + + assert.Equal(t, expected, pr.Levels()) +} + +// TestDremel uses the example from the dremel paper and writes then +// reads from a parquet file to make sure nested fields work correctly. +func TestDremel(t *testing.T) { + var buf bytes.Buffer + pw, err := doc.NewParquetWriter(&buf) + if err != nil { + t.Fatal(err) + } + + for _, doc := range dremelDocs { + pw.Add(doc) + } + + if err := pw.Write(); err != nil { + t.Fatal(err) + } + + pw.Close() + + pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + t.Fatal(err) + } + + var out []doc.Document + for pr.Next() { + var d doc.Document + pr.Scan(&d) + out = append(out, d) + } + + assert.Equal(t, dremelDocs, out) +} + +func pstring(s string) *string { + return &s +} + +func pint32(i int32) *int32 { + return &i +} + +var ( + repetitionDocs = []repetition.Document{ + { + Links: []repetition.Link{ + { + Backward: []repetition.Language{{Codes: []string{"a", "b"}}}, + Forward: []repetition.Language{{Codes: []string{"aa", "bbb"}}}, + }, + { + Backward: nil, + Forward: []repetition.Language{{Codes: []string{"c", "d"}}}, + }, + { + Backward: []repetition.Language{{Countries: []string{"e", "f"}}}, + Forward: nil, + }, + { + Backward: nil, + Forward: []repetition.Language{{Countries: []string{"g", "h"}}}, + }, + { + Backward: []repetition.Language{{Countries: []string{"i", "j"}}}, + Forward: []repetition.Language{{Codes: []string{"k", "l"}}}, + }, + { + Backward: []repetition.Language{ + { + 
Codes: []string{"m", "n"}, + Countries: []string{"o", "p"}, + }, + { + Codes: []string{"q", "r"}, + Countries: []string{"s", "t"}, + }, + }, + Forward: []repetition.Language{{Countries: []string{"u", "v"}}}, + }, + { + Backward: []repetition.Language{{Codes: []string{"w", "x"}}}, + Forward: []repetition.Language{{Countries: []string{"y", "z"}}}, + }, + { + Backward: []repetition.Language{ + { + Codes: []string{"aa"}, + URL: pstring("http://abc.com"), + Countries: []string{"ab"}, + }, + { + URL: pstring("http://abc.com"), + Countries: []string{"ac"}, + }, + { + Codes: []string{"ad"}, + URL: pstring("http://abc.com"), + }, + }, + Forward: []repetition.Language{ + { + Countries: []string{"y", "z"}, + URL: pstring("http://abc.com"), + }, + }, + }, + }, + }, + } +) + +func TestRepetition(t *testing.T) { + var buf bytes.Buffer + pw, err := repetition.NewParquetWriter(&buf) + if err != nil { + t.Fatal(err) + } + + for _, doc := range repetitionDocs { + pw.Add(doc) + } + + if err := pw.Write(); err != nil { + t.Fatal(err) + } + + pw.Close() + + pr, err := repetition.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + t.Fatal(err) + } + + var out []repetition.Document + for pr.Next() { + var d repetition.Document + pr.Scan(&d) + out = append(out, d) + } + + assert.Equal(t, repetitionDocs, out) +} diff --git a/cmd/parquetgen/dremel/read.go b/cmd/parquetgen/dremel/read.go new file mode 100644 index 0000000..b461b44 --- /dev/null +++ b/cmd/parquetgen/dremel/read.go @@ -0,0 +1,58 @@ +package dremel + +import ( + "fmt" + "strings" + + "github.com/parsyl/parquet/cmd/parquetgen/fields" +) + +func readRequired(f fields.Field) string { + return fmt.Sprintf(`func read%s(x %s) %s { + return x.%s +}`, strings.Join(f.FieldNames(), ""), f.StructType(), f.TypeName(), strings.Join(f.FieldNames(), ".")) +} + +func readOptional(f fields.Field) string { + var out string + n := f.MaxDef() + for def := 0; def < n; def++ { + out += fmt.Sprintf(`case x.%s == nil: + return nil, 
[]uint8{%d}, nil + `, nilField(def, f), def) + } + + var ptr string + rts := f.RepetitionTypes() + if rts[len(rts)-1] == fields.Optional { + ptr = "*" + } + + out += fmt.Sprintf(` default: + return []%s{%sx.%s}, []uint8{%d}, nil`, cleanTypeName(f.Type), ptr, nilField(n, f), n) + + return fmt.Sprintf(`func read%s(x %s) ([]%s, []uint8, []uint8) { + switch { + %s + } + }`, strings.Join(f.FieldNames(), ""), f.StructType(), cleanTypeName(f.Type), out) +} + +func cleanTypeName(s string) string { + return strings.Replace(strings.Replace(s, "*", "", 1), "[]", "", 1) +} + +func nilField(i int, f fields.Field) string { + var flds []string + var count int + for j, o := range f.RepetitionTypes() { + flds = append(flds, f.FieldNames()[j]) + if o == fields.Optional { + count++ + } + if count > i { + break + } + } + return strings.Join(flds, ".") +} diff --git a/internal/dremel/read_repeated.go b/cmd/parquetgen/dremel/read_repeated.go similarity index 83% rename from internal/dremel/read_repeated.go rename to cmd/parquetgen/dremel/read_repeated.go index 121e827..ccd77f3 100644 --- a/internal/dremel/read_repeated.go +++ b/cmd/parquetgen/dremel/read_repeated.go @@ -7,7 +7,7 @@ import ( "strings" "text/template" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) func init() { @@ -36,7 +36,7 @@ var ( reps = append(reps, lastRep) } else { for i{{.Rep}}, x{{.Rep}} := range {{.Var}}.{{.Field}} { - if i{{.Rep}} == 1 { + if i{{.Rep}} >= 1 { lastRep = {{inc .Rep}} } %s @@ -69,22 +69,23 @@ func readRepeated(f fields.Field) string { return vals, defs, reps }`, - strings.Join(f.FieldNames, ""), - f.Type, - cleanTypeName(f.TypeName), - cleanTypeName(f.TypeName), + strings.Join(f.FieldNames(), ""), + f.StructType(), + cleanTypeName(f.Type), + cleanTypeName(f.Type), doReadRepeated(f, 0, "x"), ) } func doReadRepeated(f fields.Field, i int, varName string) string { if i == f.MaxDef() { - if f.RepetitionTypes[len(f.RepetitionTypes)-1] == 
fields.Optional { + rts := f.RepetitionTypes() + if rts[len(rts)-1] == fields.Optional { varName = fmt.Sprintf("*%s", varName) } - if f.RepetitionTypes[len(f.RepetitionTypes)-1] != fields.Repeated { - n := lastRepeated(f.RepetitionTypes) - varName = strings.Join(append([]string{varName}, f.FieldNames[n+1:]...), ".") + if rts[len(rts)-1] != fields.Repeated { + n := lastRepeated(rts) + varName = strings.Join(append([]string{varName}, f.FieldNames()[n+1:]...), ".") } return fmt.Sprintf(`defs = append(defs, %d) reps = append(reps, lastRep) @@ -103,14 +104,14 @@ vals = append(vals, %s)`, i, varName) if rt == fields.Repeated { if reps > 1 { - rc.Field = f.FieldNames[n] + rc.Field = f.FieldNames()[n] } nextVar = fmt.Sprintf("x%d", reps-1) readRepeatedRepeatedTpl.Execute(&buf, rc) } else { nextVar = varName if reps > 0 { - rc.Field = strings.Join(f.FieldNames[i:], ".") + rc.Field = strings.Join(f.FieldNames()[i:], ".") } readRepeatedOptionalTpl.Execute(&buf, rc) } diff --git a/internal/dremel/read_test.go b/cmd/parquetgen/dremel/read_test.go similarity index 58% rename from internal/dremel/read_test.go rename to cmd/parquetgen/dremel/read_test.go index 132e698..bcfd710 100644 --- a/internal/dremel/read_test.go +++ b/cmd/parquetgen/dremel/read_test.go @@ -5,27 +5,33 @@ import ( "go/format" "testing" - "github.com/parsyl/parquet/internal/dremel" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/dremel" + "github.com/parsyl/parquet/cmd/parquetgen/fields" "github.com/stretchr/testify/assert" ) func TestRead(t *testing.T) { testCases := []struct { - name string - f fields.Field - result string + name string + structName string + f fields.Field + result string }{ { name: "required and not nested", - f: fields.Field{Type: "Person", TypeName: "int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required}}, + f: fields.Field{ + Type: "int32", Name: "ID", RepetitionType: fields.Required, + }, result: `func 
readID(x Person) int32 { return x.ID }`, }, { name: "optional and not nested", - f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + ////f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + f: fields.Field{ + Type: "int32", Name: "ID", RepetitionType: fields.Optional, + }, result: `func readID(x Person) ([]int32, []uint8, []uint8) { switch { case x.ID == nil: @@ -37,14 +43,24 @@ func TestRead(t *testing.T) { }, { name: "required and nested", - f: fields.Field{Type: "Person", TypeName: "int32", FieldNames: []string{"Other", "Hobby", "Difficulty"}, FieldTypes: []string{"Other", "Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}, + f: fields.Field{ + Name: "Other", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Required}, + }}, + }, + }, result: `func readOtherHobbyDifficulty(x Person) int32 { return x.Other.Hobby.Difficulty }`, }, { name: "optional and nested", - f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, + f: fields.Field{ + Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Optional}, + }, + }, result: `func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { switch { case x.Hobby == nil: @@ -58,7 +74,11 @@ func TestRead(t *testing.T) { }, { name: "mix of optional and required and nested", - f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, 
RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, + f: fields.Field{ + Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + }, + }, result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { switch { case x.Hobby == nil: @@ -70,7 +90,11 @@ func TestRead(t *testing.T) { }, { name: "mix of optional and required and nested v2", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, + f: fields.Field{ + Name: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, + }, + }, result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { switch { case x.Hobby.Name == nil: @@ -82,7 +106,13 @@ func TestRead(t *testing.T) { }, { name: "mix of optional and require and nested 3 deep", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Optional}}, + f: fields.Field{ + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { switch { case x.Friend == nil: @@ -96,7 +126,13 @@ func TestRead(t *testing.T) { }, { name: "mix of optional and require and nested 3 deep v2", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, 
fields.Optional, fields.Optional}}, + f: fields.Field{ + Name: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { switch { case x.Friend.Hobby == nil: @@ -110,7 +146,13 @@ func TestRead(t *testing.T) { }, { name: "mix of optional and require and nested 3 deep v3", - f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Required}}, + f: fields.Field{ + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + }}, + }, + }, result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { switch { case x.Friend == nil: @@ -124,7 +166,13 @@ func TestRead(t *testing.T) { }, { name: "nested 3 deep all optional", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, + f: fields.Field{ + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { switch { case x.Friend == nil: @@ -140,7 +188,15 @@ func TestRead(t *testing.T) { }, { name: "four deep", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", 
"Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}, + f: fields.Field{ + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { switch { case x.Friend == nil: @@ -158,7 +214,15 @@ func TestRead(t *testing.T) { }, { name: "four deep mixed", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, + f: fields.Field{ + Name: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { switch { case x.Friend.Hobby == nil: @@ -174,7 +238,15 @@ func TestRead(t *testing.T) { }, { name: "four deep mixed v2", - f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, + f: fields.Field{ + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + 
{Name: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Required}, + }}, + }}, + }, + }, result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { switch { case x.Friend == nil: @@ -190,7 +262,9 @@ func TestRead(t *testing.T) { }, { name: "repeated", - f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friends"}, FieldTypes: []string{"string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, + f: fields.Field{ + Type: "string", Name: "Friends", RepetitionType: fields.Repeated, + }, result: `func readFriends(x Person) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 @@ -201,7 +275,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 1) @@ -214,8 +288,13 @@ func TestRead(t *testing.T) { }`, }, { - name: "readLinkFoward", - f: fields.Field{Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + name: "readLinkFoward", + structName: "Document", + f: fields.Field{ + Name: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int64", Name: "Forward", RepetitionType: fields.Repeated}, + }, + }, result: `func readLinkForward(x Document) ([]int64, []uint8, []uint8) { var vals []int64 var defs, reps []uint8 @@ -230,7 +309,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Link.Forward { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -244,8 +323,15 @@ func TestRead(t *testing.T) { }`, }, { - name: "readNamesLanguagesCode", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, 
RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, + name: "readNamesLanguagesCode", + structName: "Document", + f: fields.Field{ + Name: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Code", RepetitionType: fields.Required}, + }}, + }, + }, result: `func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 @@ -256,7 +342,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if len(x0.Languages) == 0 { @@ -264,7 +350,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i1, x1 := range x0.Languages { - if i1 == 1 { + if i1 >= 1 { lastRep = 2 } defs = append(defs, 2) @@ -279,8 +365,15 @@ func TestRead(t *testing.T) { }`, }, { - name: "readNamesLanguagesCountry", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, + name: "readNamesLanguagesCountry", + structName: "Document", + f: fields.Field{ + Name: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Country", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 @@ -291,7 +384,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if len(x0.Languages) == 0 { @@ -299,7 +392,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for 
i1, x1 := range x0.Languages { - if i1 == 1 { + if i1 >= 1 { lastRep = 2 } if x1.Country == nil { @@ -319,8 +412,13 @@ func TestRead(t *testing.T) { }`, }, { - name: "readNamesURL", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "URL"}, FieldTypes: []string{"Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional}}, + name: "readNamesURL", + structName: "Document", + f: fields.Field{ + Name: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "URL", RepetitionType: fields.Optional}, + }, + }, result: `func readNamesURL(x Document) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 @@ -331,7 +429,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if x0.URL == nil { @@ -349,8 +447,15 @@ func TestRead(t *testing.T) { }`, }, { - name: "run of required", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Friends", "Name", "Last"}, FieldTypes: []string{"Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Required}}, + name: "run of required", + structName: "Document", + f: fields.Field{ + Name: "Friends", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Last", RepetitionType: fields.Required}, + }}, + }, + }, result: `func readFriendsNameLast(x Document) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 @@ -361,7 +466,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 1) @@ -374,8 +479,15 @@ func TestRead(t *testing.T) { }`, }, { - name: "run of required v2", - f: fields.Field{Type: "Document", TypeName: 
"string", FieldNames: []string{"Friend", "Name", "Aliases"}, FieldTypes: []string{"Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Repeated}}, + name: "run of required v2", + structName: "Document", + f: fields.Field{ + Name: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Aliases", RepetitionType: fields.Repeated}, + }}, + }, + }, result: `func readFriendNameAliases(x Document) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 @@ -386,7 +498,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friend.Name.Aliases { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 1) @@ -399,8 +511,17 @@ func TestRead(t *testing.T) { }`, }, { - name: "run of required v3", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Other", "Friends", "Name", "Middle"}, FieldTypes: []string{"Other", "Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required, fields.Required}}, + name: "run of required v3", + structName: "Document", + f: fields.Field{ + Name: "Other", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Friends", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Middle", RepetitionType: fields.Required}, + }}, + }}, + }, + }, result: `func readOtherFriendsNameMiddle(x Document) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 @@ -415,7 +536,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Other.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -432,7 +553,12 @@ func TestRead(t *testing.T) { for i, tc := range 
testCases { t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { - s := dremel.Read(tc.f) + sn := tc.structName + if sn == "" { + sn = "Person" + } + flds := fields.Field{Type: sn, Children: []fields.Field{tc.f}}.Fields() + s := dremel.Read(flds[len(flds)-1]) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.result, string(gocode)) diff --git a/cmd/parquetgen/dremel/testcases/doc/doc.go b/cmd/parquetgen/dremel/testcases/doc/doc.go new file mode 100644 index 0000000..684b991 --- /dev/null +++ b/cmd/parquetgen/dremel/testcases/doc/doc.go @@ -0,0 +1,24 @@ +package doc + +//go:generate parquetgen -input doc.go -type Document -package doc -output generated.go + +type Link struct { + Backward []int64 `parquet:"backward"` + Forward []int64 `parquet:"forward"` +} + +type Language struct { + Code string `parquet:"code"` + Country *string `parquet:"country"` +} + +type Name struct { + Languages []Language `parquet:"languages"` + URL *string `parquet:"url"` +} + +type Document struct { + DocID int64 `parquet:"docid"` + Links *Link `parquet:"link"` + Names []Name `parquet:"names"` +} diff --git a/internal/dremel/dremel_generated_test.go b/cmd/parquetgen/dremel/testcases/doc/generated.go similarity index 93% rename from internal/dremel/dremel_generated_test.go rename to cmd/parquetgen/dremel/testcases/doc/generated.go index 9868cc5..5493d09 100644 --- a/internal/dremel/dremel_generated_test.go +++ b/cmd/parquetgen/dremel/testcases/doc/generated.go @@ -1,4 +1,4 @@ -package dremel_test +package doc // Code generated by github.com/parsyl/parquet. DO NOT EDIT. 
@@ -21,6 +21,7 @@ type compression int const ( compressionUncompressed compression = 0 compressionSnappy compression = 1 + compressionGzip compression = 2 compressionUnknown compression = -1 ) @@ -45,8 +46,8 @@ type ParquetWriter struct { func Fields(compression compression) []Field { return []Field{ NewInt64Field(readDocID, writeDocID, []string{"docid"}, fieldCompression(compression)), - NewInt64OptionalField(readLinkBackward, writeLinkBackward, []string{"link", "backward"}, []int{1, 2}, optionalFieldCompression(compression)), - NewInt64OptionalField(readLinkForward, writeLinkForward, []string{"link", "forward"}, []int{1, 2}, optionalFieldCompression(compression)), + NewInt64OptionalField(readLinksBackward, writeLinksBackward, []string{"link", "backward"}, []int{1, 2}, optionalFieldCompression(compression)), + NewInt64OptionalField(readLinksForward, writeLinksForward, []string{"link", "forward"}, []int{1, 2}, optionalFieldCompression(compression)), NewStringOptionalField(readNamesLanguagesCode, writeNamesLanguagesCode, []string{"names", "languages", "code"}, []int{2, 2, 0}, optionalFieldCompression(compression)), NewStringOptionalField(readNamesLanguagesCountry, writeNamesLanguagesCountry, []string{"names", "languages", "country"}, []int{2, 2, 1}, optionalFieldCompression(compression)), NewStringOptionalField(readNamesURL, writeNamesURL, []string{"names", "url"}, []int{2, 1}, optionalFieldCompression(compression)), @@ -61,21 +62,21 @@ func writeDocID(x *Document, vals []int64) { x.DocID = vals[0] } -func readLinkBackward(x Document) ([]int64, []uint8, []uint8) { +func readLinksBackward(x Document) ([]int64, []uint8, []uint8) { var vals []int64 var defs, reps []uint8 var lastRep uint8 - if x.Link == nil { + if x.Links == nil { defs = append(defs, 0) reps = append(reps, lastRep) } else { - if len(x.Link.Backward) == 0 { + if len(x.Links.Backward) == 0 { defs = append(defs, 1) reps = append(reps, lastRep) } else { - for i0, x0 := range x.Link.Backward { - if i0 == 1 
{ + for i0, x0 := range x.Links.Backward { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -88,7 +89,7 @@ func readLinkBackward(x Document) ([]int64, []uint8, []uint8) { return vals, defs, reps } -func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { +func writeLinksBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -104,13 +105,13 @@ func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) switch def { case 1: - x.Link = &Link{} + x.Links = &Link{} case 2: switch rep { case 0: - x.Link = &Link{Backward: []int64{vals[nVals]}} + x.Links = &Link{Backward: []int64{vals[nVals]}} case 1: - x.Link.Backward = append(x.Link.Backward, vals[nVals]) + x.Links.Backward = append(x.Links.Backward, vals[nVals]) } nVals++ } @@ -119,21 +120,21 @@ func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) return nVals, nLevels } -func readLinkForward(x Document) ([]int64, []uint8, []uint8) { +func readLinksForward(x Document) ([]int64, []uint8, []uint8) { var vals []int64 var defs, reps []uint8 var lastRep uint8 - if x.Link == nil { + if x.Links == nil { defs = append(defs, 0) reps = append(reps, lastRep) } else { - if len(x.Link.Forward) == 0 { + if len(x.Links.Forward) == 0 { defs = append(defs, 1) reps = append(reps, lastRep) } else { - for i0, x0 := range x.Link.Forward { - if i0 == 1 { + for i0, x0 := range x.Links.Forward { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -146,7 +147,7 @@ func readLinkForward(x Document) ([]int64, []uint8, []uint8) { return vals, defs, reps } -func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { +func writeLinksForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -162,10 +163,7 @@ func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) switch def { case 2: - switch rep { - 
default: - x.Link.Forward = append(x.Link.Forward, vals[nVals]) - } + x.Links.Forward = append(x.Links.Forward, vals[nVals]) nVals++ } } @@ -183,7 +181,7 @@ func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if len(x0.Languages) == 0 { @@ -191,7 +189,7 @@ func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i1, x1 := range x0.Languages { - if i1 == 1 { + if i1 >= 1 { lastRep = 2 } defs = append(defs, 2) @@ -224,9 +222,7 @@ func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (in x.Names = append(x.Names, Name{}) case 2: switch rep { - case 0: - x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} - case 1: + case 0, 1: x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) case 2: x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) @@ -248,7 +244,7 @@ func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if len(x0.Languages) == 0 { @@ -256,7 +252,7 @@ func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i1, x1 := range x0.Languages { - if i1 == 1 { + if i1 >= 1 { lastRep = 2 } if x1.Country == nil { @@ -291,10 +287,7 @@ func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) switch def { case 3: - switch rep { - default: - x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) - } + x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) nVals++ } } @@ -312,7 +305,7 @@ func readNamesURL(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if x0.URL == 
nil { @@ -345,10 +338,7 @@ func writeNamesURL(x *Document, vals []string, defs, reps []uint8) (int, int) { switch def { case 2: - switch rep { - default: - x.Names[ind[0]].URL = pstring(vals[nVals]) - } + x.Names[ind[0]].URL = pstring(vals[nVals]) nVals++ } } @@ -362,6 +352,8 @@ func fieldCompression(c compression) func(*parquet.RequiredField) { return parquet.RequiredFieldUncompressed case compressionSnappy: return parquet.RequiredFieldSnappy + case compressionGzip: + return parquet.RequiredFieldGzip default: return parquet.RequiredFieldUncompressed } @@ -373,6 +365,8 @@ func optionalFieldCompression(c compression) func(*parquet.OptionalField) { return parquet.OptionalFieldUncompressed case compressionSnappy: return parquet.OptionalFieldSnappy + case compressionGzip: + return parquet.OptionalFieldGzip default: return parquet.OptionalFieldUncompressed } @@ -438,6 +432,11 @@ func Snappy(p *ParquetWriter) error { return nil } +func Gzip(p *ParquetWriter) error { + p.compression = compressionGzip + return nil +} + func withCompression(c compression) func(*ParquetWriter) error { return func(p *ParquetWriter) error { p.compression = c diff --git a/cmd/parquetgen/dremel/testcases/person/generated.go b/cmd/parquetgen/dremel/testcases/person/generated.go new file mode 100644 index 0000000..76cd891 --- /dev/null +++ b/cmd/parquetgen/dremel/testcases/person/generated.go @@ -0,0 +1,992 @@ +package person + +// Code generated by github.com/parsyl/parquet. DO NOT EDIT. 
+ +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "strings" + + "github.com/parsyl/parquet" + sch "github.com/parsyl/parquet/schema" + + "math" + "sort" +) + +type compression int + +const ( + compressionUncompressed compression = 0 + compressionSnappy compression = 1 + compressionGzip compression = 2 + compressionUnknown compression = -1 +) + +// ParquetWriter reprents a row group +type ParquetWriter struct { + fields []Field + + len int + + // child points to the next page + child *ParquetWriter + + // max is the number of Record items that can get written before + // a new set of column chunks is written + max int + + meta *parquet.Metadata + w io.Writer + compression compression +} + +func Fields(compression compression) []Field { + return []Field{ + NewStringField(readName, writeName, []string{"name"}, fieldCompression(compression)), + NewStringOptionalField(readHobbyName, writeHobbyName, []string{"hobby", "name"}, []int{1, 0}, optionalFieldCompression(compression)), + NewInt32OptionalField(readHobbyDifficulty, writeHobbyDifficulty, []string{"hobby", "difficulty"}, []int{1, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsName, writeHobbySkillsName, []string{"hobby", "skills", "name"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + } +} + +func readName(x Person) string { + return x.Name +} + +func writeName(x *Person, vals []string) { + x.Name = vals[0] +} + +func readHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Hobby == nil: + return nil, []uint8{0}, nil + default: + return []string{x.Hobby.Name}, []uint8{1}, nil + } +} + +func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Hobby = &Hobby{Name: vals[0]} + return 1, 1 + } + + return 
0, 1 +} + +func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { + switch { + case x.Hobby == nil: + return nil, []uint8{0}, nil + case x.Hobby.Difficulty == nil: + return nil, []uint8{1}, nil + default: + return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil + } +} + +func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 2: + x.Hobby.Difficulty = pint32(vals[0]) + return 1, 1 + } + + return 0, 1 +} + +func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 >= 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Name) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) + nVals++ + } + } + + return nVals, nLevels +} + +func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 >= 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Difficulty) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsDifficulty(x *Person, 
vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] + nVals++ + } + } + + return nVals, nLevels +} + +func fieldCompression(c compression) func(*parquet.RequiredField) { + switch c { + case compressionUncompressed: + return parquet.RequiredFieldUncompressed + case compressionSnappy: + return parquet.RequiredFieldSnappy + case compressionGzip: + return parquet.RequiredFieldGzip + default: + return parquet.RequiredFieldUncompressed + } +} + +func optionalFieldCompression(c compression) func(*parquet.OptionalField) { + switch c { + case compressionUncompressed: + return parquet.OptionalFieldUncompressed + case compressionSnappy: + return parquet.OptionalFieldSnappy + case compressionGzip: + return parquet.OptionalFieldGzip + default: + return parquet.OptionalFieldUncompressed + } +} + +func NewParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + return newParquetWriter(w, append(opts, begin)...) +} + +func newParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + p := &ParquetWriter{ + max: 1000, + w: w, + compression: compressionSnappy, + } + + for _, opt := range opts { + if err := opt(p); err != nil { + return nil, err + } + } + + p.fields = Fields(p.compression) + if p.meta == nil { + ff := Fields(p.compression) + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + schema[i] = f.Schema() + } + p.meta = parquet.New(schema...) + } + + return p, nil +} + +// MaxPageSize is the maximum number of rows in each row groups' page. 
+func MaxPageSize(m int) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.max = m + return nil + } +} + +func begin(p *ParquetWriter) error { + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func withMeta(m *parquet.Metadata) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.meta = m + return nil + } +} + +func Uncompressed(p *ParquetWriter) error { + p.compression = compressionUncompressed + return nil +} + +func Snappy(p *ParquetWriter) error { + p.compression = compressionSnappy + return nil +} + +func Gzip(p *ParquetWriter) error { + p.compression = compressionGzip + return nil +} + +func withCompression(c compression) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.compression = c + return nil + } +} + +func (p *ParquetWriter) Write() error { + for i, f := range p.fields { + if err := f.Write(p.w, p.meta); err != nil { + return err + } + + for child := p.child; child != nil; child = child.child { + if err := child.fields[i].Write(p.w, p.meta); err != nil { + return err + } + } + } + + p.fields = Fields(p.compression) + p.child = nil + p.len = 0 + + schema := make([]parquet.Field, len(p.fields)) + for i, f := range p.fields { + schema[i] = f.Schema() + } + p.meta.StartRowGroup(schema...) 
+ return nil +} + +func (p *ParquetWriter) Close() error { + if err := p.meta.Footer(p.w); err != nil { + return err + } + + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func (p *ParquetWriter) Add(rec Person) { + if p.len == p.max { + if p.child == nil { + // an error can't happen here + p.child, _ = newParquetWriter(p.w, MaxPageSize(p.max), withMeta(p.meta), withCompression(p.compression)) + } + + p.child.Add(rec) + return + } + + p.meta.NextDoc() + for _, f := range p.fields { + f.Add(rec) + } + + p.len++ +} + +type Field interface { + Add(r Person) + Write(w io.Writer, meta *parquet.Metadata) error + Schema() parquet.Field + Scan(r *Person) + Read(r io.ReadSeeker, pg parquet.Page) error + Name() string + Levels() ([]uint8, []uint8) +} + +func getFields(ff []Field) map[string]Field { + m := make(map[string]Field, len(ff)) + for _, f := range ff { + m[f.Name()] = f + } + return m +} + +func NewParquetReader(r io.ReadSeeker, opts ...func(*ParquetReader)) (*ParquetReader, error) { + ff := Fields(compressionUnknown) + pr := &ParquetReader{ + r: r, + } + + for _, opt := range opts { + opt(pr) + } + + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + pr.fieldNames = append(pr.fieldNames, f.Name()) + schema[i] = f.Schema() + } + + meta := parquet.New(schema...) + if err := meta.ReadFooter(r); err != nil { + return nil, err + } + pr.rows = meta.Rows() + var err error + pr.pages, err = meta.Pages() + if err != nil { + return nil, err + } + + pr.rowGroups = meta.RowGroups() + _, err = r.Seek(4, io.SeekStart) + if err != nil { + return nil, err + } + pr.meta = meta + + return pr, pr.readRowGroup() +} + +func readerIndex(i int) func(*ParquetReader) { + return func(p *ParquetReader) { + p.index = i + } +} + +// ParquetReader reads one page from a row group. 
+type ParquetReader struct { + fields map[string]Field + fieldNames []string + index int + cursor int64 + rows int64 + rowGroupCursor int64 + rowGroupCount int64 + pages map[string][]parquet.Page + meta *parquet.Metadata + err error + + r io.ReadSeeker + rowGroups []parquet.RowGroup +} + +type Levels struct { + Name string + Defs []uint8 + Reps []uint8 +} + +func (p *ParquetReader) Levels() []Levels { + var out []Levels + //for { + for _, name := range p.fieldNames { + f := p.fields[name] + d, r := f.Levels() + out = append(out, Levels{Name: f.Name(), Defs: d, Reps: r}) + } + // if err := p.readRowGroup(); err != nil { + // break + // } + //} + return out +} + +func (p *ParquetReader) Error() error { + return p.err +} + +func (p *ParquetReader) readRowGroup() error { + p.rowGroupCursor = 0 + + if len(p.rowGroups) == 0 { + p.rowGroupCount = 0 + return nil + } + + rg := p.rowGroups[0] + p.fields = getFields(Fields(compressionUnknown)) + p.rowGroupCount = rg.Rows + p.rowGroupCursor = 0 + for _, col := range rg.Columns() { + name := strings.Join(col.MetaData.PathInSchema, ".") + f, ok := p.fields[name] + if !ok { + return fmt.Errorf("unknown field: %s", name) + } + pages := p.pages[name] + if len(pages) <= p.index { + break + } + + pg := pages[0] + if err := f.Read(p.r, pg); err != nil { + return fmt.Errorf("unable to read field %s, err: %s", f.Name(), err) + } + p.pages[name] = p.pages[name][1:] + } + p.rowGroups = p.rowGroups[1:] + return nil +} + +func (p *ParquetReader) Rows() int64 { + return p.rows +} + +func (p *ParquetReader) Next() bool { + if p.err == nil && p.cursor >= p.rows { + return false + } + if p.rowGroupCursor >= p.rowGroupCount { + p.err = p.readRowGroup() + if p.err != nil { + return false + } + } + + p.cursor++ + p.rowGroupCursor++ + return true +} + +func (p *ParquetReader) Scan(x *Person) { + if p.err != nil { + return + } + + for _, name := range p.fieldNames { + f := p.fields[name] + f.Scan(x) + } +} + +type StringField struct { + 
parquet.RequiredField + vals []string + read func(r Person) string + write func(r *Person, vals []string) + stats *stringStats +} + +func NewStringField(read func(r Person) string, write func(r *Person, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { + return &StringField{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newStringStats(), + } +} + +func (f *StringField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < pg.N; j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringField) Scan(r *Person) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *StringField) Add(r Person) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *StringField) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type StringOptionalField struct { + parquet.OptionalField + vals []string + read func(r Person) ([]string, []uint8, []uint8) + write func(r *Person, vals []string, def, rep []uint8) (int, int) + stats *stringOptionalStats +} + +func NewStringOptionalField(read func(r Person) ([]string, []uint8, []uint8), write func(r *Person, vals 
[]string, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { + return &StringOptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newStringOptionalStats(maxDef(types)), + } +} + +func (f *StringOptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *StringOptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *StringOptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *StringOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *StringOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < f.Values(); j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringOptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type Int32OptionalField struct { + parquet.OptionalField + vals []int32 + read func(r Person) ([]int32, []uint8, []uint8) + write func(r *Person, vals []int32, def, rep []uint8) (int, int) + stats 
*int32optionalStats +} + +func NewInt32OptionalField(read func(r Person) ([]int32, []uint8, []uint8), write func(r *Person, vals []int32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int32OptionalField { + return &Int32OptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newint32optionalStats(maxDef(types)), + } +} + +func (f *Int32OptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int32Type, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *Int32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *Int32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]int32, f.Values()-len(f.vals)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Int32OptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) 
+} + +func (f *Int32OptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *Int32OptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type stringStats struct { + vals []string + min []byte + max []byte +} + +func newStringStats() *stringStats { + return &stringStats{} +} + +func (s *stringStats) add(val string) { + s.vals = append(s.vals, val) +} + +func (s *stringStats) NullCount() *int64 { + return nil +} + +func (s *stringStats) DistinctCount() *int64 { + return nil +} + +func (s *stringStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +type stringOptionalStats struct { + vals []string + min []byte + max []byte + nils int64 + maxDef uint8 +} + +func newStringOptionalStats(d uint8) *stringOptionalStats { + return &stringOptionalStats{maxDef: d} +} + +func (s *stringOptionalStats) add(vals []string, defs []uint8) { + var i int + for _, def := range defs { + if def < s.maxDef { + s.nils++ + } else { + s.vals = append(s.vals, vals[i]) + i++ + } + } +} + +func (s *stringOptionalStats) NullCount() *int64 { + return &s.nils +} + +func (s *stringOptionalStats) DistinctCount() *int64 { + return nil +} + +func (s *stringOptionalStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringOptionalStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringOptionalStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + 
sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +type int32optionalStats struct { + min int32 + max int32 + nils int64 + nonNils int64 + maxDef uint8 +} + +func newint32optionalStats(d uint8) *int32optionalStats { + return &int32optionalStats{ + min: int32(math.MaxInt32), + maxDef: d, + } +} + +func (f *int32optionalStats) add(vals []int32, defs []uint8) { + var i int + for _, def := range defs { + if def < f.maxDef { + f.nils++ + } else { + val := vals[i] + i++ + + f.nonNils++ + if val < f.min { + f.min = val + } + if val > f.max { + f.max = val + } + } + } +} + +func (f *int32optionalStats) bytes(val int32) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *int32optionalStats) NullCount() *int64 { + return &f.nils +} + +func (f *int32optionalStats) DistinctCount() *int64 { + return nil +} + +func (f *int32optionalStats) Min() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.min) +} + +func (f *int32optionalStats) Max() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.max) +} + +func pint32(i int32) *int32 { return &i } +func puint32(i uint32) *uint32 { return &i } +func pint64(i int64) *int64 { return &i } +func puint64(i uint64) *uint64 { return &i } +func pbool(b bool) *bool { return &b } +func pstring(s string) *string { return &s } +func pfloat32(f float32) *float32 { return &f } +func pfloat64(f float64) *float64 { return &f } + +// keeps track of the indices of repeated fields +// that have already been handled by a previous field +type indices []int + +func (i indices) rep(rep uint8) { + if rep > 0 { + r := int(rep) - 1 + i[r] = i[r] + 1 + for j := int(rep); j < len(i); j++ { + i[j] = 0 + } + } +} + +func maxDef(types []int) uint8 { + var out uint8 + for _, typ := range types { + if typ > 0 { + out++ + } + } + return out +} + +func Int32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t +} + +func 
Uint32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t + ct := sch.ConvertedType_UINT_32 + se.ConvertedType = &ct +} + +func Int64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t +} + +func Uint64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t + ct := sch.ConvertedType_UINT_64 + se.ConvertedType = &ct +} + +func Float32Type(se *sch.SchemaElement) { + t := sch.Type_FLOAT + se.Type = &t +} + +func Float64Type(se *sch.SchemaElement) { + t := sch.Type_DOUBLE + se.Type = &t +} + +func BoolType(se *sch.SchemaElement) { + t := sch.Type_BOOLEAN + se.Type = &t +} + +func StringType(se *sch.SchemaElement) { + t := sch.Type_BYTE_ARRAY + se.Type = &t +} diff --git a/cmd/parquetgen/dremel/testcases/person/person.go b/cmd/parquetgen/dremel/testcases/person/person.go new file mode 100644 index 0000000..ce9cef0 --- /dev/null +++ b/cmd/parquetgen/dremel/testcases/person/person.go @@ -0,0 +1,19 @@ +package person + +//go:generate parquetgen -input person.go -type Person -package person -output generated.go + +type Skill struct { + Name string `parquet:"name"` + Difficulty string `parquet:"difficulty"` +} + +type Hobby struct { + Name string `parquet:"name"` + Difficulty *int32 `parquet:"difficulty"` + Skills []Skill `parquet:"skills"` +} + +type Person struct { + Name string `parquet:"name"` + Hobby *Hobby `parquet:"hobby"` +} diff --git a/cmd/parquetgen/dremel/testcases/repetition/generated.go b/cmd/parquetgen/dremel/testcases/repetition/generated.go new file mode 100644 index 0000000..9f06de4 --- /dev/null +++ b/cmd/parquetgen/dremel/testcases/repetition/generated.go @@ -0,0 +1,996 @@ +package repetition + +// Code generated by github.com/parsyl/parquet. DO NOT EDIT. 
+ +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "strings" + + "github.com/parsyl/parquet" + sch "github.com/parsyl/parquet/schema" + + "sort" +) + +type compression int + +const ( + compressionUncompressed compression = 0 + compressionSnappy compression = 1 + compressionGzip compression = 2 + compressionUnknown compression = -1 +) + +// ParquetWriter reprents a row group +type ParquetWriter struct { + fields []Field + + len int + + // child points to the next page + child *ParquetWriter + + // max is the number of Record items that can get written before + // a new set of column chunks is written + max int + + meta *parquet.Metadata + w io.Writer + compression compression +} + +func Fields(compression compression) []Field { + return []Field{ + NewStringOptionalField(readLinksBackwardCodes, writeLinksBackwardCodes, []string{"links", "backward", "code"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksBackwardURL, writeLinksBackwardURL, []string{"links", "backward", "url"}, []int{2, 2, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksBackwardCountries, writeLinksBackwardCountries, []string{"links", "backward", "countries"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksForwardCodes, writeLinksForwardCodes, []string{"links", "forward", "code"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksForwardURL, writeLinksForwardURL, []string{"links", "forward", "url"}, []int{2, 2, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksForwardCountries, writeLinksForwardCountries, []string{"links", "forward", "countries"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + } +} + +func readLinksBackwardCodes(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = 
append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Backward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Backward { + if i1 >= 1 { + lastRep = 2 + } + if len(x1.Codes) == 0 { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + for i2, x2 := range x1.Codes { + if i2 >= 1 { + lastRep = 3 + } + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, x2) + } + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksBackwardCodes(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Links = append(x.Links, Link{}) + case 2: + switch rep { + case 0, 1: + x.Links = append(x.Links, Link{Backward: []Language{{}}}) + case 2: + x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{}) + } + case 3: + switch rep { + case 0, 1: + x.Links = append(x.Links, Link{Backward: []Language{{Codes: []string{vals[nVals]}}}}) + case 2: + x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{Codes: []string{vals[nVals]}}) + case 3: + x.Links[ind[0]].Backward[ind[1]].Codes = append(x.Links[ind[0]].Backward[ind[1]].Codes, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksBackwardURL(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Backward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Backward { + if i1 >= 1 { + lastRep = 2 + } + if x1.URL == nil { + defs = append(defs, 2) + reps = 
append(reps, lastRep) + } else { + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, *x1.URL) + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksBackwardURL(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + x.Links[ind[0]].Backward[ind[1]].URL = pstring(vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksBackwardCountries(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Backward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Backward { + if i1 >= 1 { + lastRep = 2 + } + if len(x1.Countries) == 0 { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + for i2, x2 := range x1.Countries { + if i2 >= 1 { + lastRep = 3 + } + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, x2) + } + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksBackwardCountries(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + x.Links[ind[0]].Backward[ind[1]].Countries = append(x.Links[ind[0]].Backward[ind[1]].Countries, vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksForwardCodes(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + 
reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Forward { + if i1 >= 1 { + lastRep = 2 + } + if len(x1.Codes) == 0 { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + for i2, x2 := range x1.Codes { + if i2 >= 1 { + lastRep = 3 + } + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, x2) + } + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksForwardCodes(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{}) + case 3: + switch rep { + case 0, 1, 2: + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}}) + case 3: + x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksForwardURL(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Forward { + if i1 >= 1 { + lastRep = 2 + } + if x1.URL == nil { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, *x1.URL) + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksForwardURL(x *Document, vals []string, defs, reps []uint8) 
(int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + x.Links[ind[0]].Forward[ind[1]].URL = pstring(vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksForwardCountries(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Forward { + if i1 >= 1 { + lastRep = 2 + } + if len(x1.Countries) == 0 { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + for i2, x2 := range x1.Countries { + if i2 >= 1 { + lastRep = 3 + } + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, x2) + } + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksForwardCountries(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +} + +func fieldCompression(c compression) func(*parquet.RequiredField) { + switch c { + case compressionUncompressed: + return parquet.RequiredFieldUncompressed + case compressionSnappy: + return parquet.RequiredFieldSnappy + case compressionGzip: + return parquet.RequiredFieldGzip + default: + return parquet.RequiredFieldUncompressed + } +} + +func optionalFieldCompression(c compression) func(*parquet.OptionalField) { + switch c { + case 
compressionUncompressed: + return parquet.OptionalFieldUncompressed + case compressionSnappy: + return parquet.OptionalFieldSnappy + case compressionGzip: + return parquet.OptionalFieldGzip + default: + return parquet.OptionalFieldUncompressed + } +} + +func NewParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + return newParquetWriter(w, append(opts, begin)...) +} + +func newParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + p := &ParquetWriter{ + max: 1000, + w: w, + compression: compressionSnappy, + } + + for _, opt := range opts { + if err := opt(p); err != nil { + return nil, err + } + } + + p.fields = Fields(p.compression) + if p.meta == nil { + ff := Fields(p.compression) + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + schema[i] = f.Schema() + } + p.meta = parquet.New(schema...) + } + + return p, nil +} + +// MaxPageSize is the maximum number of rows in each row groups' page. +func MaxPageSize(m int) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.max = m + return nil + } +} + +func begin(p *ParquetWriter) error { + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func withMeta(m *parquet.Metadata) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.meta = m + return nil + } +} + +func Uncompressed(p *ParquetWriter) error { + p.compression = compressionUncompressed + return nil +} + +func Snappy(p *ParquetWriter) error { + p.compression = compressionSnappy + return nil +} + +func Gzip(p *ParquetWriter) error { + p.compression = compressionGzip + return nil +} + +func withCompression(c compression) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.compression = c + return nil + } +} + +func (p *ParquetWriter) Write() error { + for i, f := range p.fields { + if err := f.Write(p.w, p.meta); err != nil { + return err + } + + for child := p.child; child != nil; child = child.child { + if 
err := child.fields[i].Write(p.w, p.meta); err != nil { + return err + } + } + } + + p.fields = Fields(p.compression) + p.child = nil + p.len = 0 + + schema := make([]parquet.Field, len(p.fields)) + for i, f := range p.fields { + schema[i] = f.Schema() + } + p.meta.StartRowGroup(schema...) + return nil +} + +func (p *ParquetWriter) Close() error { + if err := p.meta.Footer(p.w); err != nil { + return err + } + + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func (p *ParquetWriter) Add(rec Document) { + if p.len == p.max { + if p.child == nil { + // an error can't happen here + p.child, _ = newParquetWriter(p.w, MaxPageSize(p.max), withMeta(p.meta), withCompression(p.compression)) + } + + p.child.Add(rec) + return + } + + p.meta.NextDoc() + for _, f := range p.fields { + f.Add(rec) + } + + p.len++ +} + +type Field interface { + Add(r Document) + Write(w io.Writer, meta *parquet.Metadata) error + Schema() parquet.Field + Scan(r *Document) + Read(r io.ReadSeeker, pg parquet.Page) error + Name() string + Levels() ([]uint8, []uint8) +} + +func getFields(ff []Field) map[string]Field { + m := make(map[string]Field, len(ff)) + for _, f := range ff { + m[f.Name()] = f + } + return m +} + +func NewParquetReader(r io.ReadSeeker, opts ...func(*ParquetReader)) (*ParquetReader, error) { + ff := Fields(compressionUnknown) + pr := &ParquetReader{ + r: r, + } + + for _, opt := range opts { + opt(pr) + } + + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + pr.fieldNames = append(pr.fieldNames, f.Name()) + schema[i] = f.Schema() + } + + meta := parquet.New(schema...) 
+ if err := meta.ReadFooter(r); err != nil { + return nil, err + } + pr.rows = meta.Rows() + var err error + pr.pages, err = meta.Pages() + if err != nil { + return nil, err + } + + pr.rowGroups = meta.RowGroups() + _, err = r.Seek(4, io.SeekStart) + if err != nil { + return nil, err + } + pr.meta = meta + + return pr, pr.readRowGroup() +} + +func readerIndex(i int) func(*ParquetReader) { + return func(p *ParquetReader) { + p.index = i + } +} + +// ParquetReader reads one page from a row group. +type ParquetReader struct { + fields map[string]Field + fieldNames []string + index int + cursor int64 + rows int64 + rowGroupCursor int64 + rowGroupCount int64 + pages map[string][]parquet.Page + meta *parquet.Metadata + err error + + r io.ReadSeeker + rowGroups []parquet.RowGroup +} + +type Levels struct { + Name string + Defs []uint8 + Reps []uint8 +} + +func (p *ParquetReader) Levels() []Levels { + var out []Levels + //for { + for _, name := range p.fieldNames { + f := p.fields[name] + d, r := f.Levels() + out = append(out, Levels{Name: f.Name(), Defs: d, Reps: r}) + } + // if err := p.readRowGroup(); err != nil { + // break + // } + //} + return out +} + +func (p *ParquetReader) Error() error { + return p.err +} + +func (p *ParquetReader) readRowGroup() error { + p.rowGroupCursor = 0 + + if len(p.rowGroups) == 0 { + p.rowGroupCount = 0 + return nil + } + + rg := p.rowGroups[0] + p.fields = getFields(Fields(compressionUnknown)) + p.rowGroupCount = rg.Rows + p.rowGroupCursor = 0 + for _, col := range rg.Columns() { + name := strings.Join(col.MetaData.PathInSchema, ".") + f, ok := p.fields[name] + if !ok { + return fmt.Errorf("unknown field: %s", name) + } + pages := p.pages[name] + if len(pages) <= p.index { + break + } + + pg := pages[0] + if err := f.Read(p.r, pg); err != nil { + return fmt.Errorf("unable to read field %s, err: %s", f.Name(), err) + } + p.pages[name] = p.pages[name][1:] + } + p.rowGroups = p.rowGroups[1:] + return nil +} + +func (p *ParquetReader) 
Rows() int64 { + return p.rows +} + +func (p *ParquetReader) Next() bool { + if p.err == nil && p.cursor >= p.rows { + return false + } + if p.rowGroupCursor >= p.rowGroupCount { + p.err = p.readRowGroup() + if p.err != nil { + return false + } + } + + p.cursor++ + p.rowGroupCursor++ + return true +} + +func (p *ParquetReader) Scan(x *Document) { + if p.err != nil { + return + } + + for _, name := range p.fieldNames { + f := p.fields[name] + f.Scan(x) + } +} + +type StringOptionalField struct { + parquet.OptionalField + vals []string + read func(r Document) ([]string, []uint8, []uint8) + write func(r *Document, vals []string, def, rep []uint8) (int, int) + stats *stringOptionalStats +} + +func NewStringOptionalField(read func(r Document) ([]string, []uint8, []uint8), write func(r *Document, vals []string, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { + return &StringOptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newStringOptionalStats(maxDef(types)), + } +} + +func (f *StringOptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *StringOptionalField) Add(r Document) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) 
+} + +func (f *StringOptionalField) Scan(r *Document) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *StringOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *StringOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < f.Values(); j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringOptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type stringOptionalStats struct { + vals []string + min []byte + max []byte + nils int64 + maxDef uint8 +} + +func newStringOptionalStats(d uint8) *stringOptionalStats { + return &stringOptionalStats{maxDef: d} +} + +func (s *stringOptionalStats) add(vals []string, defs []uint8) { + var i int + for _, def := range defs { + if def < s.maxDef { + s.nils++ + } else { + s.vals = append(s.vals, vals[i]) + i++ + } + } +} + +func (s *stringOptionalStats) NullCount() *int64 { + return &s.nils +} + +func (s *stringOptionalStats) DistinctCount() *int64 { + return nil +} + +func (s *stringOptionalStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringOptionalStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringOptionalStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + 
sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +func pint32(i int32) *int32 { return &i } +func puint32(i uint32) *uint32 { return &i } +func pint64(i int64) *int64 { return &i } +func puint64(i uint64) *uint64 { return &i } +func pbool(b bool) *bool { return &b } +func pstring(s string) *string { return &s } +func pfloat32(f float32) *float32 { return &f } +func pfloat64(f float64) *float64 { return &f } + +// keeps track of the indices of repeated fields +// that have already been handled by a previous field +type indices []int + +func (i indices) rep(rep uint8) { + if rep > 0 { + r := int(rep) - 1 + i[r] = i[r] + 1 + for j := int(rep); j < len(i); j++ { + i[j] = 0 + } + } +} + +func maxDef(types []int) uint8 { + var out uint8 + for _, typ := range types { + if typ > 0 { + out++ + } + } + return out +} + +func Int32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t +} + +func Uint32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t + ct := sch.ConvertedType_UINT_32 + se.ConvertedType = &ct +} + +func Int64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t +} + +func Uint64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t + ct := sch.ConvertedType_UINT_64 + se.ConvertedType = &ct +} + +func Float32Type(se *sch.SchemaElement) { + t := sch.Type_FLOAT + se.Type = &t +} + +func Float64Type(se *sch.SchemaElement) { + t := sch.Type_DOUBLE + se.Type = &t +} + +func BoolType(se *sch.SchemaElement) { + t := sch.Type_BOOLEAN + se.Type = &t +} + +func StringType(se *sch.SchemaElement) { + t := sch.Type_BYTE_ARRAY + se.Type = &t +} diff --git a/cmd/parquetgen/dremel/testcases/repetition/repetition.go b/cmd/parquetgen/dremel/testcases/repetition/repetition.go new file mode 100644 index 0000000..b400aee --- /dev/null +++ b/cmd/parquetgen/dremel/testcases/repetition/repetition.go @@ -0,0 +1,20 @@ +package repetition + +//go:generate parquetgen -input repetition.go -type Document 
-package repetition -output generated.go + +type ( + Document struct { + Links []Link `parquet:"links"` + } + + Link struct { + Backward []Language `parquet:"backward"` + Forward []Language `parquet:"forward"` + } + + Language struct { + Codes []string `parquet:"code"` + URL *string `parquet:"url"` + Countries []string `parquet:"countries"` + } +) diff --git a/cmd/parquetgen/dremel/write_optional.go b/cmd/parquetgen/dremel/write_optional.go new file mode 100644 index 0000000..04a9b12 --- /dev/null +++ b/cmd/parquetgen/dremel/write_optional.go @@ -0,0 +1,97 @@ +package dremel + +import ( + "bytes" + "log" + "strings" + "text/template" + + "github.com/parsyl/parquet/cmd/parquetgen/fields" +) + +func init() { + funcs := template.FuncMap{ + "removeStar": func(s string) string { + return strings.Replace(s, "*", "", 1) + }, + "plusOne": func(i int) int { return i + 1 }, + "notNil": func(x *ifElse) bool { return x != nil }, + } + + var err error + writeTpl, err = template.New("output").Funcs(funcs).Parse(`func write{{.FuncName}}(x *{{.Field.StructType}}, vals []{{removeStar .Field.TypeName}}, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { {{range $i, $case := .Cases}} + case {{$case.Def}}: + {{$case.Val}}{{if $case.MaxDef}} + return 1, 1{{end}}{{end}} + } + + return 0, 1 +}`) + if err != nil { + log.Fatal(err) + } + + ifelseStmt = `{{define "ifelse"}}if {{.If.Cond}} { + {{.If.Val}} +} {{range $else := .ElseIf}} else if {{$else.Cond}} { + {{$else.Val}} +}{{end}} {{if notNil .Else}} else { + {{.Else.Val}} +} {{end}}{{end}}` + + writeTpl, err = writeTpl.Parse(ifelseStmt) + if err != nil { + log.Fatal(err) + } +} + +var ( + writeTpl *template.Template + ifelseStmt string +) + +type writeInput struct { + fields.Field + Cases []defCases + FuncName string +} + +type ifElse struct { + Cond string + Val string +} + +type defCases struct { + Def int + MaxDef bool + Val *string + RepCases []string +} + +func writeOptional(f fields.Field) string { + wi := 
writeInput{ + Field: f, + FuncName: strings.Join(f.FieldNames(), ""), + Cases: writeOptionalCases(f), + } + + var buf bytes.Buffer + err := writeTpl.Execute(&buf, wi) + if err != nil { + log.Fatal(err) //TODO: return error + } + return string(buf.Bytes()) +} + +func writeOptionalCases(f fields.Field) []defCases { + md := f.MaxDef() + cases := writeCases(f) + out := make([]defCases, len(cases)) + for i, def := range cases { + s := f.Init(def, 0) + out[i] = defCases{Def: def, Val: &s, MaxDef: def == md} + } + return out +} diff --git a/cmd/parquetgen/dremel/write_repeated.go b/cmd/parquetgen/dremel/write_repeated.go new file mode 100644 index 0000000..de0935a --- /dev/null +++ b/cmd/parquetgen/dremel/write_repeated.go @@ -0,0 +1,144 @@ +package dremel + +import ( + "bytes" + "fmt" + "log" + "strings" + "text/template" + + "github.com/parsyl/parquet/cmd/parquetgen/fields" +) + +var ( + writeRepeatedTpl *template.Template + ifTpl *template.Template +) + +type defCase struct { + Def int + Seen []fields.RepetitionType + Field fields.Field +} + +type writeRepeatedInput struct { + Field fields.Field + Defs []int + Func string +} + +func init() { + funcs := template.FuncMap{ + "removeStar": func(s string) string { + return strings.Replace(strings.Replace(s, "*", "", 1), "[]", "", 1) + }, + "newDefCase": func(def int, f fields.Field) defCase { + return defCase{Def: def, Field: f} + }, + "init": initRepeated, + "getRep": func(def int, f fields.Field) int { + var rep int + //defindex indead of def? 
+ for _, rt := range f.RepetitionTypes()[:f.DefIndex(def)] { + if rt == fields.Repeated { + rep++ + } + } + return rep + }, + "notNil": func(x *ifElse) bool { return x != nil }, + } + + var err error + ifTpl, err = template.New("tmp").Funcs(funcs).Parse(`{{template "ifelse" .}}`) + if err != nil { + log.Fatalf("unable to create templates: %s", err) + } + ifTpl, err = ifTpl.Parse(ifelseStmt) + if err != nil { + log.Fatalf("unable to create templates: %s", err) + } + + writeRepeatedTpl, err = template.New("output").Funcs(funcs).Parse(`func {{.Func}}(x *{{.Field.StructType}}, vals []{{removeStar .Field.TypeName}}, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, {{.Field.MaxRep}}) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + {{template "defSwitch" .}} + } + + return nVals, nLevels +}`) + if err != nil { + log.Fatalf("unable to create templates: %s", err) + } + + defSwitchTpl := `{{define "defSwitch"}}switch def { {{range $i, $def := .Defs}} + case {{$def}}: + {{ template "defCase" newDefCase $def $.Field}}{{if eq $def $.Field.MaxDef}} + nVals++{{end}}{{end}} + }{{end}}` + + defCaseTpl := `{{define "defCase"}}{{$cases := .Field.RepCases $.Def}}{{if $cases.UseRepCase .Field $.Def}}switch rep { +{{range $case := $cases}}{{$case.Case}} + {{init $.Def $case.Rep $.Field}} +{{end}}}{{else}}{{init $.Def 0 $.Field}}{{end}}{{end}}` + + for _, t := range []string{defCaseTpl, defSwitchTpl} { + writeRepeatedTpl, err = writeRepeatedTpl.Parse(t) + if err != nil { + log.Fatal(err) + } + } +} + +func writeRepeated(f fields.Field) string { + wi := writeRepeatedInput{ + Field: f, + Func: fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), + Defs: writeCases(f), + } + + var buf bytes.Buffer + if err := writeRepeatedTpl.Execute(&buf, wi); err != nil { + log.Fatal(err) + } + return string(buf.Bytes()) +} + +func initRepeated(def, rep int, f fields.Field) string { + md 
:= int(f.MaxDef()) + rt := f.RepetitionTypes().Def(def) + + if def < md && rep == 0 && rt == fields.Repeated { + rep = def + } + + return f.Init(def, rep) +} + +func writeCases(f fields.Field) []int { + var out []int + md := f.MaxDef() + chain := fields.Reverse(f.Chain()) + start := 1 + for _, f := range chain { + if f.RepetitionType != fields.Required && f.Defined && start < md { + start++ + } + } + + for def := start; def <= md; def++ { + out = append(out, def) + } + return out +} diff --git a/cmd/parquetgen/dremel/write_test.go b/cmd/parquetgen/dremel/write_test.go new file mode 100644 index 0000000..137f168 --- /dev/null +++ b/cmd/parquetgen/dremel/write_test.go @@ -0,0 +1,786 @@ +package dremel_test + +import ( + "fmt" + "go/format" + "testing" + + "github.com/parsyl/parquet/cmd/parquetgen/dremel" + "github.com/parsyl/parquet/cmd/parquetgen/fields" + "github.com/stretchr/testify/assert" +) + +func TestWrite(t *testing.T) { + testCases := []struct { + structName string + name string + field fields.Field + result string + }{ + { + name: "required and not nested", + field: fields.Field{ + Type: "int32", Name: "ID", RepetitionType: fields.Required, + }, + result: `func writeID(x *Person, vals []int32) { + x.ID = vals[0] +}`, + }, + { + name: "optional and not nested", + field: fields.Field{ + Type: "int32", Name: "ID", RepetitionType: fields.Optional, + }, + result: `func writeID(x *Person, vals []int32, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.ID = pint32(vals[0]) + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "required and nested", + field: fields.Field{ + Name: "Other", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Required}, + }}, + }, + }, + result: `func writeOtherHobbyDifficulty(x *Person, vals []int32) { + x.Other.Hobby.Difficulty = vals[0] +}`, + }, + { + name: 
"optional and nested", + field: fields.Field{ + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Optional}, + }, + }, + result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Hobby = &Hobby{} + case 2: + x.Hobby = &Hobby{Difficulty: pint32(vals[0])} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "optional and nested and seen by an optional fields", + field: fields.Field{ + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Optional}, + }, + }, + result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 2: + x.Hobby.Difficulty = pint32(vals[0]) + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "mix of optional and required and nested", + field: fields.Field{ + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + }, + }, + result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Hobby = &Hobby{Name: vals[0]} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "mix of optional and required and nested v2", + field: fields.Field{ + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, + }, + }, + result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Hobby.Name = pstring(vals[0]) + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "mix of optional and require and nested 3 
deep", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Friend = &Entity{} + case 2: + x.Friend = &Entity{Hobby: Item{Name: pstring(vals[0])}} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "mix of optional and required and nested 3 deep v2 and seen by optional field", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 2: + x.Friend.Hobby.Name = pstring(vals[0]) + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "mix of optional and required and nested 3 deep v3", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + }}, + }, + }, + result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Friend = &Entity{} + case 2: + x.Friend = &Entity{Hobby: &Item{Name: vals[0]}} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "nested 3 deep all optional", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, 
Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Friend = &Entity{} + case 2: + x.Friend = &Entity{Hobby: &Item{}} + case 3: + x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "nested 3 deep all optional and seen by optional field", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 2: + x.Friend.Hobby = &Item{} + case 3: + x.Friend.Hobby = &Item{Name: pstring(vals[0])} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "four deep", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, + result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Friend = &Entity{} + case 2: + x.Friend = &Entity{Hobby: &Item{}} + case 3: + x.Friend = &Entity{Hobby: &Item{Name: &Name{}}} + case 4: + x.Friend = &Entity{Hobby: &Item{Name: &Name{First: pstring(vals[0])}}} + return 1, 1 + } + 
+ return 0, 1 +}`, + }, + { + name: "four deep and seen by optional field", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, + result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 2: + x.Friend.Hobby = &Item{} + case 3: + x.Friend.Hobby = &Item{Name: &Name{}} + case 4: + x.Friend.Hobby = &Item{Name: &Name{First: pstring(vals[0])}} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "four deep mixed", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, + result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Friend.Hobby = &Item{} + case 2: + x.Friend.Hobby = &Item{Name: &Name{}} + case 3: + x.Friend.Hobby = &Item{Name: &Name{First: pstring(vals[0])}} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "four deep mixed and seen by a required sub-field", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Required}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", 
RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, + result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Friend.Hobby = &Item{} + case 2: + x.Friend.Hobby = &Item{Name: &Name{}} + case 3: + x.Friend.Hobby = &Item{Name: &Name{First: pstring(vals[0])}} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "four deep mixed v2", + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Required}, + }}, + }}, + }, + }, + result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Friend = &Entity{} + case 2: + x.Friend = &Entity{Hobby: &Item{}} + case 3: + x.Friend = &Entity{Hobby: &Item{Name: &Name{First: vals[0]}}} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "four deep mixed v2 and seen by an optional field", + // fields: []fields.Field{ + // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "Person", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, + // }, + field: fields.Field{ + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: 
[]fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Required}, + }}, + }}, + }, + }, + result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 2: + x.Friend.Hobby = &Item{} + case 3: + x.Friend.Hobby = &Item{Name: &Name{First: vals[0]}} + return 1, 1 + } + + return 0, 1 +}`, + }, + { + name: "writeLinkBackward", + structName: "Document", + field: fields.Field{ + Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Backward", RepetitionType: fields.Repeated}, + }, + }, + result: `func writeLinkBackward(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Link = &Link{} + case 2: + switch rep { + case 0: + x.Link = &Link{Backward: []string{vals[nVals]}} + case 1: + x.Link.Backward = append(x.Link.Backward, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: "writeLinkFoward", + structName: "Document", + field: fields.Field{ + Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Backward", RepetitionType: fields.Repeated}, + {Type: "string", Name: "Forward", RepetitionType: fields.Repeated}, + }, + }, + result: `func writeLinkForward(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Link.Forward = append(x.Link.Forward, vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: 
"writeNamesLanguagesCode", + structName: "Document", + field: fields.Field{ + Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "Language", Name: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Code", RepetitionType: fields.Required}, + }}, + }, + }, + result: `func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Names = append(x.Names, Name{}) + case 2: + switch rep { + case 0, 1: + x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) + case 2: + x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) + } + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: "writeNamesLanguagesCountry", + structName: "Document", + field: fields.Field{ + Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "Language", Name: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Code", RepetitionType: fields.Required}, + {Type: "string", Name: "Country", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: "writeFriendsID", + structName: "Person", + field: fields.Field{ + Name: "Friends", Type: "Being", RepetitionType: fields.Repeated, Children: []fields.Field{ + 
{Type: "int32", Name: "ID", RepetitionType: fields.Required}, + }, + }, + result: `func writeFriendsID(x *Person, vals []int32, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Friends = append(x.Friends, Being{ID: vals[nVals]}) + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: "repeated primitive", + structName: "Document", + field: fields.Field{ + Name: "LuckyNumbers", Type: "int64", RepetitionType: fields.Repeated, + }, + result: `func writeLuckyNumbers(x *Document, vals []int64, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: "repeated field handled by previous repeated field", + structName: "Document", + field: fields.Field{ + Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "ID", RepetitionType: fields.Required}, + {Type: "string", Name: "Forward", RepetitionType: fields.Repeated}, + }, + }, + result: `func writeLinkForward(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Link.Forward = append(x.Link.Forward, vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: "nested 2 deep", + structName: "Person", + field: fields.Field{ + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Skills", Type: "Skill", 
RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + {Type: "string", Name: "Difficulty", RepetitionType: fields.Required}, + }}, + }, + }, + result: `func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: "everything is repeated", + structName: "Document", + field: fields.Field{ + Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + }, + }, + result: `func writeLinksForwardCountries(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +}`, + }, + { + name: "everything is repeated seen at rep 1", + structName: "Doc", + field: fields.Field{ + Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", 
RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }, + }, + result: `func writeLinksForwardCodes(x *Doc, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{}) + case 3: + switch rep { + case 0, 1, 2: + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}}) + case 3: + x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +}`, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { + ty := tc.structName + if ty == "" { + ty = "Person" + } + flds := fields.Field{Type: ty, Children: []fields.Field{tc.field}}.Fields() + f := flds[len(flds)-1] + s := dremel.Write(f) + gocode, err := format.Source([]byte(s)) + assert.NoError(t, err) + assert.Equal(t, tc.result, string(gocode)) + }) + } +} diff --git a/cmd/parquetgen/fields/fields.go b/cmd/parquetgen/fields/fields.go new file mode 100644 index 0000000..51e1cc8 --- /dev/null +++ b/cmd/parquetgen/fields/fields.go @@ -0,0 +1,533 @@ +package fields + +import ( + "fmt" + "strings" +) + +// Field holds metadata that is required by parquetgen in order +// to generate code. 
+type Field struct { + Type string + Name string + ColumnName string + RepetitionType RepetitionType + Parent *Field + Children []Field + Embedded bool + NthChild int + Defined bool +} + +type input struct { + Parent string + Val string + Append bool +} + +func (f Field) StructType() string { + if f.Parent == nil { + return f.Type + } + + var typ string + for fld := f.Parent; fld != nil; fld = fld.Parent { + typ = fld.Type + } + return typ +} + +func (f Field) Fields() []Field { + return f.fields(0) +} + +func (f Field) IsRoot() bool { + return f.Parent == nil +} + +func (f Field) fields(i int) []Field { + var out []Field + for j, fld := range f.Children { + fld.NthChild = j + fld.Parent = &f + if fld.Primitive() { + out = append(out, fld) + } else { + out = append(out, fld.fields(i+1)...) + } + } + return out +} + +func (f Field) Chain() []Field { + out := []Field{f} + for fld := f.Parent; fld != nil; fld = fld.Parent { + out = append(out, *fld) + } + var defined bool + for i, fld := range out { + fld.Defined = defined + out[i] = fld + if fld.Parent != nil && fld.NthChild > 0 { + fld.Parent.Defined = true + defined = true + } + } + + return out +} + +func Reverse(out []Field) []Field { + for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 { + out[i], out[j] = out[j], out[i] + } + return out +} + +func (f Field) FieldNames() []string { + var out []string + for _, fld := range Reverse(f.Chain()) { + if fld.Name != "" { + out = append(out, fld.Name) + } + } + return out +} + +func (f Field) FieldTypes() []string { + var out []string + for _, fld := range Reverse(f.Chain()) { + if fld.Type != "" { + out = append(out, fld.Type) + } + } + return out +} + +func (f Field) ColumnNames() []string { + var out []string + for _, fld := range Reverse(f.Chain()) { + if fld.ColumnName != "" { + out = append(out, fld.ColumnName) + } + } + return out +} + +func (f Field) RepetitionTypes() RepetitionTypes { + var out []RepetitionType + for _, fld := range Reverse(f.Chain()) { + out =
append(out, fld.RepetitionType) + } + return out[1:] +} + +// DefIndex calculates the index of the +// nested field with the given definition level. +func (f Field) DefIndex(def int) int { + var count, i int + for _, fld := range Reverse(f.Chain()) { + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { + count++ + } + if count == def { + return i + } + i++ + } + return def +} + +// MaxDef calculates the largest possible definition +// level for the nested field. +func (f Field) MaxDef() int { + var out int + for _, fld := range Reverse(f.Chain()) { + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { + out++ + } + } + return out +} + +// MaxRep calculates the largest possible repetition +// level for the nested field. +func (f Field) MaxRep() int { + var out int + for _, fld := range Reverse(f.Chain()) { + if fld.RepetitionType == Repeated { + out++ + } + } + return out +} + +// MaxRepForDef calculates the largest possible repetition +// level for the nested field at the given definition level. +func (f Field) MaxRepForDef(def int) int { + var out int + var defs int + for _, fld := range Reverse(f.Chain()) { + if fld.RepetitionType == Repeated || fld.RepetitionType == Optional { + defs++ + } + + if defs == def { + return out + } + + if fld.RepetitionType == Repeated { + out++ + } + } + return out +} + +// RepCase is used by parquetgen to generate code. +type RepCase struct { + // Reps holds the repetition levels that Case() renders into a switch case (for example: case 0, 1:) + Reps []int + // Rep is the repetition level that is handled by the switch case. + Rep int + + // Repeated is true if any of the fields (including the one at the def level) were repeated + // This allows the def case to not have a rep case for fields that have a repetition somewhere + // in the chain.
+ Repeated bool +} + +func (r RepCase) Case() string { + return fmt.Sprintf( + "case %s:", + strings.Trim(strings.Replace(fmt.Sprint(r.Reps), " ", ", ", -1), "[]"), + ) +} + +type RepCases []RepCase + +func (r RepCases) UseRepCase(f Field, def int) bool { + if f.Parent.IsRoot() { + return false + } + return len(r) > 1 || + (len(r) == 1 && r[0].Repeated && r[0].Rep < f.MaxRepForDef(def)) +} + +// RepCases returns a RepCase slice based on the field types and +// what sub-fields have already been seen. +func (f Field) RepCases(def int) RepCases { + mr := int(f.MaxRep()) + + var out []RepCase + var defs int + var reps int + rollup := []int{0} + i := 1 + for _, fld := range Reverse(f.Chain()) { + if fld.IsRoot() { + continue + } + + if defs == def && fld.RepetitionType != Required { + break + } + + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { + defs++ + } + + if fld.RepetitionType == Repeated && reps < mr && defs <= def { + reps++ + rollup = append(rollup, reps) + } + + if len(rollup) > 0 && (!fld.Defined || (defs == def && fld.RepetitionType != Required)) { + out = append(out, RepCase{Reps: rollup[:], Rep: max(rollup), Repeated: reps > 0}) + rollup = []int{} + } + + i++ + } + return out +} + +// NilField finds the nth (zero-based) field that is optional or repeated and returns its dotted +// field path, its repetition type, its index among the repetition types and the count of repeated fields seen.
+func (f Field) NilField(n int) (string, RepetitionType, int, int) { + var fields []string + var count int + var j, reps int + var o RepetitionType + + fieldNames := f.FieldNames() + for j, o = range f.RepetitionTypes() { + fields = append(fields, fieldNames[j]) + if o == Optional { + count++ + } else if o == Repeated { + count++ + reps++ + } + if count > n { + break + } + } + return strings.Join(fields, "."), o, j, reps +} + +// Child returns the i-th field of the chain, counting from the root (index 0) +func (f Field) Child(i int) Field { + return Reverse(f.Chain())[i] +} + +// Repeated wraps RepetitionTypes.Repeated() +func (f Field) Repeated() bool { + return f.RepetitionTypes().Repeated() +} + +// Optional wraps RepetitionTypes.Optional() +func (f Field) Optional() bool { + return f.RepetitionTypes().Optional() +} + +// Required wraps RepetitionTypes.Required() +func (f Field) Required() bool { + return f.RepetitionTypes().Required() +} + +func (f Field) leftComplete(fld Field, i, def, rep, maxDef, maxRep, defs, reps int) bool { + if fld.RepetitionType == Optional && rep == 0 && !fld.Defined { + return true + } + + if fld.RepetitionType == Repeated && rep > 0 && reps == rep && f.NthChild == 0 { + return true + } + + if defs == maxDef && fld.RepetitionType != Required && f.NthChild == 0 { + return true + } + + //if rep == 0 && fld.RepetitionType != Required && (fld.RepetitionType == Repeated || f.RepetitionType == Repeated) { + if rep == 0 && fld.RepetitionType == Repeated && !fld.Defined { + return true + } + + return false +} + +func (f Field) rightComplete(def, defs, maxDef int) bool { + return def != maxDef && defs >= def +} + +// Init is called by parquetgen's templates to generate the code +// that writes to a struct's field for definition level def and repetition level rep. NOTE(review): the second loop ranges over chain[i:] but looks ahead with chain[j+1], not chain[i+j+1]; confirm this is intended. +// +// example: x.Friend.Hobby = &Item{} +func (f Field) Init(def, rep int) string { + maxDef := f.MaxDef() + maxRep := f.MaxRep() + var defs, reps int + var fld Field + + left, right := "%s", "%s" + + chain := f.Chain() + + chain = Reverse(chain) + + var i int + for _,
fld = range chain { + if fld.Parent == nil { + continue + } + + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { + defs++ + } + + if fld.RepetitionType == Repeated { + reps++ + } + + switch fld.RepetitionType { + case Required: + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.Name)) + case Optional: + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.Name)) + case Repeated: + if fld.Primitive() || f.leftComplete(fld, i, def, rep, maxDef, maxRep, defs, reps) { + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.Name)) + } else { + left = fmt.Sprintf(left, fmt.Sprintf(".%s[ind[%d]]%%s", fld.Name, reps-1)) + } + } + + if f.leftComplete(fld, i, def, rep, maxDef, maxRep, defs, reps) { + i++ + break + } + + i++ + } + + left = fmt.Sprintf(left, "") + + for j, fld := range chain[i:] { + if j > 0 && (fld.RepetitionType == Optional || fld.RepetitionType == Repeated) { + defs++ + } + + if j > 0 && fld.RepetitionType == Repeated { + reps++ + } + + switch fld.RepetitionType { + case Required: + if fld.Primitive() { + if (fld.Parent.IsRoot() || fld.Parent.Defined) && fld.Parent.RepetitionType == Repeated && (rep == 0 || rep == reps) { //Should this be a check for repeated anywhere in the full chain?
+ right = fmt.Sprintf(right, "vals[nVals]%s") + } else if (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { + right = fmt.Sprintf(right, "vals[0]%s") + } else if fld.Parent.RepetitionType == Repeated { + right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[nVals]%%s", fld.Name)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[0]%%s", fld.Name)) + } + } else { + right = fmt.Sprintf(right, fmt.Sprintf("%s: %s{%%s}", fld.Name, fld.Type)) + } + case Optional: + if fld.Primitive() { + if f.NthChild == 0 && fld.Parent.Optional() && !fld.Parent.Repeated() { + right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[0])%%s", fld.Name, fld.Type)) + } else if fld.Parent.RepetitionType == Repeated { + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.Type)) + } else if fld.Parent.Repeated() && f.NthChild == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[nVals])%%s", fld.Name, fld.Type)) + } else if fld.Parent.Repeated() && f.NthChild > 0 { + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.Type)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[0])%%s", fld.Type)) + } + } else { + if j == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("&%s{%%s}", fld.Type)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("%s: &%s{%%s}", fld.Name, fld.Type)) + } + } + case Repeated: + if fld.Primitive() { + if j == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, vals[nVals])%%s", left)) + } else if !fld.IsRoot() { + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{vals[nVals]}%%s", fld.Name, fld.Type)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.Type)) + } + } else { + if rep > 0 && reps == rep || (fld.MaxRepForDef(def) == rep && !strings.Contains(right, "append(")) { + right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, %s{%%s})", left, fld.Type)) + } else if rep == 0 && j == 0 && !f.rightComplete(def, defs, maxDef) { + right = fmt.Sprintf(right,
fmt.Sprintf("[]%s{{%%s}}", fld.Type)) + } else if rep == 0 && j == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{%%s}", fld.Type)) + } else if (!f.rightComplete(def, defs, maxDef) && !chain[j+1].Primitive()) || (f.rightComplete(def, defs, maxDef) && def == defs) { + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{{%%s}}", fld.Name, fld.Type)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.Name, fld.Type)) + } + } + } + + if f.rightComplete(def, defs, maxDef) { + break + } + } + + right = fmt.Sprintf(right, "") + return fmt.Sprintf("x%s = %s", left, right) +} + +// IsRep is true if the number of repeated fields in the chain equals rep +func (f Field) IsRep(rep int) bool { + var reps int + for _, fld := range Reverse(f.Chain()) { + if fld.RepetitionType == Repeated { + reps++ + } + } + + return reps == rep +} + +// Path creates gocode for initializing a string slice in a go template +func (f Field) Path() string { + names := f.ColumnNames() + out := make([]string, len(names)) + for i, n := range names { + out[i] = fmt.Sprintf(`"%s"`, n) + } + return strings.Join(out, ", ") +} + +// Primitive is called in order to determine if the field is primitive or not.
+ +func (f Field) Primitive() bool { + _, ok := primitiveTypes[f.Type] + return ok +} + +func (f Field) FieldType() string { + var op string + if f.Optional() || f.Repeated() { + op = "Optional" + } + + ft := primitiveTypes[f.Type] + return fmt.Sprintf(ft.name, op, "Field") +} + +func (f Field) ParquetType() string { + ft := primitiveTypes[f.Type] + return fmt.Sprintf(ft.name, "", "Type") +} + +func (f Field) Category() string { + var op string + if f.Optional() || f.Repeated() { + op = "Optional" + } + + ft := primitiveTypes[f.Type] + return fmt.Sprintf(ft.category, op) +} + +func (f Field) TypeName() string { + var star string + if f.RepetitionType == Optional { + star = "*" + } + return fmt.Sprintf("%s%s", star, f.Type) +} + +type fieldType struct { + name string + category string +} + +var primitiveTypes = map[string]fieldType{ + "int32": {"Int32%s%s", "numeric%s"}, + "uint32": {"Uint32%s%s", "numeric%s"}, + "int64": {"Int64%s%s", "numeric%s"}, + "uint64": {"Uint64%s%s", "numeric%s"}, + "float32": {"Float32%s%s", "numeric%s"}, + "float64": {"Float64%s%s", "numeric%s"}, + "bool": {"Bool%s%s", "bool%s"}, + "string": {"String%s%s", "string%s"}, +} + +func max(i []int) int { + return i[len(i)-1] +} diff --git a/cmd/parquetgen/fields/fields_test.go b/cmd/parquetgen/fields/fields_test.go new file mode 100644 index 0000000..b44a617 --- /dev/null +++ b/cmd/parquetgen/fields/fields_test.go @@ -0,0 +1,791 @@ +package fields_test + +import ( + "fmt" + "go/format" + "testing" + + "github.com/parsyl/parquet/cmd/parquetgen/fields" + "github.com/stretchr/testify/assert" +) + +func TestNilFields(t *testing.T) { + type testInput struct { + f fields.Field + expected []string + } + + testCases := []testInput{ + { + f: fields.Field{Name: "First", RepetitionType: fields.Optional, Parent: &fields.Field{ + Name: "Name", RepetitionType: fields.Required, Parent: &fields.Field{Name: "Friends", RepetitionType: fields.Repeated}}}, + expected: []string{ + "Friends", + "Friends.Name.First", 
+ }, + }, + { + f: fields.Field{Name: "First", RepetitionType: fields.Optional, Parent: &fields.Field{Name: "Name", RepetitionType: fields.Required, Parent: &fields.Field{Name: "Friend", RepetitionType: fields.Required}}}, + expected: []string{ + "Friend.Name.First", + }, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { + if !assert.Equal(t, len(tc.expected), tc.f.MaxDef()) { + return + } + + f := fields.Field{Type: "Person", Children: []fields.Field{tc.f}} + + for i := 0; i < f.MaxDef(); i++ { + s, _, _, _ := f.NilField(i) + assert.Equal(t, tc.expected[i], s) + } + }) + } +} + +func TestInit(t *testing.T) { + testCases := []struct { + fields []fields.Field + def int + rep int + expected string + }{ + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "int64", RepetitionType: fields.Repeated}, + }}, + }, + rep: 0, + def: 1, + expected: "x.Links = &Link{}", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "int64", RepetitionType: fields.Repeated}, + }}, + }, + rep: 0, + def: 2, + expected: "x.Links = &Link{Backward: []int64{vals[nVals]}}", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "int64", RepetitionType: fields.Repeated}, + }}, + }, + def: 2, + rep: 1, + expected: "x.Links.Backward = append(x.Links.Backward, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Forward", Type: "int64", RepetitionType: fields.Repeated}, + }}, + }, + def: 2, + rep: 1, + expected: "x.Links.Forward = append(x.Links.Forward, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, 
Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Code", Type: "int64", RepetitionType: fields.Required}, + }}, + }}, + }, + def: 2, + rep: 0, + expected: "x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}}", + }, + { + fields: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Code", Type: "int64", RepetitionType: fields.Required}, + }}, + }}, + }, + def: 2, + rep: 1, + expected: "x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}})", + }, + { + fields: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Code", Type: "int64", RepetitionType: fields.Required}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: "x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]})", + }, + { + fields: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Code", Type: "int64", RepetitionType: fields.Required}, + }}, + }}, + }, + def: 1, + rep: 1, + expected: "x.Names = append(x.Names, Name{})", + }, + { + fields: []fields.Field{ + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "string", RepetitionType: fields.Repeated}, + }}, + }, + def: 1, + rep: 0, + expected: "x.Link = &Link{}", + }, + { + fields: []fields.Field{ + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "string", RepetitionType: fields.Repeated}, + }}, + 
}, + def: 2, + rep: 0, + expected: "x.Link = &Link{Backward: []string{vals[nVals]}}", + }, + { + fields: []fields.Field{ + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "string", RepetitionType: fields.Repeated}, + }}, + }, + def: 2, + rep: 1, + expected: "x.Link.Backward = append(x.Link.Backward, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Language", Type: "Language", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 0, + expected: "x.Names = []Name{{Language: Language{Codes: []string{vals[nVals]}}}}", + }, + { + fields: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 0, + expected: "x.Name.Languages = []Language{{Codes: []string{vals[nVals]}}}", + }, + { + fields: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Language", Type: "Language", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: "x.Names[ind[0]].Language.Codes = append(x.Names[ind[0]].Language.Codes, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: 
"x.Name.Languages[ind[0]].Codes = append(x.Name.Languages[ind[0]].Codes, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Thing", Type: "Thing", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Thing.Names[ind[0]].Languages[ind[1]].Codes = append(x.Thing.Names[ind[0]].Languages[ind[1]].Codes, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 2, + expected: "x.Friend = &Entity{Hobby: Item{Name: pstring(vals[0])}}", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 3, + expected: "x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}}", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 3, + expected: "x.Friend.Hobby = &Item{Name: pstring(vals[0])}", + }, + { + fields: []fields.Field{ + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: 
"Difficulty", Type: "int32", RepetitionType: fields.Optional}, + }}, + }, + def: 1, + expected: "x.Hobby = &Hobby{}", + }, + { + fields: []fields.Field{ + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Difficulty", Type: "int32", RepetitionType: fields.Optional}, + }}, + }, + def: 2, + expected: "x.Hobby = &Hobby{Difficulty: pint32(vals[0])}", + }, + { + fields: []fields.Field{ + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + {Name: "Difficulty", Type: "int32", RepetitionType: fields.Optional}, + }}, + }, + def: 2, + expected: "x.Hobby.Difficulty = pint32(vals[0])", + }, + { + fields: []fields.Field{ + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Required}, + }}, + }, + def: 1, + expected: "x.Hobby = &Hobby{Name: vals[0]}", + }, + { + fields: []fields.Field{ + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }, + def: 1, + expected: "x.Hobby.Name = pstring(vals[0])", + }, + { + fields: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }, + def: 1, + expected: "x.Hobby = &Item{}", + }, + { + fields: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }, + def: 2, + expected: "x.Hobby = &Item{Name: pstring(vals[0])}", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + 
{Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 3, + expected: "x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}}", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 1, + expected: "x.Friend.Hobby = &Item{}", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 2, + expected: "x.Friend.Hobby = &Item{}", + }, + { + fields: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Country", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 1, + rep: 1, + expected: "x.Names = append(x.Names, Name{})", + }, + { + fields: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Zip", Type: "string", RepetitionType: fields.Optional}, + {Name: "Country", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 3, + rep: 0, + expected: "x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, 
Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "First", Type: "string", RepetitionType: fields.Required}, + }}, + }}, + }}, + }, + def: 1, + expected: "x.Friend = &Entity{}", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "First", Type: "string", RepetitionType: fields.Required}, + }}, + }}, + }}, + }, + def: 2, + expected: "x.Friend = &Entity{Hobby: &Item{}}", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "First", Type: "string", RepetitionType: fields.Required}, + }}, + }}, + }}, + }, + def: 3, + expected: "x.Friend = &Entity{Hobby: &Item{Name: &Name{First: vals[0]}}}", + }, + { + fields: []fields.Field{ + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Suffix", Type: "string", RepetitionType: fields.Optional}, + {Name: "First", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }}, + }, + def: 3, + expected: "x.Friend.Hobby.Name.First = pstring(vals[0])", + }, + { + fields: []fields.Field{ + { + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Skills", Type: "Skill", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Name", 
RepetitionType: fields.Required}, + {Type: "string", Name: "Difficulty", RepetitionType: fields.Required}, + }}, + }, + }, + }, + def: 2, + rep: 1, + expected: "x.Hobby.Skills[ind[0]].Difficulty = vals[nVals]", + }, + { + fields: []fields.Field{ + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Forward", Type: "int64", RepetitionType: fields.Repeated}, + }}, + }, + rep: 1, + def: 2, + expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Forward", Type: "string", RepetitionType: fields.Repeated}, + }}, + }, + rep: 0, + def: 2, + expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "LuckyNumbers", Type: "int64", RepetitionType: fields.Repeated}, + }, + def: 1, + rep: 0, + expected: "x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "LuckyNumbers", Type: "int64", RepetitionType: fields.Repeated}, + }, + def: 1, + rep: 1, + expected: "x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "A", Type: "A", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "B", Type: "B", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "C", Type: "C", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "D", Type: "D", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "E", Type: "E", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "F", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }}, + }}, + }}, + }, + def: 3, + rep: 0, + expected: "x.A.B = &B{C: C{D: []D{{E: E{F: pstring(vals[nVals])}}}}}", + }, + { + 
fields: []fields.Field{ + {Name: "A", Type: "A", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "B", Type: "B", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "C", Type: "C", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "D", Type: "D", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "E", Type: "E", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "x", Type: "string", RepetitionType: fields.Optional}, + {Name: "F", Type: "string", RepetitionType: fields.Optional}, + }}, + }}, + }}, + }}, + }}, + }, + def: 3, + expected: "x.A.B.C.D[ind[0]].E.F = pstring(vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 1, + rep: 1, + expected: "x.Links = append(x.Links, Link{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: "x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Backward[ind[1]].Codes = append(x.Links[ind[0]].Backward[ind[1]].Codes, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: 
fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 2, + expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Forward[ind[1]].Codes = 
append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 1, + rep: 0, + expected: "x.Links = append(x.Links, Link{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 1, + rep: 1, + expected: "x.Links = append(x.Links, Link{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 0, + expected: "x.Links = []Link{{Backward: []Language{{}}}}", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 1, + expected: "x.Links = append(x.Links, Link{Backward: []Language{{}}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: "x.Links[ind[0]].Backward = 
append(x.Links[ind[0]].Backward, Language{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 1, + expected: "x.Links = append(x.Links, Link{Backward: []Language{{Codes: []string{vals[nVals]}}}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 2, + expected: "x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{Codes: []string{vals[nVals]}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Backward[ind[1]].Codes = append(x.Links[ind[0]].Backward[ind[1]].Codes, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Forward[ind[1]].Countries = 
append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 2, //1 isn't a valid rep because it is handled by Links.Backward.Codes + expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals])", + }, + } + + for i, tc := range testCases { + 
t.Run(fmt.Sprintf("%02d def %d rep %d", i, tc.def, tc.rep), func(t *testing.T) { + fields := fields.Field{Children: tc.fields}.Fields() + field := fields[len(fields)-1] + s := field.Init(tc.def, tc.rep) + gocode, err := format.Source([]byte(s)) + assert.NoError(t, err) + assert.Equal(t, tc.expected, string(gocode)) + }) + } +} diff --git a/internal/fields/repetition.go b/cmd/parquetgen/fields/repetition.go similarity index 91% rename from internal/fields/repetition.go rename to cmd/parquetgen/fields/repetition.go index 1381933..2f280dc 100644 --- a/internal/fields/repetition.go +++ b/cmd/parquetgen/fields/repetition.go @@ -11,6 +11,17 @@ const ( Repeated RepetitionType = 2 ) +func (r RepetitionType) Prefix() string { + switch r { + case Optional: + return "*" + case Repeated: + return "[]" + default: + return "" + } +} + // RepetitionTypes provides several functions used by parquetgen's // go templates to generate code. type RepetitionTypes []RepetitionType @@ -99,15 +110,6 @@ func (r RepetitionTypes) NRepeated(i int) bool { return false } -func reverse(in []field) []field { - flds := append(in[:0:0], in...) 
- for i := len(flds)/2 - 1; i >= 0; i-- { - opp := len(flds) - 1 - i - flds[i], flds[opp] = flds[opp], flds[i] - } - return flds -} - type rts []RepetitionType func (r rts) add(i int, rts []RepetitionType) rts { diff --git a/internal/fields/templates.go b/cmd/parquetgen/fields/templates.go similarity index 100% rename from internal/fields/templates.go rename to cmd/parquetgen/fields/templates.go diff --git a/internal/gen/funcs.go b/cmd/parquetgen/gen/funcs.go similarity index 79% rename from internal/gen/funcs.go rename to cmd/parquetgen/gen/funcs.go index 9ceefb3..698ed77 100644 --- a/internal/gen/funcs.go +++ b/cmd/parquetgen/gen/funcs.go @@ -5,9 +5,9 @@ import ( "strings" "text/template" - "github.com/parsyl/parquet/internal/cases" - "github.com/parsyl/parquet/internal/dremel" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/cases" + "github.com/parsyl/parquet/cmd/parquetgen/dremel" + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) var ( @@ -23,13 +23,13 @@ var ( }, "dedupe": dedupe, "compressionFunc": func(f fields.Field) string { - if strings.Contains(f.FieldType, "Optional") { + if strings.Contains(f.Category(), "Optional") { return "optionalFieldCompression" } return "fieldCompression" }, "funcName": func(f fields.Field) string { - return strings.Join(f.FieldNames, "") + return strings.Join(f.FieldNames(), "") }, "join": func(names []string) string { return strings.Join(names, ".") @@ -45,11 +45,11 @@ var ( var out []string var intFound, stringFound bool for _, f := range fields { - if !intFound && strings.Contains(f.TypeName, "int") { + if !intFound && strings.Contains(f.Type, "int") { intFound = true out = append(out, `"math"`) } - if !stringFound && strings.Contains(f.TypeName, "string") { + if !stringFound && strings.Contains(f.Type, "string") { stringFound = true out = append(out, `"sort"`) } @@ -58,7 +58,7 @@ var ( }, "maxType": func(f fields.Field) string { var out string - switch f.TypeName { + switch 
f.Type { case "int32", "*int32": out = "math.MaxInt32" case "int64", "*int64": @@ -74,11 +74,11 @@ var ( } return out }, - "columnName": func(f fields.Field) string { return strings.Join(f.ColumnNames, ".") }, + "columnName": func(f fields.Field) string { return strings.Join(f.ColumnNames(), ".") }, "writeFunc": dremel.Write, "readFunc": dremel.Read, - "writeFuncName": func(f fields.Field) string { return fmt.Sprintf("write%s", strings.Join(f.FieldNames, "")) }, - "readFuncName": func(f fields.Field) string { return fmt.Sprintf("read%s", strings.Join(f.FieldNames, "")) }, + "writeFuncName": func(f fields.Field) string { return fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")) }, + "readFuncName": func(f fields.Field) string { return fmt.Sprintf("read%s", strings.Join(f.FieldNames(), "")) }, "parquetType": func(f fields.Field) string { if f.Optional() { return "parquet.OptionalField" diff --git a/internal/gen/gen.go b/cmd/parquetgen/gen/gen.go similarity index 73% rename from internal/gen/gen.go rename to cmd/parquetgen/gen/gen.go index 354bf41..94def56 100644 --- a/internal/gen/gen.go +++ b/cmd/parquetgen/gen/gen.go @@ -4,14 +4,13 @@ import ( "bytes" "fmt" "go/format" - "log" "os" "text/template" "github.com/parsyl/parquet" - "github.com/parsyl/parquet/internal/fields" - "github.com/parsyl/parquet/internal/parse" - "github.com/parsyl/parquet/internal/structs" + "github.com/parsyl/parquet/cmd/parquetgen/fields" + "github.com/parsyl/parquet/cmd/parquetgen/parse" + "github.com/parsyl/parquet/cmd/parquetgen/structs" sch "github.com/parsyl/parquet/schema" ) @@ -28,31 +27,27 @@ var ( // FromStruct generates a parquet reader and writer based on the struct // of type 'typ' that is defined in the go file at 'pth'. 
-func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) { +func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) error { result, err := parse.Fields(typ, pth) if err != nil { - log.Fatal(err) - } - - for _, err := range result.Errors { - log.Println(err) + return err } if len(result.Errors) > 0 && !ignore { - log.Fatal("not generating parquet.go (-ignore set to false), err: ", result.Errors) + return fmt.Errorf("not generating parquet.go (-ignore set to false), err: %v", result.Errors) } i := input{ Package: pkg, Type: typ, Import: getImport(imp), - Fields: result.Fields, + Parent: result.Parent, } tmpl := template.New("output").Funcs(funcs) tmpl, err = tmpl.Parse(tpl) if err != nil { - log.Fatal(err) + return err } for _, t := range []string{ @@ -73,45 +68,45 @@ func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) { var err error tmpl, err = tmpl.Parse(t) if err != nil { - log.Fatal(err) + return err } } var buf bytes.Buffer err = tmpl.Execute(&buf, i) if err != nil { - log.Fatal(err) + return err } gocode, err := format.Source(buf.Bytes()) if err != nil { - log.Fatal(err) + return fmt.Errorf("err: %s, gocode: %s", err, string(buf.Bytes())) } f, err := os.Create(outPth) if err != nil { - log.Fatal(err) + return err } _, err = f.Write(gocode) if err != nil { - log.Fatal(err) + return err } - f.Close() + return f.Close() } // FromParquet generates a go struct, a reader, and a writer based // on the parquet file at 'parq' -func FromParquet(parq, pth, outPth, typ, pkg, imp string, ignore bool) { +func FromParquet(parq, pth, outPth, typ, pkg, imp string, ignore bool) error { pf, err := os.Open(parq) if err != nil { - log.Fatal(err) + return err } footer, err := parquet.ReadMetaData(pf) if err != nil { - log.Fatal("couldn't read footer: ", err) + return fmt.Errorf("couldn't read footer: %s", err) } pf.Close() @@ -119,7 +114,7 @@ func FromParquet(parq, pth, outPth, typ, pkg, imp string, ignore bool) { tmpl := 
template.New("output").Funcs(funcs) tmpl, err = tmpl.Parse(structTpl) if err != nil { - log.Fatal(err) + return err } n := newStruct{ @@ -130,66 +125,67 @@ func FromParquet(parq, pth, outPth, typ, pkg, imp string, ignore bool) { var buf bytes.Buffer err = tmpl.Execute(&buf, n) if err != nil { - log.Fatal(err) + return err } gocode, err := format.Source(buf.Bytes()) if err != nil { - log.Fatal(err) + return err } f, err := os.Create(pth) if err != nil { - log.Fatal(err) + return err } _, err = f.Write(gocode) if err != nil { - log.Fatal(err) + return err } f.Close() - FromStruct(pth, outPth, typ, pkg, imp, ignore) + return FromStruct(pth, outPth, typ, pkg, imp, ignore) } type input struct { Package string Type string Import string - Fields []fields.Field + Parent fields.Field } -func getFieldType(se *sch.SchemaElement) string { +func getFieldType(se *sch.SchemaElement) (string, error) { if se.Type == nil { - log.Fatal("nil parquet schema type") + return "", fmt.Errorf("nil parquet schema type") } s := se.Type.String() out, ok := parquetTypes[s] if !ok { - log.Fatalf("unsupported parquet schema type: %s", s) + return "", fmt.Errorf("unsupported parquet schema type: %s", s) } if se.RepetitionType != nil && *se.RepetitionType == sch.FieldRepetitionType_REPEATED { - log.Fatalf("field %s is FieldRepetitionType_REPEATED, which is currently not supported", se.Name) + return "", fmt.Errorf("field %s is FieldRepetitionType_REPEATED, which is currently not supported", se.Name) } var star string if se.RepetitionType != nil && *se.RepetitionType == sch.FieldRepetitionType_OPTIONAL { star = "*" } - return fmt.Sprintf("%s%s", star, out) + return fmt.Sprintf("%s%s", star, out), nil } func dedupe(flds []fields.Field) []fields.Field { seen := map[string]bool{} out := make([]fields.Field, 0, len(flds)) for _, f := range flds { - _, ok := seen[f.FieldType] + _, ok := seen[f.FieldType()] if !ok { out = append(out, f) - seen[f.FieldType] = true + seen[f.FieldType()] = true } } + return 
out } diff --git a/internal/gen/template.go b/cmd/parquetgen/gen/template.go similarity index 96% rename from internal/gen/template.go rename to cmd/parquetgen/gen/template.go index ec870ac..0f7bc67 100644 --- a/internal/gen/template.go +++ b/cmd/parquetgen/gen/template.go @@ -16,7 +16,7 @@ import ( "github.com/parsyl/parquet" sch "github.com/parsyl/parquet/schema" {{.Import}} - {{range imports .Fields}}{{.}} + {{range imports .Parent.Fields}}{{.}} {{end}} ) @@ -48,14 +48,14 @@ type ParquetWriter struct { } func Fields(compression compression) []Field { - return []Field{ {{range .Fields}} + return []Field{ {{range .Parent.Fields}} {{template "newField" .}}{{end}} } } -{{range $i, $field := .Fields}}{{readFunc $field}} +{{range $i, $field := .Parent.Fields}}{{readFunc $field}} -{{writeFunc $i $.Fields}} +{{writeFunc $field}} {{end}} @@ -191,7 +191,7 @@ func (p *ParquetWriter) Close() error { return err } -func (p *ParquetWriter) Add(rec {{.Type}}) { +func (p *ParquetWriter) Add(rec {{.Parent.StructType}}) { if p.len == p.max { if p.child == nil { // an error can't happen here @@ -211,10 +211,10 @@ func (p *ParquetWriter) Add(rec {{.Type}}) { } type Field interface { - Add(r {{.Type}}) + Add(r {{.Parent.StructType}}) Write(w io.Writer, meta *parquet.Metadata) error Schema() parquet.Field - Scan(r *{{.Type}}) + Scan(r *{{.Parent.StructType}}) Read(r io.ReadSeeker, pg parquet.Page) error Name() string Levels() ([]uint8, []uint8) @@ -366,7 +366,7 @@ func (p *ParquetReader) Next() bool { return true } -func (p *ParquetReader) Scan(x *{{.Type}}) { +func (p *ParquetReader) Scan(x *{{.Parent.StructType}}) { if p.err != nil { return } @@ -377,7 +377,7 @@ func (p *ParquetReader) Scan(x *{{.Type}}) { } } -{{range dedupe .Fields}} +{{range dedupe .Parent.Fields}} {{if eq .Category "numeric"}} {{ template "numericField" .}} {{end}} @@ -398,7 +398,7 @@ func (p *ParquetReader) Scan(x *{{.Type}}) { {{end}} {{end}} -{{range dedupe .Fields}} +{{range dedupe .Parent.Fields}} {{if eq 
.Category "numeric"}} {{ template "requiredStats" .}} {{end}} diff --git a/internal/gen/template_bool.go b/cmd/parquetgen/gen/template_bool.go similarity index 79% rename from internal/gen/template_bool.go rename to cmd/parquetgen/gen/template_bool.go index 9dad753..33bce48 100644 --- a/internal/gen/template_bool.go +++ b/cmd/parquetgen/gen/template_bool.go @@ -3,12 +3,12 @@ package gen var boolTpl = `{{define "boolField"}}type BoolField struct { {{parquetType .}} vals []bool - read func(r {{.Type}}) {{.TypeName}} - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}) + read func(r {{.StructType}}) {{.TypeName}} + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}) stats *boolStats } -func NewBoolField(read func(r {{.Type}}) {{.TypeName}}, write func(r *{{.Type}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*{{parquetType .}})) *BoolField { +func NewBoolField(read func(r {{.StructType}}) {{.TypeName}}, write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*{{parquetType .}})) *BoolField { return &BoolField{ read: read, write: write, @@ -45,7 +45,7 @@ func (f *BoolField) Read(r io.ReadSeeker, pg parquet.Page) error { return err } -func (f *BoolField) Scan(r *{{.Type}}) { +func (f *BoolField) Scan(r *{{.StructType}}) { if len(f.vals) == 0 { return } @@ -54,7 +54,7 @@ func (f *BoolField) Scan(r *{{.Type}}) { f.vals = f.vals[1:] } -func (f *BoolField) Add(r {{.Type}}) { +func (f *BoolField) Add(r {{.StructType}}) { v := f.read(r) f.vals = append(f.vals, v) } diff --git a/internal/gen/template_bool_optional.go b/cmd/parquetgen/gen/template_bool_optional.go similarity index 79% rename from internal/gen/template_bool_optional.go rename to cmd/parquetgen/gen/template_bool_optional.go index ae57161..ed3777e 100644 --- a/internal/gen/template_bool_optional.go +++ b/cmd/parquetgen/gen/template_bool_optional.go @@ -3,12 +3,12 @@ package gen var boolOptionalTpl = `{{define "boolOptionalField"}}type 
BoolOptionalField struct { parquet.OptionalField vals []bool - read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int) + read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int) stats *boolOptionalStats } -func NewBoolOptionalField(read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *BoolOptionalField { +func NewBoolOptionalField(read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *BoolOptionalField { return &BoolOptionalField{ read: read, write: write, @@ -32,7 +32,7 @@ func (f *BoolOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { return err } -func (f *BoolOptionalField) Scan(r *{{.Type}}) { +func (f *BoolOptionalField) Scan(r *{{.StructType}}) { if len(f.Defs) == 0 { return } @@ -45,7 +45,7 @@ func (f *BoolOptionalField) Scan(r *{{.Type}}) { } } -func (f *BoolOptionalField) Add(r {{.Type}}) { +func (f *BoolOptionalField) Add(r {{.StructType}}) { vals, defs, reps := f.read(r) f.stats.add(vals, defs) f.vals = append(f.vals, vals...) 
diff --git a/internal/gen/template_optional.go b/cmd/parquetgen/gen/template_optional.go similarity index 84% rename from internal/gen/template_optional.go rename to cmd/parquetgen/gen/template_optional.go index 1d089dc..691948a 100644 --- a/internal/gen/template_optional.go +++ b/cmd/parquetgen/gen/template_optional.go @@ -7,12 +7,12 @@ var optionalNumericTpl = `{{define "optionalField"}} type {{.FieldType}} struct { parquet.OptionalField vals []{{removeStar .TypeName}} - read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, def, rep []uint8) (int, int) + read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, def, rep []uint8) (int, int) stats *{{removeStar .TypeName}}optionalStats } -func New{{.FieldType}}(read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *{{.FieldType}} { +func New{{.FieldType}}(read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *{{.FieldType}} { return &{{.FieldType}}{ read: read, write: write, @@ -47,7 +47,7 @@ func (f *{{.FieldType}}) Read(r io.ReadSeeker, pg parquet.Page) error { return err } -func (f *{{.FieldType}}) Add(r {{.Type}}) { +func (f *{{.FieldType}}) Add(r {{.StructType}}) { vals, defs, reps := f.read(r) f.stats.add(vals, defs) f.vals = append(f.vals, vals...) @@ -55,7 +55,7 @@ func (f *{{.FieldType}}) Add(r {{.Type}}) { f.Reps = append(f.Reps, reps...) 
} -func (f *{{.FieldType}}) Scan(r *{{.Type}}) { +func (f *{{.FieldType}}) Scan(r *{{.StructType}}) { if len(f.Defs) == 0 { return } diff --git a/internal/gen/template_required.go b/cmd/parquetgen/gen/template_required.go similarity index 83% rename from internal/gen/template_required.go rename to cmd/parquetgen/gen/template_required.go index eba5c97..d8ca5ee 100644 --- a/internal/gen/template_required.go +++ b/cmd/parquetgen/gen/template_required.go @@ -4,12 +4,12 @@ var requiredNumericTpl = `{{define "numericField"}} type {{.FieldType}} struct { vals []{{.TypeName}} parquet.RequiredField - read func(r {{.Type}}) {{.TypeName}} - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}) + read func(r {{.StructType}}) {{.TypeName}} + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}) stats *{{.TypeName}}stats } -func New{{.FieldType}}(read func(r {{.Type}}) {{.TypeName}}, write func(r *{{.Type}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*parquet.RequiredField)) *{{.FieldType}} { +func New{{.FieldType}}(read func(r {{.StructType}}) {{.TypeName}}, write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*parquet.RequiredField)) *{{.FieldType}} { return &{{.FieldType}}{ read: read, write: write, @@ -44,7 +44,7 @@ func (f *{{.FieldType}}) Write(w io.Writer, meta *parquet.Metadata) error { return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) } -func (f *{{.FieldType}}) Scan(r *{{.Type}}) { +func (f *{{.FieldType}}) Scan(r *{{.StructType}}) { if len(f.vals) == 0 { return } @@ -53,7 +53,7 @@ func (f *{{.FieldType}}) Scan(r *{{.Type}}) { f.vals = f.vals[1:] } -func (f *{{.FieldType}}) Add(r {{.Type}}) { +func (f *{{.FieldType}}) Add(r {{.Parent.StructType}}) { v := f.read(r) f.stats.add(v) f.vals = append(f.vals, v) diff --git a/internal/gen/template_string.go b/cmd/parquetgen/gen/template_string.go similarity index 84% rename from internal/gen/template_string.go rename to 
cmd/parquetgen/gen/template_string.go index 11160e4..edd0532 100644 --- a/internal/gen/template_string.go +++ b/cmd/parquetgen/gen/template_string.go @@ -4,12 +4,12 @@ var stringTpl = `{{define "stringField"}} type StringField struct { parquet.RequiredField vals []string - read func(r {{.Type}}) {{.TypeName}} - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}) + read func(r {{.StructType}}) {{.TypeName}} + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}) stats *stringStats } -func NewStringField(read func(r {{.Type}}) {{.TypeName}}, write func(r *{{.Type}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*parquet.RequiredField)) *StringField { +func NewStringField(read func(r {{.StructType}}) {{.TypeName}}, write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*parquet.RequiredField)) *StringField { return &StringField{ read: read, write: write, @@ -56,7 +56,7 @@ func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { return nil } -func (f *StringField) Scan(r *{{.Type}}) { +func (f *StringField) Scan(r *{{.StructType}}) { if len(f.vals) == 0 { return } @@ -65,7 +65,7 @@ func (f *StringField) Scan(r *{{.Type}}) { f.vals = f.vals[1:] } -func (f *StringField) Add(r {{.Type}}) { +func (f *StringField) Add(r {{.StructType}}) { v := f.read(r) f.stats.add(v) f.vals = append(f.vals, v) diff --git a/internal/gen/template_string_optional.go b/cmd/parquetgen/gen/template_string_optional.go similarity index 82% rename from internal/gen/template_string_optional.go rename to cmd/parquetgen/gen/template_string_optional.go index daefb9a..9b0a14d 100644 --- a/internal/gen/template_string_optional.go +++ b/cmd/parquetgen/gen/template_string_optional.go @@ -4,12 +4,12 @@ var stringOptionalTpl = `{{define "stringOptionalField"}} type StringOptionalField struct { parquet.OptionalField vals []string - read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) - write func(r *{{.Type}}, 
vals []{{removeStar .TypeName}}, def, rep []uint8) (int, int) + read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, def, rep []uint8) (int, int) stats *stringOptionalStats } -func NewStringOptionalField(read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { +func NewStringOptionalField(read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { return &StringOptionalField{ read: read, write: write, @@ -22,7 +22,7 @@ func (f *StringOptionalField) Schema() parquet.Field { return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} } -func (f *StringOptionalField) Add(r {{.Type}}) { +func (f *StringOptionalField) Add(r {{.StructType}}) { vals, defs, reps := f.read(r) f.stats.add(vals, defs) f.vals = append(f.vals, vals...) @@ -30,7 +30,7 @@ func (f *StringOptionalField) Add(r {{.Type}}) { f.Reps = append(f.Reps, reps...) 
} -func (f *StringOptionalField) Scan(r *{{.Type}}) { +func (f *StringOptionalField) Scan(r *{{.StructType}}) { if len(f.Defs) == 0 { return } diff --git a/internal/gen/template_struct.go b/cmd/parquetgen/gen/template_struct.go similarity index 100% rename from internal/gen/template_struct.go rename to cmd/parquetgen/gen/template_struct.go diff --git a/cmd/parquetgen/main.go b/cmd/parquetgen/main.go index ef37ee7..87668fe 100644 --- a/cmd/parquetgen/main.go +++ b/cmd/parquetgen/main.go @@ -8,7 +8,7 @@ import ( "os" "github.com/parsyl/parquet" - "github.com/parsyl/parquet/internal/gen" + "github.com/parsyl/parquet/cmd/parquetgen/gen" sch "github.com/parsyl/parquet/schema" ) @@ -32,14 +32,19 @@ func main() { log.Fatal("choose -parquet or -input, but not both") } + var err error if *metadata { readFooter() } else if *pageheaders { readPageHeaders() } else if *parq == "" { - gen.FromStruct(*pth, *outPth, *typ, *pkg, *imp, *ignore) + err = gen.FromStruct(*pth, *outPth, *typ, *pkg, *imp, *ignore) } else { - gen.FromParquet(*parq, *structOutPth, *outPth, *typ, *pkg, *imp, *ignore) + err = gen.FromParquet(*parq, *structOutPth, *outPth, *typ, *pkg, *imp, *ignore) + } + + if err != nil { + log.Fatal(err) } } diff --git a/cmd/parquetgen/parse/fields_test.go b/cmd/parquetgen/parse/fields_test.go new file mode 100644 index 0000000..04419d8 --- /dev/null +++ b/cmd/parquetgen/parse/fields_test.go @@ -0,0 +1,456 @@ +package parse_test + +import ( + "fmt" + "io/ioutil" + "log" + "testing" + + "github.com/parsyl/parquet/cmd/parquetgen/fields" + "github.com/parsyl/parquet/cmd/parquetgen/parse" + sch "github.com/parsyl/parquet/schema" + "github.com/stretchr/testify/assert" +) + +func init() { + log.SetOutput(ioutil.Discard) +} + +func TestFields(t *testing.T) { + + type testInput struct { + name string + typ string + expected fields.Field + errors []error + } + + testCases := []testInput{ + { + name: "flat", + typ: "Being", + expected: fields.Field{ + Children: []fields.Field{ + 
{Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "private fields", + typ: "Private", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "nested struct", + typ: "Nested", + expected: fields.Field{ + Children: []fields.Field{ + {Name: "Being", Type: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + errors: []error{}, + }, + { + name: "nested struct with name that doesn't match the struct type", + typ: "Nested2", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "Being", Name: "Info", ColumnName: "Info", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + errors: []error{}, + }, + { + name: "2 deep nested struct", + typ: "DoubleNested", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "Nested", Name: "Nested", ColumnName: "Nested", Children: []fields.Field{ + {Type: "Being", Name: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", 
RepetitionType: fields.Optional}, + }}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + }, + }, + errors: []error{}, + }, + { + name: "2 deep optional nested struct", + typ: "OptionalDoubleNested", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "OptionalNested", Name: "OptionalNested", ColumnName: "OptionalNested", Children: []fields.Field{ + {Type: "Being", Name: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + }, + }, + errors: []error{}, + }, + { + name: "optional nested struct", + typ: "OptionalNested", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "Being", Name: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + errors: []error{}, + }, + { + name: "optional nested struct v2", + typ: "OptionalNested2", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "Thing", Name: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", ColumnName: "Name", RepetitionType: fields.Required}, + }}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + errors: []error{}, + }, + { + name: "unsupported fields", + typ: "Unsupported", + errors: []error{fmt.Errorf("unsupported type &{time Time}")}, + expected: 
fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "unsupported fields mixed in with supported and embedded", + typ: "SupportedAndUnsupported", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int64", Name: "Happiness", ColumnName: "Happiness", RepetitionType: fields.Required}, + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + errors: []error{ + fmt.Errorf("unsupported type &{time Time}"), + fmt.Errorf("unsupported type &{time Time}"), + }, + }, + { + name: "embedded", + typ: "Person", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + {Type: "int64", Name: "Happiness", ColumnName: "Happiness", RepetitionType: fields.Required}, + {Type: "int64", Name: "Sadness", ColumnName: "Sadness", RepetitionType: fields.Optional}, + {Type: "string", Name: "Code", ColumnName: "Code", RepetitionType: fields.Required}, + {Type: "float32", Name: "Funkiness", ColumnName: "Funkiness", RepetitionType: fields.Required}, + {Type: "float32", Name: "Lameness", ColumnName: "Lameness", RepetitionType: fields.Optional}, + {Type: "bool", Name: "Keen", ColumnName: "Keen", RepetitionType: fields.Optional}, + {Type: "uint32", Name: "Birthday", ColumnName: "Birthday", RepetitionType: fields.Required}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "embedded preserve order", + typ: "NewOrderPerson", + expected: fields.Field{ + 
Children: []fields.Field{ + {Type: "int64", Name: "Happiness", ColumnName: "Happiness", RepetitionType: fields.Required}, + {Type: "int64", Name: "Sadness", ColumnName: "Sadness", RepetitionType: fields.Optional}, + {Type: "string", Name: "Code", ColumnName: "Code", RepetitionType: fields.Required}, + {Type: "float32", Name: "Funkiness", ColumnName: "Funkiness", RepetitionType: fields.Required}, + {Type: "float32", Name: "Lameness", ColumnName: "Lameness", RepetitionType: fields.Optional}, + {Type: "bool", Name: "Keen", ColumnName: "Keen", RepetitionType: fields.Optional}, + {Type: "uint32", Name: "Birthday", ColumnName: "Birthday", RepetitionType: fields.Required}, + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "tags", + typ: "Tagged", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "string", Name: "Name", ColumnName: "name", RepetitionType: fields.Required}, + }, + }, + }, + { + name: "omit tag", + typ: "IgnoreMe", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + }, + }, + }, + { + name: "repeated", + typ: "Slice", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "IDs", ColumnName: "ids", RepetitionType: fields.Repeated}, + }, + }, + }, + { + name: "repeated v2", + typ: "Slice2", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "int32", Name: "IDs", ColumnName: "ids", RepetitionType: fields.Repeated}, + }, + }, + }, + { + name: "repeated v2", + typ: "Slice3", + expected: fields.Field{ + Children: []fields.Field{ + 
{Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "int32", Name: "IDs", ColumnName: "ids", RepetitionType: fields.Repeated}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "nested and repeated", + typ: "Slice4", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "Hobby", Name: "Hobbies", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Name", ColumnName: "Name", RepetitionType: fields.Required}, + {Type: "int32", Name: "Difficulty", ColumnName: "Difficulty", RepetitionType: fields.Required}, + }}, + }, + }, + }, + { + name: "nested and repeated v2", + typ: "Slice5", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "Hobby2", Name: "Hobby", ColumnName: "hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Names", ColumnName: "names", RepetitionType: fields.Repeated}, + }}, + }, + }, + }, + { + name: "repeated and repeated", + typ: "Slice6", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "Hobby2", Name: "Hobbies", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Names", ColumnName: "names", RepetitionType: fields.Repeated}, + }}, + }, + }, + }, + { + name: "nested repeated and repeated", + typ: "Slice7", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "Slice6", Name: "Thing", ColumnName: "thing", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "Hobby2", Name: "Hobbies", ColumnName: "hobbies", RepetitionType: 
fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Names", ColumnName: "names", RepetitionType: fields.Repeated}, + }}, + }}, + }, + }, + }, + { + name: "dremel paper example", + typ: "Document", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "int64", Name: "DocID", ColumnName: "DocID", RepetitionType: fields.Required}, + {Type: "Link", Name: "Links", ColumnName: "Links", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "int64", Name: "Backward", ColumnName: "Backward", RepetitionType: fields.Repeated}, + {Type: "int64", Name: "Forward", ColumnName: "Forward", RepetitionType: fields.Repeated}, + }}, + {Type: "Name", Name: "Names", ColumnName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "Language", Name: "Languages", ColumnName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Code", ColumnName: "Code", RepetitionType: fields.Required}, + {Type: "string", Name: "Country", ColumnName: "Country", RepetitionType: fields.Optional}, + }}, + {Type: "string", Name: "URL", ColumnName: "URL", RepetitionType: fields.Optional}, + }}, + }, + }, + }, + { + name: "embedded embedded embedded", + typ: "A", + expected: fields.Field{ + Children: []fields.Field{ + {Name: "D", Type: "int32", ColumnName: "D", RepetitionType: fields.Required}, + {Name: "C", Type: "string", ColumnName: "C", RepetitionType: fields.Required}, + {Name: "B", Type: "bool", ColumnName: "B", RepetitionType: fields.Required}, + {Name: "Name", Type: "string", ColumnName: "Name", RepetitionType: fields.Required}, + }, + }, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { + out, err := parse.Fields(tc.typ, "./parse_test.go") + assert.Nil(t, err, tc.name) + + if len(tc.errors) == 0 { + tc.errors = nil + } + + if !assert.Equal(t, tc.errors, out.Errors, tc.name) { + return + } + + assert.Equal(t, tc.expected.Children, 
out.Parent.Children, tc.name) + }) + } +} + +func pint32(i int32) *int32 { + return &i +} + +func prt(rt sch.FieldRepetitionType) *sch.FieldRepetitionType { + return &rt +} + +func pt(t sch.Type) *sch.Type { + return &t +} + +func TestDefIndex(t *testing.T) { + testCases := []struct { + def int + field fields.Field + expected int + }{ + { + def: 1, + field: fields.Field{ + RepetitionType: fields.Repeated, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Required, + }, + }, + }, + expected: 1, + }, + { + def: 2, + field: fields.Field{ + RepetitionType: fields.Repeated, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Required, + }, + }, + }, + expected: 2, + }, + { + def: 0, + field: fields.Field{ + RepetitionType: fields.Repeated, + Parent: &fields.Field{ + RepetitionType: fields.Required, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + }, + }, + }, + expected: 0, + }, + { + def: 2, + field: fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Required, + Parent: &fields.Field{ + RepetitionType: fields.Repeated, + }, + }, + }, + expected: 2, + }, + { + def: 2, + field: fields.Field{ + RepetitionType: fields.Required, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Repeated, + }, + }, + }, + expected: 1, + }, + { + def: 1, + field: fields.Field{ + RepetitionType: fields.Required, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Repeated, + }, + }, + }, + expected: 0, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { + assert.Equal(t, tc.expected, tc.field.DefIndex(tc.def)) + }) + } +} diff --git a/cmd/parquetgen/parse/parse.go b/cmd/parquetgen/parse/parse.go new file mode 100644 index 0000000..056c366 --- /dev/null +++ 
b/cmd/parquetgen/parse/parse.go @@ -0,0 +1,271 @@ +package parse + +import ( + "fmt" + "go/parser" + "go/token" + "log" + "strings" + + "go/ast" + + "github.com/parsyl/parquet/cmd/parquetgen/fields" + flds "github.com/parsyl/parquet/cmd/parquetgen/fields" +) + +const letters = "abcdefghijklmnopqrstuvwxyz" + +type field struct { + Field fields.Field + tagNames []string + fieldName string + fieldType string + omit bool + embedded bool + optional bool + repeated bool + err error +} + +// Result holds the fields and errors that are generated +// by reading a go struct. +type Result struct { + // Fields are the fields that will be written to and read from a parquet file. + Parent flds.Field + // Errors is a list of errors that occurred while parsing a struct. + Errors []error +} + +// Fields gets the fields of the given struct. +// pth must be a go file that defines the typ struct. +// Any embedded structs must also be in that same file. +func Fields(typ, pth string) (*Result, error) { + fullTyp := typ + typ = getType(fullTyp) + + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, pth, nil, 0) + if err != nil { + log.Fatal(err) + } + + f := &finder{n: map[string]ast.Node{}} + ast.Walk(visitorFunc(f.findTypes), file) + + if f.n == nil { + return nil, fmt.Errorf("could not find %s", typ) + } + + fields, err := getFields(f.n) + if err != nil { + return nil, err + } + + parent, ok := fields[typ] + if !ok { + return nil, fmt.Errorf("could not find %s", typ) + } + + errs := getChildren(&parent, fields) + + return &Result{ + Parent: flds.Field{Type: typ, Children: parent.Children}, + Errors: errs, + }, nil +} + +func getChildren(parent *flds.Field, fields map[string]flds.Field) []error { + var children []flds.Field + var errs []error + p, ok := fields[parent.Type] + if !ok { + errs = append(errs, fmt.Errorf("could not find %s", parent.Type)) + } + + for _, child := range p.Children { + if child.Primitive() { + children = append(children, child) + continue + } + + 
f, ok := fields[child.Type] + if !ok { + f, ok = fields[child.Type] + if !ok { + errs = append(errs, fmt.Errorf("unsupported type %+v", child.Type)) + continue + } + } + + errs = append(errs, getChildren(&child, fields)...) + + f.Name = child.Name + f.Type = child.Type + f.ColumnName = child.ColumnName + f.Children = child.Children + f.RepetitionType = child.RepetitionType + + if child.Embedded { + for _, ch := range f.Children { + children = append(children, ch) + } + } else { + children = append(children, f) + } + } + parent.Children = children + return errs +} + +func isPrivate(x *ast.Field) bool { + var s string + if len(x.Names) == 0 { + s = fmt.Sprintf("%s", x.Type) + } else { + s = fmt.Sprintf("%s", x.Names[0]) + } + return strings.Contains(letters, string(s[0])) +} + +func getFields(n map[string]ast.Node) (map[string]fields.Field, error) { + fields := map[string]flds.Field{} + for k, n := range n { + _, ok := n.(*ast.TypeSpec) + if !ok { + continue + } + + parent := flds.Field{ + Type: k, + } + + ast.Inspect(n, func(n ast.Node) bool { + if n == nil { + return false + } + + switch x := n.(type) { + case *ast.Field: + if len(x.Names) == 1 && !isPrivate(x) { + f, skip := getField(x.Names[0].Name, x, nil) + if !skip { + parent.Children = append(parent.Children, f) + } + } else if len(x.Names) == 0 && !isPrivate(x) { + f, skip := getField(fmt.Sprintf("%s", x.Type), x, nil) + f.Embedded = true + if !skip { + parent.Children = append(parent.Children, f) + } + } + } + return true + }) + + fields[k] = parent + } + + return fields, nil +} + +func getType(typ string) string { + parts := strings.Split(typ, ".") + return parts[len(parts)-1] +} + +func getField(name string, x ast.Node, parent *flds.Field) (flds.Field, bool) { + var typ, tag string + var optional, repeated bool + ast.Inspect(x, func(n ast.Node) bool { + switch t := n.(type) { + case *ast.Field: + if t.Tag != nil { + tag = parseTag(t.Tag.Value) + } + typ = fmt.Sprintf("%s", t.Type) + case *ast.ArrayType: + 
at := n.(*ast.ArrayType) + s := fmt.Sprintf("%v", at.Elt) + typ = s + repeated = true + case *ast.StarExpr: + optional = true + typ = fmt.Sprintf("%s", t.X) + case ast.Expr: + s := fmt.Sprintf("%v", t) + _, ok := types[s] + if ok { + typ = s + } + } + return true + }) + + if tag == "" { + tag = name + } + + rt := fields.Required + if repeated { + rt = fields.Repeated + } else if optional { + rt = fields.Optional + } + + return flds.Field{ + Type: typ, + Name: name, + ColumnName: tag, + RepetitionType: rt, + }, tag == "-" +} + +func parseTag(t string) string { + i := strings.Index(t, `parquet:"`) + if i == -1 { + return "" + } + t = t[i+9:] + return t[:strings.Index(t, `"`)] +} + +type visitorFunc func(n ast.Node) ast.Visitor + +func (f visitorFunc) Visit(n ast.Node) ast.Visitor { + return f(n) +} + +type finder struct { + n map[string]ast.Node +} + +func (f *finder) findTypes(n ast.Node) ast.Visitor { + switch n := n.(type) { + case *ast.ImportSpec: + return visitorFunc(f.findTypes) + case *ast.Package: + return visitorFunc(f.findTypes) + case *ast.File: + return visitorFunc(f.findTypes) + case *ast.GenDecl: + if n.Tok == token.TYPE { + return visitorFunc(f.findTypes) + } + case *ast.TypeSpec: + f.n[n.Name.Name] = n + return visitorFunc(f.findTypes) + } + + return nil +} + +var types = map[string]bool{ + "int32": true, + "uint32": true, + "int64": true, + "uint64": true, + "float32": true, + "float64": true, + "bool": true, + "string": true, +} diff --git a/internal/parse/parse_test.go b/cmd/parquetgen/parse/parse_test.go similarity index 95% rename from internal/parse/parse_test.go rename to cmd/parquetgen/parse/parse_test.go index d592529..6be0a3e 100644 --- a/internal/parse/parse_test.go +++ b/cmd/parquetgen/parse/parse_test.go @@ -46,13 +46,13 @@ type Private struct { name string } -type Nested struct { - Being Being +type Nested2 struct { + Info Being Anniversary *uint64 } -type Nested2 struct { - Info Being +type Nested struct { + Being Being Anniversary 
*uint64 } @@ -111,12 +111,13 @@ type Slice3 struct { } type Hobby struct { - Name string + Name string + Difficulty int32 } type Slice4 struct { - ID int32 `parquet:"id"` - Hobbies []Hobby + ID int32 `parquet:"id"` + Hobbies []Hobby `parquet:"hobbies"` } type Hobby2 struct { diff --git a/internal/structs/structs.go b/cmd/parquetgen/structs/structs.go similarity index 100% rename from internal/structs/structs.go rename to cmd/parquetgen/structs/structs.go diff --git a/internal/structs/structs_test.go b/cmd/parquetgen/structs/structs_test.go similarity index 97% rename from internal/structs/structs_test.go rename to cmd/parquetgen/structs/structs_test.go index 5faaa71..277778c 100644 --- a/internal/structs/structs_test.go +++ b/cmd/parquetgen/structs/structs_test.go @@ -5,7 +5,7 @@ import ( "go/format" "testing" - "github.com/parsyl/parquet/internal/structs" + "github.com/parsyl/parquet/cmd/parquetgen/structs" sch "github.com/parsyl/parquet/schema" "github.com/stretchr/testify/assert" ) @@ -69,7 +69,7 @@ func TestStruct(t *testing.T) { gocode, err := format.Source([]byte(s)) assert.NoError(t, err) if !assert.Equal(t, tc.expected, string(gocode)) { - fmt.Println(string(gocode)) + t.Fatal(string(gocode)) } }) } diff --git a/fields.go b/fields.go index 779f018..dfcb498 100644 --- a/fields.go +++ b/fields.go @@ -11,11 +11,45 @@ import ( "io" "github.com/golang/snappy" - "github.com/parsyl/parquet/internal/fields" "github.com/parsyl/parquet/internal/rle" sch "github.com/parsyl/parquet/schema" ) +// RepetitionType is an enum of the possible +// parquet repetition types +type RepetitionType int + +const ( + Unseen RepetitionType = -1 + Required RepetitionType = 0 + Optional RepetitionType = 1 + Repeated RepetitionType = 2 +) + +type RepetitionTypes []RepetitionType + +// MaxDef returns the largest definition level +func (r RepetitionTypes) MaxDef() uint8 { + var out uint8 + for _, rt := range r { + if rt == Optional || rt == Repeated { + out++ + } + } + return out +} + +// 
MaxRep returns the largest repetition level +func (r RepetitionTypes) MaxRep() uint8 { + var out uint8 + for _, rt := range r { + if rt == Repeated { + out++ + } + } + return out +} + // RequiredField writes the raw data for required columns type RequiredField struct { pth []string @@ -121,12 +155,12 @@ type OptionalField struct { repeated bool } -func getRepetitionTypes(in []int) fields.RepetitionTypes { - out := make([]fields.RepetitionType, len(in)) +func getRepetitionTypes(in []int) RepetitionTypes { + out := make([]RepetitionType, len(in)) for i, x := range in { - out[i] = fields.RepetitionType(x) + out[i] = RepetitionType(x) } - return fields.RepetitionTypes(out) + return RepetitionTypes(out) } // NewOptionalField creates an optional field @@ -189,21 +223,23 @@ func (f *OptionalField) valsFromDefs(defs []uint8, max uint8) int { func (f *OptionalField) DoWrite(w io.Writer, meta *Metadata, vals []byte, count int, stats Stats) error { buf := bytes.Buffer{} wc := &writeCounter{w: &buf} - err := writeLevels(wc, f.Defs, int32(bits.Len(uint(f.MaxLevels.Def)))) - if err != nil { - return err - } - defLen := wc.n + var repLen int64 if f.repeated { err := writeLevels(wc, f.Reps, int32(bits.Len(uint(f.MaxLevels.Rep)))) if err != nil { return err } + repLen = wc.n } - repLen := wc.n - defLen + err := writeLevels(wc, f.Defs, int32(bits.Len(uint(f.MaxLevels.Def)))) + if err != nil { + return err + } + + defLen := wc.n - repLen wc.Write(vals) l, cl, vals, err := compress(f.compression, buf.Bytes()) @@ -238,21 +274,24 @@ func (f *OptionalField) DoRead(r io.ReadSeeker, pg Page) (io.Reader, []int, erro return nil, nil, err } - defs, l, err := readLevels(bytes.NewBuffer(data), int32(bits.Len(uint(f.MaxLevels.Def)))) - if err != nil { - return nil, nil, err - } + var l int - f.Defs = append(f.Defs, defs[:int(ph.DataPageHeader.NumValues)]...) 
if f.repeated { reps, l2, err := readLevels(bytes.NewBuffer(data[l:]), int32(bits.Len(uint(f.MaxLevels.Rep)))) if err != nil { return nil, nil, err } - l += l2 f.Reps = append(f.Reps, reps[:int(ph.DataPageHeader.NumValues)]...) + l += l2 } + defs, l2, err := readLevels(bytes.NewBuffer(data[l:]), int32(bits.Len(uint(f.MaxLevels.Def)))) + if err != nil { + return nil, nil, err + } + f.Defs = append(f.Defs, defs[:int(ph.DataPageHeader.NumValues)]...) + l += l2 + n := f.valsFromDefs(defs, uint8(f.MaxLevels.Def)) sizes = append(sizes, n) out = append(out, data[l:]...) diff --git a/internal/dremel/dremel_test.go b/internal/dremel/dremel_test.go deleted file mode 100644 index b5a1785..0000000 --- a/internal/dremel/dremel_test.go +++ /dev/null @@ -1,133 +0,0 @@ -package dremel_test - -import ( - "bytes" - "log" - "testing" - - "github.com/stretchr/testify/assert" -) - -//go:generate parquetgen -input dremel_test.go -type Document -package dremel_test -output dremel_generated_test.go - -var ( - dremelDocs = []Document{ - { - DocID: 10, - Link: &Link{Forward: []int64{20, 40, 60}}, - Names: []Name{ - { - Languages: []Language{ - {Code: "en-us", Country: pstring("us")}, - {Code: "en"}, - }, - URL: pstring("http://A"), - }, - { - URL: pstring("http://B"), - }, - { - Languages: []Language{ - {Code: "en-gb", Country: pstring("gb")}, - }, - }, - }, - }, - { - DocID: 20, - Link: &Link{Backward: []int64{10, 30}, Forward: []int64{80}}, - Names: []Name{{URL: pstring("http://C")}}, - }, - } -) - -// TestLevels verifies that the example from the dremel paper -// results in the correct definition and repetition levels. 
-func TestLevels(t *testing.T) { - var buf bytes.Buffer - pw, err := NewParquetWriter(&buf) - if err != nil { - assert.NoError(t, err) - } - - for _, doc := range dremelDocs { - pw.Add(doc) - } - - if err := pw.Write(); err != nil { - assert.NoError(t, err) - } - - pw.Close() - - pr, err := NewParquetReader(bytes.NewReader(buf.Bytes())) - if err != nil { - assert.NoError(t, err) - } - - expected := []Levels{ - {Name: "docid"}, - {Name: "link.backward", Defs: []uint8{1, 2, 2}, Reps: []uint8{0, 0, 1}}, - {Name: "link.forward", Defs: []uint8{2, 2, 2, 2}, Reps: []uint8{0, 1, 1, 0}}, - {Name: "names.languages.code", Defs: []uint8{2, 2, 1, 2, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, - {Name: "names.languages.country", Defs: []uint8{3, 2, 1, 3, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, - {Name: "names.url", Defs: []uint8{2, 2, 1, 2}, Reps: []uint8{0, 1, 1, 0}}, - } - - assert.Equal(t, expected, pr.Levels()) -} - -// TestDremel uses the example from the dremel paper and writes then -// reads from a parquet file to make sure nested fields work correctly. 
-func TestDremel(t *testing.T) { - var buf bytes.Buffer - pw, err := NewParquetWriter(&buf) - if err != nil { - log.Fatal(err) - } - - for _, doc := range dremelDocs { - pw.Add(doc) - } - - if err := pw.Write(); err != nil { - log.Fatal(err) - } - - pw.Close() - - pr, err := NewParquetReader(bytes.NewReader(buf.Bytes())) - if err != nil { - log.Fatal(err) - } - - var out []Document - for pr.Next() { - var d Document - pr.Scan(&d) - out = append(out, d) - } - - assert.Equal(t, dremelDocs, out) -} - -type Link struct { - Backward []int64 `parquet:"backward"` - Forward []int64 `parquet:"forward"` -} - -type Language struct { - Code string `parquet:"code"` - Country *string `parquet:"country"` -} - -type Name struct { - Languages []Language `parquet:"languages"` - URL *string `parquet:"url"` -} - -type Document struct { - DocID int64 `parquet:"docid"` - Link *Link `parquet:"link"` - Names []Name `parquet:"names"` -} diff --git a/internal/dremel/read.go b/internal/dremel/read.go deleted file mode 100644 index e17f39f..0000000 --- a/internal/dremel/read.go +++ /dev/null @@ -1,41 +0,0 @@ -package dremel - -import ( - "fmt" - "strings" - - "github.com/parsyl/parquet/internal/fields" -) - -func readRequired(f fields.Field) string { - return fmt.Sprintf(`func read%s(x %s) %s { - return x.%s -}`, strings.Join(f.FieldNames, ""), f.Type, f.TypeName, strings.Join(f.FieldNames, ".")) -} - -func readOptional(f fields.Field) string { - var out string - n := defs(f) - for def := 0; def < n; def++ { - out += fmt.Sprintf(`case x.%s == nil: - return nil, []uint8{%d}, nil -`, nilField(def, f), def) - } - - var ptr string - if f.RepetitionTypes[len(f.RepetitionTypes)-1] == fields.Optional { - ptr = "*" - } - out += fmt.Sprintf(` default: - return []%s{%sx.%s}, []uint8{%d}, nil`, cleanTypeName(f.TypeName), ptr, nilField(n, f), n) - - return fmt.Sprintf(`func read%s(x %s) ([]%s, []uint8, []uint8) { - switch { - %s - } -}`, strings.Join(f.FieldNames, ""), f.Type, cleanTypeName(f.TypeName), 
out) -} - -func cleanTypeName(s string) string { - return strings.Replace(strings.Replace(s, "*", "", 1), "[]", "", 1) -} diff --git a/internal/dremel/write_optional.go b/internal/dremel/write_optional.go deleted file mode 100644 index edc6dfd..0000000 --- a/internal/dremel/write_optional.go +++ /dev/null @@ -1,186 +0,0 @@ -package dremel - -import ( - "bytes" - "fmt" - "log" - "strings" - "text/template" - - "github.com/parsyl/parquet/internal/fields" -) - -func init() { - funcs := template.FuncMap{ - "removeStar": func(s string) string { - return strings.Replace(s, "*", "", 1) - }, - "plusOne": func(i int) int { return i + 1 }, - "notNil": func(x *ifElse) bool { return x != nil }, - } - - var err error - writeTpl, err = template.New("output").Funcs(funcs).Parse(`func write{{.FuncName}}(x *{{.Field.Type}}, vals []{{removeStar .Field.TypeName}}, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { {{range $i, $case := .Cases}}{{$def:=plusOne $i}} - case {{$def}}: - {{$defIndex := $.Field.DefIndex $def}}{{if $case.UseIf}}{{template "ifelse" $case}}{{else}}{{$case.Val}}{{end}}{{if eq $def $.MaxDef}} - return 1, 1{{end}}{{end}} - } - - return 0, 1 -}`) - if err != nil { - log.Fatal(err) - } - - ifelseStmt = `{{define "ifelse"}}if {{.If.Cond}} { - {{.If.Val}} -} {{range $else := .ElseIf}} else if {{$else.Cond}} { - {{$else.Val}} -}{{end}} {{if notNil .Else}} else { - {{.Else.Val}} -} {{end}}{{end}}` - - writeTpl, err = writeTpl.Parse(ifelseStmt) - if err != nil { - log.Fatal(err) - } -} - -var ( - writeTpl *template.Template - ifelseStmt string -) - -type writeInput struct { - fields.Field - Cases []ifElses - FuncName string -} - -type ifElse struct { - Cond string - Val string -} - -type ifElses struct { - If ifElse - ElseIf []ifElse - Else *ifElse - Val *string -} - -func (i ifElses) UseIf() bool { - return i.Val == nil -} - -func writeOptional(i int, flds []fields.Field) string { - f := flds[i] - s := fields.Seen(i, flds) - f.Seen = s - wi := writeInput{ 
- Field: f, - FuncName: strings.Join(f.FieldNames, ""), - Cases: writeOptionalCases(f, s), - } - - var buf bytes.Buffer - err := writeTpl.Execute(&buf, wi) - if err != nil { - log.Fatal(err) //TODO: return error - } - return string(buf.Bytes()) -} - -func writeOptionalCases(f fields.Field, seen fields.RepetitionTypes) []ifElses { - var out []ifElses - for def := 1; def <= defs(f); def++ { - if useIfElse(def, 0, seen, f) { - out = append(out, ifelses(def, 0, f)) - } else { - s := f.Init(def, 0) - out = append(out, ifElses{Val: &s}) - } - } - return out -} - -type ifElseCase struct { - f fields.Field - p fields.Field -} - -// ifelses returns an if else block for the given definition and repetition level -func ifelses(def, rep int, orig fields.Field) ifElses { - opts := optionals(def, orig) - var cases ifElseCases - for _, o := range opts { - f := orig.Copy() - f.Seen = seens(o) - cases = append(cases, ifElseCase{f: f, p: f.Parent(o + 1)}) - } - - return cases.ifElses(def, rep, int(orig.MaxDef())) -} - -func seens(i int) fields.RepetitionTypes { - out := make([]fields.RepetitionType, i) - for i := range out { - out[i] = fields.Repeated - } - return fields.RepetitionTypes(out) -} - -type ifElseCases []ifElseCase - -func (i ifElseCases) ifElses(def, rep, md int) ifElses { - out := ifElses{ - If: ifElse{ - Cond: fmt.Sprintf("x.%s == nil", strings.Join(i[0].p.FieldNames, ".")), - Val: i[0].f.Init(def, rep), - }, - } - - var leftovers []ifElseCase - if len(i) > 1 { - out.Else = &ifElse{ - Val: i[len(i)-1].f.Init(def, rep), - } - if len(i) > 2 { - leftovers = i[1 : len(i)-1] - } - } - - for _, iec := range leftovers { - out.ElseIf = append(out.ElseIf, ifElse{ - Cond: fmt.Sprintf("x.%s == nil", strings.Join(iec.p.FieldNames, ".")), - Val: iec.f.Init(def, rep), - }) - } - - return out -} - -// optionals returns a slice that contains the index of -// each optional field. 
-func optionals(def int, f fields.Field) []int { - var out []int - di := f.DefIndex(def) - seen := append(f.Seen[:0:0], f.Seen...) - - if len(seen) > di+1 { - seen = seen[:di+1] - } - - for i, rt := range f.RepetitionTypes[:di+1] { - if rt >= fields.Optional { - out = append(out, i) - } - if i > len(seen)-1 && rt >= fields.Optional { - break - } - } - - return out -} diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go deleted file mode 100644 index 6ee8a1f..0000000 --- a/internal/dremel/write_repeated.go +++ /dev/null @@ -1,209 +0,0 @@ -package dremel - -import ( - "bytes" - "fmt" - "log" - "strings" - "text/template" - - "github.com/parsyl/parquet/internal/fields" -) - -var ( - writeRepeatedTpl *template.Template - ifTpl *template.Template -) - -type defCase struct { - Def int - Seen []fields.RepetitionType - Field fields.Field -} - -func init() { - funcs := template.FuncMap{ - "removeStar": func(s string) string { - return strings.Replace(strings.Replace(s, "*", "", 1), "[]", "", 1) - }, - "newDefCase": func(def int, seen []fields.RepetitionType, f fields.Field) defCase { - return defCase{Def: def, Seen: seen, Field: f} - }, - "init": initRepeated, - "getRep": func(def int, f fields.Field) int { - var rep int - //defindex indead of def? 
- for _, rt := range f.RepetitionTypes[:f.DefIndex(def)] { - if rt == fields.Repeated { - rep++ - } - } - return rep - }, - "notNil": func(x *ifElse) bool { return x != nil }, - } - - var err error - ifTpl, err = template.New("tmp").Funcs(funcs).Parse(`{{template "ifelse" .}}`) - if err != nil { - log.Fatalf("unable to create templates: %s", err) - } - ifTpl, err = ifTpl.Parse(ifelseStmt) - if err != nil { - log.Fatalf("unable to create templates: %s", err) - } - - writeRepeatedTpl, err = template.New("output").Funcs(funcs).Parse(`func {{.Func}}(x *{{.Field.Type}}, vals []{{removeStar .Field.TypeName}}, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, {{.Field.MaxRep}}) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - {{template "defSwitch" .}} - } - - return nVals, nLevels -}`) - if err != nil { - log.Fatalf("unable to create templates: %s", err) - } - - defSwitchTpl := `{{define "defSwitch"}}switch def { {{range $i, $def := .Defs}} - case {{$def}}: - {{ template "defCase" newDefCase $def $.Seen $.Field}}{{if eq $def $.Field.MaxDef}} - nVals++{{end}}{{end}} - }{{end}}` - - defCaseTpl := `{{define "defCase"}}{{if eq .Def .Field.MaxDef}}{{template "repSwitch" .}}{{else}}{{$rep:=getRep .Def .Field}}{{init .Def $rep .Seen .Field}}{{end}}{{end}}` - - repSwitchTpl := `{{define "repSwitch"}}switch rep { -{{range $case := .Field.RepCases $.Seen}}{{$case.Case}} -{{init $.Def $case.Rep $.Seen $.Field}} -{{end}} } {{end}}` - - for _, t := range []string{defCaseTpl, defSwitchTpl, repSwitchTpl} { - writeRepeatedTpl, err = writeRepeatedTpl.Parse(t) - if err != nil { - log.Fatal(err) - } - } -} - -type writeRepeatedInput struct { - Field fields.Field - Defs []int - Seen []fields.RepetitionType - Func string -} - -func writeRequired(f fields.Field) string { - return fmt.Sprintf(`func %s(x *%s, vals []%s) { - x.%s = vals[0] -}`, fmt.Sprintf("write%s", 
strings.Join(f.FieldNames, "")), f.Type, f.TypeName, strings.Join(f.FieldNames, ".")) -} - -func writeRepeated(i int, flds []fields.Field) string { - f := flds[i] - f.Seen = fields.Seen(i, flds) - - wi := writeRepeatedInput{ - Field: f, - Func: fmt.Sprintf("write%s", strings.Join(f.FieldNames, "")), - Defs: writeCases(f, f.Seen), - Seen: f.Seen, - } - - var buf bytes.Buffer - writeRepeatedTpl.Execute(&buf, wi) - return string(buf.Bytes()) -} - -func initRepeated(def, rep int, seen fields.RepetitionTypes, f fields.Field) string { - md := int(f.MaxDef()) - rt := f.RepetitionTypes.Def(def) - - if def < md && rep == 0 && rt == fields.Repeated { - rep = def - } - - if useIfElse(def, rep, append(seen[:0:0], seen...), f) { - ie := ifelses(def, rep, f) - var buf bytes.Buffer - if err := ifTpl.Execute(&buf, ie); err != nil { - log.Fatalf("unable to execute ifTpl: %s", err) - } - return string(buf.Bytes()) - } - - f.Seen = seen - return f.Init(def, rep) -} - -func useIfElse(def, rep int, seen fields.RepetitionTypes, f fields.Field) bool { - if len(seen) == 0 { - return false - } - - i := f.DefIndex(def) - - if i+1 > len(seen) && f.RepetitionTypes[:len(seen)].Required() { - return false - } - - if len(seen) > i+1 { - seen = seen[:i+1] - } - - if seen.Repeated() || (def == f.MaxDef() && rep > 0) { - return false - } - - return true -} - -func writeCases(f fields.Field, seen fields.RepetitionTypes) []int { - var dfs []int - start := 1 - if seen.Repeated() { - start = 1 + len(seen) - } - - for def := start; def <= f.MaxDef(); def++ { - dfs = append(dfs, def) - } - return dfs -} - -func nilField(i int, f fields.Field) string { - var flds []string - var count int - for j, o := range f.RepetitionTypes { - flds = append(flds, f.FieldNames[j]) - if o == fields.Optional { - count++ - } - if count > i { - break - } - } - return strings.Join(flds, ".") -} - -// count the number of fields in the path that can be optional -func defs(f fields.Field) int { - var out int - for _, o := range 
f.RepetitionTypes { - if o == fields.Optional || o == fields.Repeated { - out++ - } - } - return out -} diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go deleted file mode 100644 index cc522c9..0000000 --- a/internal/dremel/write_test.go +++ /dev/null @@ -1,637 +0,0 @@ -package dremel_test - -import ( - "fmt" - "go/format" - "testing" - - "github.com/parsyl/parquet/internal/dremel" - "github.com/parsyl/parquet/internal/fields" - "github.com/stretchr/testify/assert" -) - -func TestWrite(t *testing.T) { - testCases := []struct { - name string - // fields is a slice so that the parts of the field's path - // that have been seen already can be included in before the - // the field being tested. The last field is the one being tested. - fields []fields.Field - result string - }{ - { - name: "required and not nested", - fields: []fields.Field{{Type: "Person", TypeName: "int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required}}}, - result: `func writeID(x *Person, vals []int32) { - x.ID = vals[0] -}`, - }, - { - name: "optional and not nested", - fields: []fields.Field{{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}}, - result: `func writeID(x *Person, vals []int32, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.ID = pint32(vals[0]) - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "required and nested", - fields: []fields.Field{{Type: "Person", TypeName: "int32", FieldNames: []string{"Other", "Hobby", "Difficulty"}, FieldTypes: []string{"Other", "Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}}, - result: `func writeOtherHobbyDifficulty(x *Person, vals []int32) { - x.Other.Hobby.Difficulty = vals[0] -}`, - }, - { - name: "optional and nested", - fields: []fields.Field{ - {Type: "Person", TypeName: 
"*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - }, - result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Hobby = &Hobby{} - case 2: - x.Hobby = &Hobby{Difficulty: pint32(vals[0])} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "optional and nested and seen by an optional fields", - fields: []fields.Field{ - {FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - }, - result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - if x.Hobby == nil { - x.Hobby = &Hobby{} - } - case 2: - if x.Hobby == nil { - x.Hobby = &Hobby{Difficulty: pint32(vals[0])} - } else { - x.Hobby.Difficulty = pint32(vals[0]) - } - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "mix of optional and required and nested", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - }, - result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Hobby = &Hobby{Name: vals[0]} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "mix of optional and required and nested v2", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: 
[]fields.RepetitionType{fields.Required, fields.Optional}}, - }, - result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Hobby.Name = pstring(vals[0]) - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "mix of optional and require and nested 3 deep", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Optional}}, - }, - result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Friend = &Entity{} - case 2: - x.Friend = &Entity{Hobby: Item{Name: pstring(vals[0])}} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "mix of optional and required and nested 3 deep v2 and seen by optional field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, - }, - result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Friend.Hobby = &Item{} - case 2: - x.Friend.Hobby = &Item{Name: pstring(vals[0])} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "mix of optional and required and nested 3 deep v3", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Required}}, - }, - result: 
`func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Friend = &Entity{} - case 2: - x.Friend = &Entity{Hobby: &Item{Name: vals[0]}} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "nested 3 deep all optional", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, - }, - result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Friend = &Entity{} - case 2: - x.Friend = &Entity{Hobby: &Item{}} - case 3: - x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "nested 3 deep all optional and seen by optional field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, - }, - result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - if x.Friend == nil { - x.Friend = &Entity{} - } - case 2: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{}} - } else { - x.Friend.Hobby = &Item{} - } - case 3: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}} - } else { - x.Friend.Hobby = &Item{Name: pstring(vals[0])} - } - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "four deep", - fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", 
"Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}}, - result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Friend = &Entity{} - case 2: - x.Friend = &Entity{Hobby: &Item{}} - case 3: - x.Friend = &Entity{Hobby: &Item{Name: &Name{}}} - case 4: - x.Friend = &Entity{Hobby: &Item{Name: &Name{First: pstring(vals[0])}}} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "four deep and seen by optional field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}, - }, - result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - if x.Friend == nil { - x.Friend = &Entity{} - } - case 2: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{}} - } else { - x.Friend.Hobby = &Item{} - } - case 3: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{Name: &Name{}}} - } else { - x.Friend.Hobby = &Item{Name: &Name{}} - } - case 4: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{Name: &Name{First: pstring(vals[0])}}} - } else { - x.Friend.Hobby = &Item{Name: &Name{First: pstring(vals[0])}} - } - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "four deep mixed", - fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, 
RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}}, - result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Friend.Hobby = &Item{} - case 2: - x.Friend.Hobby = &Item{Name: &Name{}} - case 3: - x.Friend.Hobby = &Item{Name: &Name{First: pstring(vals[0])}} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "four deep mixed and seen by a required sub-field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, - }, - result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Friend.Hobby = &Item{} - case 2: - x.Friend.Hobby = &Item{Name: &Name{}} - case 3: - x.Friend.Hobby = &Item{Name: &Name{First: pstring(vals[0])}} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "four deep mixed v2", - fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}}, - result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Friend = &Entity{} - case 2: - x.Friend = &Entity{Hobby: &Item{}} - case 3: - x.Friend = &Entity{Hobby: &Item{Name: &Name{First: vals[0]}}} - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "four deep mixed v2 and 
seen by an optional field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - }, - result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - if x.Friend == nil { - x.Friend = &Entity{} - } - case 2: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{}} - } else { - x.Friend.Hobby = &Item{} - } - case 3: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{Name: &Name{First: vals[0]}}} - } else { - x.Friend.Hobby = &Item{Name: &Name{First: vals[0]}} - } - return 1, 1 - } - - return 0, 1 -}`, - }, - { - name: "writeLinkBackward", - fields: []fields.Field{ - {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - result: `func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - x.Link = &Link{} - case 2: - switch rep { - case 0: - x.Link = &Link{Backward: []int64{vals[nVals]}} - case 1: - x.Link.Backward = append(x.Link.Backward, vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -}`, - }, - { - name: "writeLinkFoward", - fields: []fields.Field{ - {FieldNames: []string{"Link", "Backward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - {Type: "Document", TypeName: "int64", 
FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - result: `func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 2: - switch rep { - default: - x.Link.Forward = append(x.Link.Forward, vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -}`, - }, - { - name: "writeNamesLanguagesCode", - fields: []fields.Field{ - {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - }, - result: `func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 2) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - x.Names = append(x.Names, Name{}) - case 2: - switch rep { - case 0: - x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} - case 1: - x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) - case 2: - x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) - } - nVals++ - } - } - - return nVals, nLevels -}`, - }, - { - name: "writeNamesLanguagesCountry", - fields: []fields.Field{ - {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, 
RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - }, - result: `func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 2) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 3: - switch rep { - default: - x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -}`, - }, - { - name: "writeFriendsID", - fields: []fields.Field{ - {Type: "Person", FieldNames: []string{"Friends", "ID"}, FieldTypes: []string{"Being", "int32"}, TypeName: "int32", FieldType: "Int32OptionalField", ParquetType: "Int32Type", Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 0}}, - }, - result: `func writeFriendsID(x *Person, vals []int32, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - switch rep { - case 0: - x.Friends = []Being{{ID: vals[nVals]}} - case 1: - x.Friends = append(x.Friends, Being{ID: vals[nVals]}) - } - nVals++ - } - } - - return nVals, nLevels -}`, - }, - { - name: "repeated primitive", - fields: []fields.Field{ - {Type: "Document", TypeName: "int64", FieldNames: []string{"LuckyNumbers"}, FieldTypes: []string{"int64"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - }, - result: `func writeLuckyNumbers(x *Document, vals []int64, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - switch rep { - case 0: - x.LuckyNumbers = []int64{vals[nVals]} - case 1: - x.LuckyNumbers = 
append(x.LuckyNumbers, vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -}`, - }, - { - name: "repeated field not handled by previous repeated field", - fields: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - result: `func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - if x.Link == nil { - x.Link = &Link{} - } - case 2: - switch rep { - case 0: - if x.Link == nil { - x.Link = &Link{Forward: []int64{vals[nVals]}} - } else { - x.Link.Forward = append(x.Link.Forward, vals[nVals]) - } - case 1: - x.Link.Forward = append(x.Link.Forward, vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -}`, - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { - s := dremel.Write(len(tc.fields)-1, tc.fields) - gocode, err := format.Source([]byte(s)) - assert.NoError(t, err) - assert.Equal(t, tc.result, string(gocode)) - }) - } -} diff --git a/internal/fields/fields.go b/internal/fields/fields.go deleted file mode 100644 index efea9cb..0000000 --- a/internal/fields/fields.go +++ /dev/null @@ -1,399 +0,0 @@ -package fields - -import ( - "bytes" - "fmt" - "log" - "strings" -) - -// Field holds metadata that is required by parquetgen in order -// to generate code. 
-type Field struct { - Type string - RepetitionTypes RepetitionTypes - FieldNames []string - ColumnNames []string - FieldTypes []string - Seen RepetitionTypes - TypeName string - FieldType string - ParquetType string - Category string -} - -type input struct { - Parent string - Val string - Append bool -} - -// Seen counts how many sub-fields have been previously processed -// so that some of the cases and if statements can be skipped when -// re-assembling records -func Seen(i int, flds []Field) []RepetitionType { - f := flds[i] - out := rts([]RepetitionType{}) - - l := len(f.FieldNames) - for _, fld := range flds[:i] { - end := l - if len(fld.FieldNames) <= l { - end = len(fld.FieldNames) - } - for i, n := range fld.FieldNames[:end] { - if n == f.FieldNames[i] { - out = out.add(i, fld.RepetitionTypes) - } - } - } - - return []RepetitionType(out) -} - -// DefIndex calculates the index of the -// nested field with the given definition level. -func (f Field) DefIndex(def int) int { - var count int - for j, o := range f.RepetitionTypes { - if o == Optional || o == Repeated { - count++ - } - if count == def { - return j - } - } - return def -} - -// MaxDef cacluates the largest possible definition -// level for the nested field. -func (f Field) MaxDef() int { - var out int - for _, o := range f.RepetitionTypes { - if o == Optional || o == Repeated { - out++ - } - } - return out -} - -// MaxRep cacluates the largest possible repetition -// level for the nested field. -func (f Field) MaxRep() int { - var out int - for _, o := range f.RepetitionTypes { - if o == Repeated { - out++ - } - } - return out -} - -// RepCase is used by parquetgen to generate code. -type RepCase struct { - // Case is the code for a switch case (for example: case 0:) - Case string - // Rep is the repetition level that is handled by the switch case. - Rep int -} - -// RepCases returns a RepCase slice based on the field types and -// what sub-fields have already been seen. 
-func (f Field) RepCases(seen RepetitionTypes) []RepCase { - mr := int(f.MaxRep()) - if mr == int(seen.MaxRep()) { - return []RepCase{{Case: "default:"}} - } - - var out []RepCase - for i := 0; i <= mr; i++ { - out = append(out, RepCase{Case: fmt.Sprintf("case %d:", i), Rep: i}) - } - return out -} - -// NilField finds the nth field that is optional and returns some -// information about it. -func (f Field) NilField(n int) (string, RepetitionType, int, int) { - var fields []string - var count int - var j, reps int - var o RepetitionType - - for j, o = range f.RepetitionTypes { - fields = append(fields, f.FieldNames[j]) - if o == Optional { - count++ - } else if o == Repeated { - count++ - reps++ - } - if count > n { - break - } - } - return strings.Join(fields, "."), o, j, reps -} - -// Child returns a sub-field based on i -func (f Field) Child(i int) Field { - return Field{ - RepetitionTypes: f.RepetitionTypes[i:], - FieldNames: f.FieldNames[i:], - FieldTypes: f.FieldTypes[i:], - } -} - -// Parent returns a parent field based on i -func (f Field) Parent(i int) Field { - return Field{ - RepetitionTypes: f.RepetitionTypes[:i], - FieldNames: f.FieldNames[:i], - FieldTypes: f.FieldTypes[:i], - } -} - -// Copy returns a deep copy of the field -func (f Field) Copy() Field { - return Field{ - RepetitionTypes: append(f.RepetitionTypes[:0:0], f.RepetitionTypes...), - FieldNames: append(f.FieldNames[:0:0], f.FieldNames...), - FieldTypes: append(f.FieldTypes[:0:0], f.FieldTypes...), - Seen: append(f.Seen[:0:0], f.Seen...), - } -} - -// Repeated wraps RepetitionTypes.Repeated() -func (f Field) Repeated() bool { - return f.RepetitionTypes.Repeated() -} - -// Optional wraps RepetitionTypes.Optional() -func (f Field) Optional() bool { - return f.RepetitionTypes.Optional() -} - -// Required wraps RepetitionTypes.Required() -func (f Field) Required() bool { - return f.RepetitionTypes.Required() -} - -// Init is called by parquetgen's templates to generate the code -// that writes 
to a struct's field (for example: x.Friend.Hobby = &Item{}) -func (f Field) Init(def, rep int) string { - md := f.MaxDef() - if rep > 0 { - var count int - s := Seen(1, []Field{f, f}) - for i, rt := range f.RepetitionTypes { - if rt == Repeated { - count++ - } - if count == rep { - f.Seen = s[:i] - } - } - } - - start, end := f.start(def, rep), f.end(def, rep) - flds := make([]field, len(f.RepetitionTypes[start:end])) - - i := start - var j int - var nReps int - for _, rt := range f.RepetitionTypes[start:end] { - if rt == Repeated { - nReps++ - } - flds[j] = field{ - RT: rt, - Name: f.FieldNames[i], - Type: f.FieldTypes[i], - i: i, - start: start, - seen: f.Seen, - rep: rep, - nReps: nReps, - } - - i++ - j++ - } - - // start with the innermost field - flds = reverse(flds) - - var remainder []field - if len(flds) > 1 { - remainder = flds[1:] - } - - if def == md { - if flds[0].Primitive() && f.RepetitionTypes.Repeated() { - flds[0].Val = "vals[nVals]" - } else if flds[0].Primitive() && !f.RepetitionTypes.Repeated() { - flds[0].Val = "vals[0]" - } - } - - in := input{ - Parent: f.parent(start), - Val: flds[0].init(remainder), - Append: f.append(rep, flds[0]), - } - - var buf bytes.Buffer - fieldTpl.Execute(&buf, in) - return buf.String() -} - -func (f Field) append(rep int, fld field) bool { - return rep > 0 || - (f.Seen.Repeated() && fld.RT == Repeated) -} - -func (f Field) parent(start int) string { - names := make([]string, start+1) - var r int - l := len(f.FieldNames[:start+1]) - for i, n := range f.FieldNames[:start+1] { - if i < l-1 && f.RepetitionTypes[i] == Repeated { - n = fmt.Sprintf("%s[ind[%d]]", n, r) - r++ - } - names[i] = n - } - return strings.Join(names, ".") -} - -// Path creates gocode for initializing a string slice in a go template -func (f Field) Path() string { - out := make([]string, len(f.ColumnNames)) - for i, n := range f.ColumnNames { - out[i] = fmt.Sprintf(`"%s"`, n) - } - return strings.Join(out, ", ") -} - -// start calculates which 
nested field is -// being written to based on the definition -// level and which parent fields have already -// been written to by previous fields with -// common ancestors. -func (f Field) start(def, rep int) int { - di := f.DefIndex(def) - seen := f.Seen - if len(seen) > di { - seen = seen[:di+1] - } - - if len(f.RepetitionTypes)-1 > di { - for _, rt := range f.RepetitionTypes[di+1:] { - if rt >= Optional { - break - } - di++ - } - } - - if rep == 0 { - rep = int(seen.MaxRep()) + 1 - } - - var i, reps int - var rt RepetitionType - for i, rt = range f.RepetitionTypes[:di+1] { - if rt == Required { - continue - } - - if rt == Repeated { - reps++ - } - - if reps == rep { - break - } - - if rt >= Optional && i >= len(seen) { - break - } - } - - return i -} - -func (f Field) end(def, rep int) int { - if def == f.MaxDef() { - return len(f.RepetitionTypes) - } - - s := f.start(def, rep) - - var i int - md := int(f.RepetitionTypes[:s].MaxDef()) - for _, rt := range f.RepetitionTypes[s:] { - if (rt == Optional || rt == Repeated) && i < def-md { - i++ - } - } - return s + i -} - -type field struct { - RT RepetitionType - Name string - Type string - Val string - i int - start int - seen RepetitionTypes - rep int - nReps int -} - -func (f field) init(flds []field) string { - var buf bytes.Buffer - err := initTpl.Execute(&buf, f) - if err != nil { - log.Fatal(err) - } - - if len(flds) == 0 { - return buf.String() - } - - f2 := flds[0] - var flds2 []field - if len(flds) > 1 { - flds2 = flds[1:] - } - - f2.Val = fmt.Sprintf("%s: %s", f.Name, buf.String()) - return f2.init(flds2) -} - -// Slice is called by parquetgen's go templates to determine -// if the field is repeated or not. 
-func (f field) Slice() bool { - return (f.RT == Repeated && f.i != f.start) || - (f.RT == Repeated && f.rep == 0 && f.i == f.start && !f.seen.NRepeated(f.i+1) && !f.Primitive()) || - (f.RT == Repeated && f.rep == 0 && f.Primitive() && f.i == 0) -} - -// Primitive is called in order to determine if the field is primitive or not. -func (f field) Primitive() bool { - return primitiveTypes[f.Type] -} - -var primitiveTypes = map[string]bool{ - "bool": true, - "int32": true, - "uint32": true, - "int64": true, - "uint64": true, - "float32": true, - "float64": true, - "string": true, -} diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go deleted file mode 100644 index 57b5cae..0000000 --- a/internal/fields/fields_test.go +++ /dev/null @@ -1,446 +0,0 @@ -package fields_test - -import ( - "fmt" - "go/format" - "testing" - - "github.com/parsyl/parquet/internal/fields" - "github.com/stretchr/testify/assert" -) - -func TestFields(t *testing.T) { - testCases := []struct { - field fields.Field - def int - rep int - seen []fields.RepetitionType - expected string - }{ - { - field: fields.Field{TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 1, - expected: "x.Link = &Link{}", - }, - { - field: fields.Field{TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 2, - rep: 0, - expected: "x.Link = &Link{Backward: []int64{vals[nVals]}}", - }, - { - field: fields.Field{TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 2, - rep: 1, - expected: "x.Link.Backward = append(x.Link.Backward, vals[nVals])", - }, - { - field: fields.Field{TypeName: "int64", FieldNames: 
[]string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 2, - rep: 0, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", - }, - { - field: fields.Field{TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - rep: 0, - expected: "x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}}", - }, - { - field: fields.Field{TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - rep: 1, - expected: "x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}})", - }, - { - field: fields.Field{TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - rep: 2, - expected: "x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]})", - }, - { - field: fields.Field{TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - def: 1, - expected: "x.Hobby = &Hobby{}", - }, - { - field: fields.Field{TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - def: 2, - expected: "x.Hobby = &Hobby{Difficulty: pint32(vals[0])}", - }, - { - field: fields.Field{TypeName: "*int32", FieldNames: []string{"Hobby", 
"Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - def: 2, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Hobby.Difficulty = pint32(vals[0])", - }, - { - field: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - def: 1, - expected: "x.Hobby = &Hobby{Name: vals[0]}", - }, - { - field: fields.Field{TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - def: 1, - expected: "x.Hobby.Name = pstring(vals[0])", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 1, - rep: 1, - expected: "x.Names = append(x.Names, Name{})", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - rep: 1, - expected: "x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}})", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - rep: 0, - expected: "x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}}", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - 
rep: 2, - expected: "x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]})", - }, - { - field: fields.Field{FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 1, - rep: 0, - expected: "x.Link = &Link{}", - }, - { - field: fields.Field{FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 2, - rep: 0, - expected: "x.Link = &Link{Backward: []int64{vals[nVals]}}", - }, - { - field: fields.Field{FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 2, - rep: 1, - expected: "x.Link.Backward = append(x.Link.Backward, vals[nVals])", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, - def: 1, - rep: 1, - expected: "x.Names = append(x.Names, Name{})", - }, - { - - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, - def: 2, - rep: 0, - expected: "x.Names = []Name{{Language: Language{Codes: []string{vals[nVals]}}}}", - }, - { - - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, - def: 2, - rep: 1, - expected: "x.Names = append(x.Names, Name{Language: Language{Codes: []string{vals[nVals]}}})", - }, - { - - field: fields.Field{FieldNames: []string{"Name", "Languages", "Codes"}, FieldTypes: 
[]string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated, fields.Repeated}}, - def: 2, - rep: 1, - expected: "x.Name.Languages = append(x.Name.Languages, Language{Codes: []string{vals[nVals]}})", - }, - { - - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, - def: 2, - rep: 0, - expected: "x.Names = []Name{{Language: Language{Codes: []string{vals[nVals]}}}}", - }, - { - - field: fields.Field{FieldNames: []string{"Name", "Languages", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated, fields.Repeated}}, - def: 2, - rep: 0, - expected: "x.Name.Languages = []Language{{Codes: []string{vals[nVals]}}}", - }, - { - - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, - def: 2, - rep: 2, - expected: "x.Names[ind[0]].Language.Codes = append(x.Names[ind[0]].Language.Codes, vals[nVals])", - }, - { - field: fields.Field{FieldNames: []string{"Name", "Languages", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated, fields.Repeated}}, - def: 2, - rep: 2, - expected: "x.Name.Languages[ind[0]].Codes = append(x.Name.Languages[ind[0]].Codes, vals[nVals])", - }, - { - field: fields.Field{FieldNames: []string{"Thing", "Names", "Languages", "Codes"}, FieldTypes: []string{"Thing", "Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated, fields.Repeated, fields.Repeated}}, - def: 3, - rep: 3, - expected: "x.Thing.Names[ind[0]].Languages[ind[1]].Codes = 
append(x.Thing.Names[ind[0]].Languages[ind[1]].Codes, vals[nVals])", - }, - { - field: fields.Field{FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - def: 1, - expected: "x.Hobby = &Item{}", - }, - { - field: fields.Field{FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - def: 2, - expected: "x.Hobby = &Item{Name: pstring(vals[0])}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, - def: 3, - expected: "x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, - def: 1, - expected: "x.Friend.Hobby = &Item{}", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - def: 1, - rep: 1, - expected: "x.Names = append(x.Names, Name{})", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - def: 3, - rep: 0, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - expected: "x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals])", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: 
[]fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - def: 3, - rep: 0, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Names[ind[0]].Languages = []Language{{Country: pstring(vals[nVals])}}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 1, - expected: "x.Friend = &Entity{}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 2, - expected: "x.Friend = &Entity{Hobby: &Item{}}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 2, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Friend.Hobby = &Item{}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 3, - expected: "x.Friend = &Entity{Hobby: &Item{Name: &Name{First: vals[0]}}}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Friend.Hobby = &Item{Name: &Name{First: vals[0]}}", - }, - { - field: fields.Field{FieldNames: 
[]string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - expected: "x.Friend.Hobby.Name = &Name{First: vals[0]}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Repeated}, - expected: "x.Friend.Hobby.Name.First = vals[0]", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Repeated}, - expected: "x.Friend.Hobby.Name.First = pstring(vals[0])", - }, - { - field: fields.Field{FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 2, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", - }, - { - field: fields.Field{FieldNames: []string{"LuckyNumbers"}, FieldTypes: []string{"int64"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - def: 1, - rep: 0, - expected: "x.LuckyNumbers = []int64{vals[nVals]}", - }, - { - field: fields.Field{FieldNames: []string{"LuckyNumbers"}, FieldTypes: []string{"int64"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - def: 1, - rep: 1, - expected: "x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals])", - }, - { - field: 
fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, - def: 3, - expected: "x.A.B = &B{C: C{D: []D{{E: E{F: pstring(vals[nVals])}}}}}", - }, - { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.A.B = &B{C: C{D: []D{{E: E{F: pstring(vals[nVals])}}}}}", - }, - { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - expected: "x.A.B.C.D = []D{{E: E{F: pstring(vals[nVals])}}}", - }, - { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Repeated}, - expected: "x.A.B.C.D = []D{{E: E{F: pstring(vals[nVals])}}}", - }, - { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated, 
fields.Repeated, fields.Repeated}, - expected: "x.A.B.C.D[ind[0]].E.F = pstring(vals[nVals])", - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d %v def %d rep %d %v", i, tc.field.FieldNames, tc.def, tc.rep, tc.seen), func(t *testing.T) { - field := tc.field - field.Seen = tc.seen - s := field.Init(tc.def, tc.rep) - gocode, err := format.Source([]byte(s)) - assert.NoError(t, err) - assert.Equal(t, tc.expected, string(gocode)) - }) - } -} - -func TestSeen(t *testing.T) { - testCases := []struct { - flds []fields.Field - expected []fields.RepetitionType - }{ - { - flds: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - expected: []fields.RepetitionType{fields.Optional}, - }, - { - flds: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required}}, - {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated}}, - }, - expected: []fields.RepetitionType{fields.Required}, - }, - { - flds: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {FieldNames: []string{"Link", "Backward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - expected: []fields.RepetitionType{fields.Repeated}, - }, - { - flds: []fields.Field{ - {FieldNames: []string{"Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - expected: 
[]fields.RepetitionType{}, - }, - { - flds: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {FieldNames: []string{"Link", "Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Optional}}, - {FieldNames: []string{"Link", "Name", "Last"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, - }, - expected: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { - i := len(tc.flds) - 1 - assert.Equal(t, tc.expected, fields.Seen(i, tc.flds)) - }) - } -} - -func TestChild(t *testing.T) { - f := fields.Field{ - FieldNames: []string{"Friends", "Name", "First"}, - FieldTypes: []string{"Being", "Name", "string"}, - RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Optional}, - } - ch := fields.Field{ - FieldNames: []string{"Name", "First"}, - FieldTypes: []string{"Name", "string"}, - RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}, - } - assert.Equal(t, ch, f.Child(1)) -} - -func TestRepCases(t *testing.T) { - testCases := []struct { - f fields.Field - seen []fields.RepetitionType - expected []fields.RepCase - }{ - { - f: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - expected: []fields.RepCase{{Case: "case 0:", Rep: 0}, {Case: "case 1:", Rep: 1}, {Case: "case 2:", Rep: 2}}, - }, - { - f: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - expected: 
[]fields.RepCase{{Case: "default:", Rep: 0}}, - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { - assert.Equal(t, tc.expected, tc.f.RepCases(tc.seen)) - }) - } -} - -func TestNilField(t *testing.T) { - f := fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}} - name, rt, i, reps := f.NilField(1) - assert.Equal(t, "Names.Languages", name) - assert.Equal(t, fields.Repeated, rt) - assert.Equal(t, 1, i) - assert.Equal(t, 2, reps) -} - -func TestField(t *testing.T) { - f := fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}} - assert.True(t, f.Repeated()) - assert.True(t, f.Optional()) - assert.False(t, f.Required()) -} - -func TestRepetitionTypes(t *testing.T) { - rts := fields.RepetitionTypes([]fields.RepetitionType{fields.Repeated, fields.Optional}) - assert.Equal(t, rts.Def(1), fields.Repeated) - assert.Equal(t, rts.Def(2), fields.Optional) -} diff --git a/internal/parse/fields_test.go b/internal/parse/fields_test.go deleted file mode 100644 index 668880d..0000000 --- a/internal/parse/fields_test.go +++ /dev/null @@ -1,363 +0,0 @@ -package parse_test - -import ( - "fmt" - "io/ioutil" - "log" - "testing" - - "github.com/parsyl/parquet/internal/fields" - "github.com/parsyl/parquet/internal/parse" - sch "github.com/parsyl/parquet/schema" - "github.com/stretchr/testify/assert" -) - -func init() { - log.SetOutput(ioutil.Discard) -} - -func TestField(t *testing.T) { - type testInput struct { - f fields.Field - expected []string - } - - testCases := []testInput{ - { - f: fields.Field{FieldNames: []string{"Friends", "Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, 
fields.Optional}}, - expected: []string{ - "Friends", - "Friends.Name.First", - }, - }, - { - f: fields.Field{FieldNames: []string{"Friend", "Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Optional}}, - expected: []string{ - "Friend.Name.First", - }, - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { - if !assert.Equal(t, len(tc.expected), tc.f.MaxDef()) { - return - } - - for i := 0; i < tc.f.MaxDef(); i++ { - s, _, _, _ := tc.f.NilField(i) - assert.Equal(t, tc.expected[i], s) - } - }) - } -} - -func TestFields(t *testing.T) { - - type testInput struct { - name string - typ string - expected []fields.Field - errors []error - } - - testCases := []testInput{ - { - name: "flat", - typ: "Being", - expected: []fields.Field{ - {Type: "Being", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Being", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "private fields", - typ: "Private", - expected: []fields.Field{ - {Type: "Private", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Private", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - 
name: "nested struct", - typ: "Nested", - expected: []fields.Field{ - {Type: "Nested", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Being", "ID"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required}}, - {Type: "Nested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Being", "Age"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Nested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - errors: []error{}, - }, - { - name: "nested struct with name that doesn't match the struct type", - typ: "Nested2", - expected: []fields.Field{ - {Type: "Nested2", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Info", "ID"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Info", "ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required}}, - {Type: "Nested2", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Info", "Age"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Info", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Nested2", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", 
RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - errors: []error{}, - }, - { - name: "2 deep nested struct", - typ: "DoubleNested", - expected: []fields.Field{ - {Type: "DoubleNested", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Nested", "Being", "ID"}, FieldTypes: []string{"Nested", "Being", "int32"}, ColumnNames: []string{"Nested", "Being", "ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}, - {Type: "DoubleNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Nested", "Being", "Age"}, FieldTypes: []string{"Nested", "Being", "int32"}, ColumnNames: []string{"Nested", "Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Optional}}, - {Type: "DoubleNested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Nested", "Anniversary"}, FieldTypes: []string{"Nested", "uint64"}, ColumnNames: []string{"Nested", "Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - }, - errors: []error{}, - }, - { - name: "2 deep optional nested struct", - typ: "OptionalDoubleNested", - expected: []fields.Field{ - {Type: "OptionalDoubleNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"OptionalNested", "Being", "ID"}, FieldTypes: []string{"OptionalNested", "Being", "int32"}, ColumnNames: []string{"OptionalNested", "Being", "ID"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required}}, - {Type: "OptionalDoubleNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"OptionalNested", "Being", "Age"}, FieldTypes: 
[]string{"OptionalNested", "Being", "int32"}, ColumnNames: []string{"OptionalNested", "Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, - {Type: "OptionalDoubleNested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"OptionalNested", "Anniversary"}, FieldTypes: []string{"OptionalNested", "uint64"}, ColumnNames: []string{"OptionalNested", "Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - }, - errors: []error{}, - }, - { - name: "optional nested struct", - typ: "OptionalNested", - expected: []fields.Field{ - {Type: "OptionalNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Being", "ID"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "ID"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "OptionalNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Being", "Age"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - {Type: "OptionalNested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - errors: []error{}, - }, - { - name: "optional nested struct v2", - typ: "OptionalNested2", - expected: []fields.Field{ - {Type: "OptionalNested2", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Being", "Name"}, FieldTypes: []string{"Thing", 
"string"}, ColumnNames: []string{"Being", "Name"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "OptionalNested2", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - errors: []error{}, - }, - { - name: "unsupported fields", - typ: "Unsupported", - errors: []error{fmt.Errorf("unsupported type: Time")}, - expected: []fields.Field{ - {Type: "Unsupported", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Unsupported", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "unsupported fields mixed in with supported and embedded", - typ: "SupportedAndUnsupported", - expected: []fields.Field{ - {Type: "SupportedAndUnsupported", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "SupportedAndUnsupported", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "SupportedAndUnsupported", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: 
"*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "SupportedAndUnsupported", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - errors: []error{ - fmt.Errorf("unsupported type: T1"), - fmt.Errorf("unsupported type: T2"), - }, - }, - { - name: "embedded", - typ: "Person", - expected: []fields.Field{ - {Type: "Person", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Int64OptionalField", ParquetType: "Int64Type", TypeName: "*int64", FieldNames: []string{"Sadness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Sadness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Code"}, FieldTypes: []string{"string"}, ColumnNames: []string{"Code"}, Category: "string", RepetitionTypes: 
[]fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Float32Field", ParquetType: "Float32Type", TypeName: "float32", FieldNames: []string{"Funkiness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Funkiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Float32OptionalField", ParquetType: "Float32Type", TypeName: "*float32", FieldNames: []string{"Lameness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Lameness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", FieldType: "BoolOptionalField", ParquetType: "BoolType", TypeName: "*bool", FieldNames: []string{"Keen"}, FieldTypes: []string{"bool"}, ColumnNames: []string{"Keen"}, Category: "boolOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", FieldType: "Uint32Field", ParquetType: "Uint32Type", TypeName: "uint32", FieldNames: []string{"Birthday"}, FieldTypes: []string{"uint32"}, ColumnNames: []string{"Birthday"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "embedded preserve order", - typ: "NewOrderPerson", - expected: []fields.Field{ - {Type: "NewOrderPerson", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Int64OptionalField", ParquetType: "Int64Type", TypeName: "*int64", FieldNames: []string{"Sadness"}, FieldTypes: 
[]string{"int64"}, ColumnNames: []string{"Sadness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "NewOrderPerson", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Code"}, FieldTypes: []string{"string"}, ColumnNames: []string{"Code"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Float32Field", ParquetType: "Float32Type", TypeName: "float32", FieldNames: []string{"Funkiness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Funkiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Float32OptionalField", ParquetType: "Float32Type", TypeName: "*float32", FieldNames: []string{"Lameness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Lameness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "NewOrderPerson", FieldType: "BoolOptionalField", ParquetType: "BoolType", TypeName: "*bool", FieldNames: []string{"Keen"}, FieldTypes: []string{"bool"}, ColumnNames: []string{"Keen"}, Category: "boolOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "NewOrderPerson", FieldType: "Uint32Field", ParquetType: "Uint32Type", TypeName: "uint32", FieldNames: []string{"Birthday"}, FieldTypes: []string{"uint32"}, ColumnNames: []string{"Birthday"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, 
ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "NewOrderPerson", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "tags", - typ: "Tagged", - expected: []fields.Field{ - {Type: "Tagged", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Tagged", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Name"}, FieldTypes: []string{"string"}, ColumnNames: []string{"name"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - }, - }, - { - name: "omit tag", - typ: "IgnoreMe", - expected: []fields.Field{ - {Type: "IgnoreMe", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - }, - }, - { - name: "repeated", - typ: "Slice", - expected: []fields.Field{ - {Type: "Slice", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - }, - }, - { - name: "repeated v2", - typ: "Slice2", - expected: []fields.Field{ - {Type: "Slice2", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", 
RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice2", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - }, - }, - { - name: "repeated v2", - typ: "Slice3", - expected: []fields.Field{ - {Type: "Slice3", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice3", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - {Type: "Slice3", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "nested and repeated", - typ: "Slice4", - expected: []fields.Field{ - {Type: "Slice4", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice4", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Hobbies", "Name"}, FieldTypes: []string{"Hobby", "string"}, ColumnNames: []string{"Hobbies", "Name"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required}}, - }, - }, - { - name: "nested and repeated v2", - typ: "Slice5", - expected: 
[]fields.Field{ - {Type: "Slice5", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice5", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Hobby", "Names"}, FieldTypes: []string{"Hobby2", "string"}, ColumnNames: []string{"hobby", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated}}, - }, - }, - { - name: "repeated and repeated", - typ: "Slice6", - expected: []fields.Field{ - {Type: "Slice6", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice6", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Hobbies", "Names"}, FieldTypes: []string{"Hobby2", "string"}, ColumnNames: []string{"hobbies", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated}}, - }, - }, - { - name: "nested repeated and repeated", - typ: "Slice7", - expected: []fields.Field{ - {Type: "Slice7", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Thing", "ID"}, FieldTypes: []string{"Slice6", "int32"}, ColumnNames: []string{"thing", "id"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "Slice7", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Thing", "Hobbies", "Names"}, FieldTypes: []string{"Slice6", "Hobby2", "string"}, ColumnNames: []string{"thing", "hobbies", "names"}, Category: "stringOptional", 
RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Repeated}}, - }, - }, - { - name: "dremel paper example", - typ: "Document", - expected: []fields.Field{ - {Type: "Document", FieldNames: []string{"DocID"}, FieldTypes: []string{"int64"}, TypeName: "int64", FieldType: "Int64Field", ParquetType: "Int64Type", ColumnNames: []string{"DocID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{0}}, - {Type: "Document", FieldNames: []string{"Links", "Backward"}, FieldTypes: []string{"Link", "int64"}, TypeName: "[]int64", FieldType: "Int64OptionalField", ParquetType: "Int64Type", ColumnNames: []string{"Links", "Backward"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 2}}, - {Type: "Document", FieldNames: []string{"Links", "Forward"}, FieldTypes: []string{"Link", "int64"}, TypeName: "[]int64", FieldType: "Int64OptionalField", ParquetType: "Int64Type", ColumnNames: []string{"Links", "Forward"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 2}}, - {Type: "Document", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, TypeName: "string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "Languages", "Code"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 2, 0}}, - {Type: "Document", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, TypeName: "*string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "Languages", "Country"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 2, 1}}, - {Type: "Document", FieldNames: []string{"Names", "URL"}, FieldTypes: []string{"Name", "string"}, TypeName: "*string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "URL"}, Category: "stringOptional", RepetitionTypes: 
[]fields.RepetitionType{2, 1}}, - }, - }, - { - name: "embedded embedded embedded", - typ: "A", - expected: []fields.Field{ - {Type: "A", FieldNames: []string{"D"}, FieldTypes: []string{"int32"}, TypeName: "int32", FieldType: "Int32Field", ParquetType: "Int32Type", ColumnNames: []string{"D"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{0}}, - {Type: "A", FieldNames: []string{"C"}, FieldTypes: []string{"string"}, TypeName: "string", FieldType: "StringField", ParquetType: "StringType", ColumnNames: []string{"C"}, Category: "string", RepetitionTypes: []fields.RepetitionType{0}}, - {Type: "A", FieldNames: []string{"B"}, FieldTypes: []string{"bool"}, TypeName: "bool", FieldType: "BoolField", ParquetType: "BoolType", ColumnNames: []string{"B"}, Category: "bool", RepetitionTypes: []fields.RepetitionType{0}}, - {Type: "A", FieldNames: []string{"Name"}, FieldTypes: []string{"string"}, TypeName: "string", FieldType: "StringField", ParquetType: "StringType", ColumnNames: []string{"Name"}, Category: "string", RepetitionTypes: []fields.RepetitionType{0}}, - }, - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { - out, err := parse.Fields(tc.typ, "./parse_test.go") - assert.Nil(t, err, tc.name) - assert.Equal(t, tc.expected, out.Fields, tc.name) - if assert.Equal(t, len(tc.errors), len(out.Errors), tc.name) { - for i, err := range out.Errors { - assert.EqualError(t, tc.errors[i], err.Error(), tc.name) - } - } else { - for _, err := range out.Errors { - fmt.Println(err) - } - } - }) - } -} - -func pint32(i int32) *int32 { - return &i -} - -func prt(rt sch.FieldRepetitionType) *sch.FieldRepetitionType { - return &rt -} - -func pt(t sch.Type) *sch.Type { - return &t -} - -func TestDefIndex(t *testing.T) { - testCases := []struct { - def int - field fields.Field - expected int - }{ - { - def: 1, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Repeated}}, 
- expected: 1, - }, - { - def: 2, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Repeated}}, - expected: 2, - }, - { - def: 1, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Repeated}}, - expected: 0, - }, - { - def: 2, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Repeated}}, - expected: 2, - }, - { - def: 2, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional, fields.Required}}, - expected: 1, - }, - { - def: 1, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional, fields.Required}}, - expected: 0, - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { - assert.Equal(t, tc.expected, tc.field.DefIndex(tc.def)) - }) - } -} diff --git a/internal/parse/parse.go b/internal/parse/parse.go deleted file mode 100644 index 560ea0c..0000000 --- a/internal/parse/parse.go +++ /dev/null @@ -1,313 +0,0 @@ -package parse - -import ( - "fmt" - "go/parser" - "go/token" - "log" - "strings" - - "go/ast" - - "github.com/parsyl/parquet/internal/fields" - flds "github.com/parsyl/parquet/internal/fields" -) - -const letters = "abcdefghijklmnopqrstuvwxyz" - -type field struct { - Field fields.Field - tagNames []string - fieldName string - fieldType string - omit bool - embedded bool - optional bool - repeated bool - err error -} - -// Result holds the fields and errors that are generated -// by reading a go struct. -type Result struct { - // Fields are the fields that will be written to and read from a parquet file. - Fields []flds.Field - // Errors is a list of errors that occurred while parsing a struct. - Errors []error -} - -// Fields gets the fields of the given struct. -// pth must be a go file that defines the typ struct. -// Any embedded structs must also be in that same file. 
-func Fields(typ, pth string) (*Result, error) { - fullTyp := typ - typ = getType(fullTyp) - - fset := token.NewFileSet() - file, err := parser.ParseFile(fset, pth, nil, 0) - if err != nil { - log.Fatal(err) - } - - f := &finder{n: map[string]ast.Node{}} - - ast.Walk(visitorFunc(f.findTypes), file) - - if f.n == nil { - return nil, fmt.Errorf("could not find %s", typ) - } - - fields, err := doGetFields(f.n) - if err != nil { - return nil, err - } - - var out []field - var errs []error - var i int - - for _, f := range fields[typ] { - i, out, errs = getOut(i, f, fields, errs, out) - } - - return &Result{ - Fields: getFields(fullTyp, out, fields), - Errors: errs, - }, nil -} - -func getOut(i int, f field, fields map[string][]field, errs []error, out []field) (int, []field, []error) { - ff, ok := fields[f.fieldType] - var o flds.RepetitionType = flds.Required - if strings.Contains(f.Field.TypeName, "*") { - o = flds.Optional - } else if f.repeated || strings.Contains(f.Field.TypeName, "[]") { - o = flds.Repeated - } - if ok { - for _, fld := range ff { - if fld.embedded { - x, more, moreerrs := getOut(0, fld, fields, nil, nil) - i += x - out = append(out, more...) - errs = append(errs, moreerrs...) - } else { - if (!fld.optional && (o == flds.Optional || f.optional)) || (!fld.repeated && (o == flds.Repeated || f.repeated)) { - fld = makeOptional(fld) - } - - if !f.embedded { - fld.Field.RepetitionTypes = append(append(f.Field.RepetitionTypes[:0:0], f.Field.RepetitionTypes...), o) //make a copy - fld.Field.FieldNames = append(f.Field.FieldNames, fld.Field.FieldNames...) - fld.Field.FieldTypes = append(f.Field.FieldTypes, fld.Field.FieldTypes...) - fld.Field.ColumnNames = append(f.Field.ColumnNames, fld.Field.ColumnNames...) 
- } - i, out, errs = getOut(i, fld, fields, errs, out) - } - } - return i, out, errs - } else if f.err == nil { - _, ok := types[f.fieldType] - if ok { - f.Field.RepetitionTypes = append(f.Field.RepetitionTypes, o) - out = append(out, f) - i++ - } else { - errs = append(errs, fmt.Errorf("unsupported type: %s", f.fieldName)) - } - } - return i, out, errs -} - -func makeOptional(f field) field { - f.optional = true - fn, cat, pt := lookupTypeAndCategory(strings.Replace(strings.Replace(f.Field.TypeName, "*", "", 1), "[]", "", 1), true, true) - f.Field.FieldType = fn - f.Field.ParquetType = pt - f.Field.Category = cat - return f -} - -func getType(typ string) string { - parts := strings.Split(typ, ".") - return parts[len(parts)-1] -} - -func getFields(fullTyp string, fields []field, m map[string][]field) []flds.Field { - typ := getType(fullTyp) - out := make([]flds.Field, 0, len(fields)) - for _, f := range fields { - _, ok := m[typ] - if f.omit || !ok { - continue - } - - if f.repeated { - f.Field.TypeName = fmt.Sprintf("[]%s", f.Field.TypeName) - } - - f.Field.Type = fullTyp - out = append(out, f.Field) - } - return out -} - -func isPrivate(x *ast.Field) bool { - var s string - if len(x.Names) == 0 { - s = fmt.Sprintf("%s", x.Type) - } else { - s = fmt.Sprintf("%s", x.Names[0]) - } - return strings.Contains(letters, string(s[0])) -} - -func doGetFields(n map[string]ast.Node) (map[string][]field, error) { - fields := map[string][]field{} - for k, n := range n { - ast.Inspect(n, func(n ast.Node) bool { - switch x := n.(type) { - case *ast.Field: - if len(x.Names) == 1 && !isPrivate(x) { - f := getField(x.Names[0].Name, x) - fields[k] = append(fields[k], f) - } else if len(x.Names) == 0 && !isPrivate(x) { - fields[k] = append(fields[k], field{embedded: true, fieldType: fmt.Sprintf("%s", x.Type), Field: flds.Field{TypeName: fmt.Sprintf("%s", x.Type)}}) - } - case *ast.ArrayType: - s := fields[k] - f := s[len(s)-1] - f.repeated = true - s[len(s)-1] = f - fields[k] = s - } 
- return true - }) - } - return fields, nil -} - -func getField(name string, x ast.Node) field { - var typ, tag string - var optional, repeated bool - ast.Inspect(x, func(n ast.Node) bool { - switch t := n.(type) { - case *ast.Field: - if t.Tag != nil { - tag = parseTag(t.Tag.Value) - } - typ = fmt.Sprintf("%s", t.Type) - case *ast.ArrayType: - at := n.(*ast.ArrayType) - s := fmt.Sprintf("%v", at.Elt) - typ = s - repeated = true - case *ast.StarExpr: - optional = true - typ = fmt.Sprintf("%s", t.X) - case ast.Expr: - s := fmt.Sprintf("%v", t) - _, ok := types[s] - if ok { - typ = s - } - } - return true - }) - - if tag == "" { - tag = name - } - - fn, cat, pt := lookupTypeAndCategory(typ, optional, repeated) - return field{ - Field: flds.Field{ - FieldNames: []string{name}, - FieldTypes: []string{typ}, - ColumnNames: []string{tag}, - TypeName: getTypeName(typ, optional), - FieldType: fn, - ParquetType: pt, - Category: cat}, - fieldName: name, - fieldType: typ, - omit: tag == "-", - optional: optional, - repeated: repeated, - } -} - -func parseTag(t string) string { - i := strings.Index(t, `parquet:"`) - if i == -1 { - return "" - } - t = t[i+9:] - return t[:strings.Index(t, `"`)] -} - -func getTypeName(s string, optional bool) string { - var star string - if optional { - star = "*" - } - return fmt.Sprintf("%s%s", star, s) -} - -func lookupTypeAndCategory(name string, optional, repeated bool) (string, string, string) { - var op string - if optional || repeated { - op = "Optional" - } - f, ok := types[name] - if !ok { - return "", "", "" - } - return fmt.Sprintf(f.name, op, "Field"), fmt.Sprintf(f.category, op), fmt.Sprintf(f.name, "", "Type") -} - -type fieldType struct { - name string - category string -} - -var types = map[string]fieldType{ - "int32": {"Int32%s%s", "numeric%s"}, - "uint32": {"Uint32%s%s", "numeric%s"}, - "int64": {"Int64%s%s", "numeric%s"}, - "uint64": {"Uint64%s%s", "numeric%s"}, - "float32": {"Float32%s%s", "numeric%s"}, - "float64": 
{"Float64%s%s", "numeric%s"}, - "bool": {"Bool%s%s", "bool%s"}, - "string": {"String%s%s", "string%s"}, -} - -type visitorFunc func(n ast.Node) ast.Visitor - -func (f visitorFunc) Visit(n ast.Node) ast.Visitor { - return f(n) -} - -type finder struct { - n map[string]ast.Node -} - -func (f *finder) findTypes(n ast.Node) ast.Visitor { - switch n := n.(type) { - case *ast.ImportSpec: - return visitorFunc(f.findTypes) - case *ast.Package: - return visitorFunc(f.findTypes) - case *ast.File: - return visitorFunc(f.findTypes) - case *ast.GenDecl: - if n.Tok == token.TYPE { - return visitorFunc(f.findTypes) - } - case *ast.TypeSpec: - f.n[n.Name.Name] = n - return visitorFunc(f.findTypes) - } - - return nil -} diff --git a/parquet_generated_test.go b/parquet_generated_test.go index 4519e5f..42dd044 100644 --- a/parquet_generated_test.go +++ b/parquet_generated_test.go @@ -46,6 +46,7 @@ type ParquetWriter struct { func Fields(compression compression) []Field { return []Field{ NewInt32Field(readID, writeID, []string{"id"}, fieldCompression(compression)), + NewStringField(readName, writeName, []string{"name"}, fieldCompression(compression)), NewInt32OptionalField(readAge, writeAge, []string{"age"}, []int{1}, optionalFieldCompression(compression)), NewInt64Field(readHappiness, writeHappiness, []string{"happiness"}, fieldCompression(compression)), NewInt64OptionalField(readSadness, writeSadness, []string{"sadness"}, []int{1}, optionalFieldCompression(compression)), @@ -60,7 +61,10 @@ func Fields(compression compression) []Field { NewBoolField(readHungry, writeHungry, []string{"hungry"}, fieldCompression(compression)), NewStringOptionalField(readHobbyName, writeHobbyName, []string{"hobby", "name"}, []int{1, 0}, optionalFieldCompression(compression)), NewInt32OptionalField(readHobbyDifficulty, writeHobbyDifficulty, []string{"hobby", "difficulty"}, []int{1, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsName, writeHobbySkillsName, 
[]string{"hobby", "skills", "name"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, optionalFieldCompression(compression)), NewInt32OptionalField(readFriendsID, writeFriendsID, []string{"friends", "id"}, []int{2, 0}, optionalFieldCompression(compression)), + NewStringOptionalField(readFriendsName, writeFriendsName, []string{"friends", "name"}, []int{2, 0}, optionalFieldCompression(compression)), NewInt32OptionalField(readFriendsAge, writeFriendsAge, []string{"friends", "age"}, []int{2, 1}, optionalFieldCompression(compression)), NewBoolField(readSleepy, writeSleepy, []string{"Sleepy"}, fieldCompression(compression)), } @@ -74,6 +78,14 @@ func writeID(x *Person, vals []int32) { x.ID = vals[0] } +func readName(x Person) string { + return x.Name +} + +func writeName(x *Person, vals []string) { + x.Name = vals[0] +} + func readAge(x Person) ([]int32, []uint8, []uint8) { switch { case x.Age == nil: @@ -276,22 +288,116 @@ func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { - case 1: - if x.Hobby == nil { - x.Hobby = &Hobby{} - } case 2: - if x.Hobby == nil { - x.Hobby = &Hobby{Difficulty: pint32(vals[0])} - } else { - x.Hobby.Difficulty = pint32(vals[0]) - } + x.Hobby.Difficulty = pint32(vals[0]) return 1, 1 } return 0, 1 } +func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 >= 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Name) + } + } 
+ } + + return vals, defs, reps +} + +func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) + nVals++ + } + } + + return nVals, nLevels +} + +func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 >= 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Difficulty) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] + nVals++ + } + } + + return nVals, nLevels +} + func readFriendsID(x Person) ([]int32, []uint8, []uint8) { var vals []int32 var defs, reps []uint8 @@ -302,7 +408,7 @@ func readFriendsID(x Person) ([]int32, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 1) @@ -330,12 +436,53 @@ func writeFriendsID(x *Person, vals []int32, defs, reps []uint8) (int, int) { switch def { case 1: - switch rep { - case 0: - x.Friends = []Being{{ID: vals[nVals]}} - case 1: - x.Friends = append(x.Friends, Being{ID: vals[nVals]}) + x.Friends = append(x.Friends, Being{ID: 
vals[nVals]}) + nVals++ + } + } + + return nVals, nLevels +} + +func readFriendsName(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Friends) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Friends { + if i0 >= 1 { + lastRep = 1 } + defs = append(defs, 1) + reps = append(reps, lastRep) + vals = append(vals, x0.Name) + } + } + + return vals, defs, reps +} + +func writeFriendsName(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Friends[ind[0]].Name = vals[nVals] nVals++ } } @@ -353,7 +500,7 @@ func readFriendsAge(x Person) ([]int32, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if x0.Age == nil { @@ -386,10 +533,7 @@ func writeFriendsAge(x *Person, vals []int32, defs, reps []uint8) (int, int) { switch def { case 2: - switch rep { - default: - x.Friends[ind[0]].Age = pint32(vals[nVals]) - } + x.Friends[ind[0]].Age = pint32(vals[nVals]) nVals++ } } @@ -785,6 +929,80 @@ func (f *Int32Field) Levels() ([]uint8, []uint8) { return nil, nil } +type StringField struct { + parquet.RequiredField + vals []string + read func(r Person) string + write func(r *Person, vals []string) + stats *stringStats +} + +func NewStringField(read func(r Person) string, write func(r *Person, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { + return &StringField{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newStringStats(), + } +} + +func (f *StringField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: 
parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < pg.N; j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringField) Scan(r *Person) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *StringField) Add(r Person) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *StringField) Levels() ([]uint8, []uint8) { + return nil, nil +} + type Int32OptionalField struct { parquet.OptionalField vals []int32 @@ -1456,80 +1674,6 @@ func (f *Uint64OptionalField) Levels() ([]uint8, []uint8) { return f.Defs, f.Reps } -type StringField struct { - parquet.RequiredField - vals []string - read func(r Person) string - write func(r *Person, vals []string) - stats *stringStats -} - -func NewStringField(read func(r Person) string, write func(r *Person, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { - return &StringField{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newStringStats(), - } -} - -func (f *StringField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} -} - -func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { - buf 
:= bytes.Buffer{} - - for _, s := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { - return err - } - buf.Write([]byte(s)) - } - - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} - -func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - for j := 0; j < pg.N; j++ { - var x int32 - if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { - return err - } - s := make([]byte, x) - if _, err := rr.Read(s); err != nil { - return err - } - - f.vals = append(f.vals, string(s)) - } - return nil -} - -func (f *StringField) Scan(r *Person) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *StringField) Add(r Person) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) -} - -func (f *StringField) Levels() ([]uint8, []uint8) { - return nil, nil -} - type BoolField struct { parquet.RequiredField vals []bool @@ -1634,6 +1778,54 @@ func (f *int32stats) Max() []byte { return f.bytes(f.max) } +type stringStats struct { + vals []string + min []byte + max []byte +} + +func newStringStats() *stringStats { + return &stringStats{} +} + +func (s *stringStats) add(val string) { + s.vals = append(s.vals, val) +} + +func (s *stringStats) NullCount() *int64 { + return nil +} + +func (s *stringStats) DistinctCount() *int64 { + return nil +} + +func (s *stringStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + type int32optionalStats struct { min int32 max int32 @@ -2145,54 +2337,6 @@ func (f *uint64optionalStats) Max() []byte { return f.bytes(f.max) } 
-type stringStats struct { - vals []string - min []byte - max []byte -} - -func newStringStats() *stringStats { - return &stringStats{} -} - -func (s *stringStats) add(val string) { - s.vals = append(s.vals, val) -} - -func (s *stringStats) NullCount() *int64 { - return nil -} - -func (s *stringStats) DistinctCount() *int64 { - return nil -} - -func (s *stringStats) Min() []byte { - if s.min == nil { - s.minMax() - } - return s.min -} - -func (s *stringStats) Max() []byte { - if s.max == nil { - s.minMax() - } - return s.max -} - -func (s *stringStats) minMax() { - if len(s.vals) == 0 { - return - } - - tmp := make([]string, len(s.vals)) - copy(tmp, s.vals) - sort.Strings(tmp) - s.min = []byte(tmp[0]) - s.max = []byte(tmp[len(tmp)-1]) -} - type boolStats struct{} func newBoolStats() *boolStats { return &boolStats{} } diff --git a/parquet_test.go b/parquet_test.go index ebc531a..18674ee 100644 --- a/parquet_test.go +++ b/parquet_test.go @@ -7,6 +7,7 @@ import ( "io" "math" "math/rand" + "os" "testing" "time" @@ -19,9 +20,15 @@ import ( func init() { rand.Seed(time.Now().UnixNano()) + if os.Getenv("INCLUDE+GZIP") == "true" { + compressionCases = append(compressionCases, "gzip") + } } -var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") +var ( + letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + compressionCases = []string{"uncompressed", "snappy"} +) func TestParquet(t *testing.T) { type testCase struct { @@ -42,7 +49,18 @@ func TestParquet(t *testing.T) { { name: "single nested person", input: [][]Person{ - {{Hobby: &Hobby{Name: "napping", Difficulty: pint32(10)}}}, + { + { + Hobby: &Hobby{ + Name: "napping", + Difficulty: pint32(10), + Skills: []Skill{ + {Name: "meditation", Difficulty: "very"}, + {Name: "calmness", Difficulty: "so-so"}, + }, + }, + }, + }, }, }, { @@ -435,7 +453,7 @@ func TestParquet(t *testing.T) { } for i, tc := range testCases { - for j, comp := range []string{"uncompressed", "snappy", 
"gzip"} { + for j, comp := range compressionCases { t.Run(fmt.Sprintf("%02d %s %s", 2*i+j, tc.name, comp), func(t *testing.T) { if tc.pageSize == 0 { tc.pageSize = 100 @@ -514,7 +532,7 @@ func TestPageHeaders(t *testing.T) { return } - assert.Equal(t, 72, len(pageHeaders)) + assert.Equal(t, 88, len(pageHeaders)) } func TestStats(t *testing.T) { @@ -969,13 +987,20 @@ func writeFloat64(f float64) []byte { } type Being struct { - ID int32 `parquet:"id"` - Age *int32 `parquet:"age"` + ID int32 `parquet:"id"` + Name string `parquet:"name"` + Age *int32 `parquet:"age"` } -type Hobby struct { +type Skill struct { Name string `parquet:"name"` - Difficulty *int32 `parquet:"difficulty"` + Difficulty string `parquet:"difficulty"` +} + +type Hobby struct { + Name string `parquet:"name"` + Difficulty *int32 `parquet:"difficulty"` + Skills []Skill `parquet:"skills"` } type Person struct {