Skip to content

Commit

Permalink
Use parquet tag for column names
Browse files (browse the repository at this point in the history)
  • Loading branch information
cswank committed Sep 7, 2019
1 parent be3626b commit 5f9dca5
Show file tree
Hide file tree
Showing 11 changed files with 111 additions and 111 deletions.
2 changes: 1 addition & 1 deletion internal/dremel/dremel_generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 9 additions & 9 deletions internal/dremel/dremel_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -112,22 +112,22 @@ func TestDremel(t *testing.T) {
}

type Link struct {
Backward []int64
Forward []int64
Backward []int64 `parquet:"backward"`
Forward []int64 `parquet:"forward"`
}

type Language struct {
Code string
Country *string
Code string `parquet:"code"`
Country *string `parquet:"country"`
}

type Name struct {
Languages []Language
URL *string
Languages []Language `parquet:"languages"`
URL *string `parquet:"url"`
}

type Document struct {
DocID int64
Link *Link
Names []Name
DocID int64 `parquet:"docid"`
Link *Link `parquet:"link"`
Names []Name `parquet:"names"`
}
2 changes: 1 addition & 1 deletion internal/dremel/write_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -516,7 +516,7 @@ func TestWrite(t *testing.T) {
{
name: "writeFriendsID",
fields: []fields.Field{
{Type: "Person", FieldNames: []string{"Friends", "ID"}, FieldTypes: []string{"Being", "int32"}, TypeName: "int32", FieldType: "Int32OptionalField", ParquetType: "Int32Type", ColumnName: "id", Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 0}},
{Type: "Person", FieldNames: []string{"Friends", "ID"}, FieldTypes: []string{"Being", "int32"}, TypeName: "int32", FieldType: "Int32OptionalField", ParquetType: "Int32Type", Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 0}},
},
result: `func writeFriendsID(x *Person, vals []int32, defs, reps []uint8) (int, int) {
var nVals, nLevels int
Expand Down
8 changes: 4 additions & 4 deletions internal/fields/fields.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,12 +11,12 @@ type Field struct {
Type string
RepetitionTypes RepetitionTypes
FieldNames []string
ColumnNames []string
FieldTypes []string
Seen RepetitionTypes
TypeName string
FieldType string
ParquetType string
ColumnName string
Category string
}

Expand Down Expand Up @@ -249,9 +249,9 @@ func (f Field) parent(start int) string {
}

func (f Field) Path() string {
out := make([]string, len(f.FieldNames))
for i, n := range f.FieldNames {
out[i] = fmt.Sprintf(`"%s"`, strings.ToLower(n))
out := make([]string, len(f.ColumnNames))
for i, n := range f.ColumnNames {
out[i] = fmt.Sprintf(`"%s"`, n)
}
return strings.Join(out, ", ")
}
Expand Down
2 changes: 1 addition & 1 deletion internal/gen/funcs.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -74,7 +74,7 @@ var (
}
return out
},
"columnName": func(f fields.Field) string { return strings.ToLower(strings.Join(f.FieldNames, ".")) },
"columnName": func(f fields.Field) string { return strings.Join(f.ColumnNames, ".") },
"writeFunc": dremel.Write,
"readFunc": dremel.Read,
"writeFuncName": func(f fields.Field) string { return fmt.Sprintf("write%s", strings.Join(f.FieldNames, "")) },
Expand Down
2 changes: 1 addition & 1 deletion internal/gen/template.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

148 changes: 74 additions & 74 deletions internal/parse/fields_test.go

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions internal/parse/parse.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,7 +17,7 @@ const letters = "abcdefghijklmnopqrstuvwxyz"

type field struct {
Field fields.Field
tagName string
tagNames []string
fieldName string
fieldType string
omit bool
Expand Down Expand Up @@ -96,6 +96,7 @@ func getOut(i int, f field, fields map[string][]field, errs []error, out []field
fld.Field.RepetitionTypes = append(append(f.Field.RepetitionTypes[:0:0], f.Field.RepetitionTypes...), o) //make a copy
fld.Field.FieldNames = append(f.Field.FieldNames, fld.Field.FieldNames...)
fld.Field.FieldTypes = append(f.Field.FieldTypes, fld.Field.FieldTypes...)
fld.Field.ColumnNames = append(f.Field.ColumnNames, fld.Field.ColumnNames...)
}
i, out, errs = getOut(i, fld, fields, errs, out)
}
Expand Down Expand Up @@ -142,11 +143,6 @@ func getFields(fullTyp string, fields []field, m map[string][]field) []flds.Fiel
}

f.Field.Type = fullTyp
if f.tagName != "" {
f.Field.ColumnName = f.tagName
} else {
f.Field.ColumnName = strings.Join(f.Field.FieldNames, ".")
}
out = append(out, f.Field)
}
return out
Expand Down Expand Up @@ -215,18 +211,22 @@ func getField(name string, x ast.Node) field {
return true
})

if tag == "" {
tag = name
}

fn, cat, pt := lookupTypeAndCategory(typ, optional, repeated)
return field{
Field: flds.Field{
FieldNames: []string{name},
FieldTypes: []string{typ},
ColumnNames: []string{tag},
TypeName: getTypeName(typ, optional),
FieldType: fn,
ParquetType: pt,
Category: cat},
fieldName: name,
fieldType: typ,
tagName: tag,
omit: tag == "-",
optional: optional,
repeated: repeated,
Expand Down
12 changes: 6 additions & 6 deletions internal/parse/parse_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -120,21 +120,21 @@ type Slice4 struct {
}

type Hobby2 struct {
Names []string
Names []string `parquet:"names"`
}

type Slice5 struct {
ID int32 `parquet:"id"`
Hobby Hobby2
ID int32 `parquet:"id"`
Hobby Hobby2 `parquet:"hobby"`
}

type Slice6 struct {
ID int32
Hobbies []Hobby2
ID int32 `parquet:"id"`
Hobbies []Hobby2 `parquet:"hobbies"`
}

type Slice7 struct {
Thing *Slice6
Thing *Slice6 `parquet:"thing"`
}

type Link struct {
Expand Down
10 changes: 5 additions & 5 deletions parquet.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -230,7 +230,7 @@ func (m *Metadata) Footer(w io.Writer) error {
}

for _, col := range mrg.fields.fields {
ch, ok := mrg.columns[strings.ToLower(strings.Join(col.Path, "."))]
ch, ok := mrg.columns[strings.Join(col.Path, ".")]
if !ok {
continue
}
Expand Down Expand Up @@ -307,7 +307,7 @@ func schemaElements(fields []Field) schema {
for _, f := range fields {
var z int32
se := sch.SchemaElement{
Name: strings.ToLower(f.Name),
Name: f.Name,
TypeLength: &z,
Scale: &z,
Precision: &z,
Expand All @@ -316,7 +316,7 @@ func schemaElements(fields []Field) schema {

f.Type(&se)
f.RepetitionType(&se)
m[strings.ToLower(strings.Join(f.Path, "."))] = se
m[strings.Join(f.Path, ".")] = se
}

return schema{lookup: m, fields: fields}
Expand All @@ -331,7 +331,7 @@ func (m *Metadata) Pages() (map[string][]Page, error) {
for _, rg := range m.metadata.RowGroups {
for _, ch := range rg.Columns {
pth := ch.MetaData.PathInSchema
_, ok := m.schema.lookup[strings.ToLower(strings.Join(pth, "."))]
_, ok := m.schema.lookup[strings.Join(pth, ".")]
if !ok {
return nil, fmt.Errorf("could not find schema for %v", pth)
}
Expand All @@ -342,7 +342,7 @@ func (m *Metadata) Pages() (map[string][]Page, error) {
Size: int(ch.MetaData.TotalCompressedSize),
Codec: ch.MetaData.Codec,
}
k := strings.ToLower(strings.Join(pth, "."))
k := strings.Join(pth, ".")
out[k] = append(out[k], pg)
}
}
Expand Down
4 changes: 2 additions & 2 deletions parquet_generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5f9dca5

Please sign in to comment.