Skip to content

Commit

Permalink
Optional bool was still broken
Browse files Browse the repository at this point in the history
  • Loading branch information
Craig Swank committed Feb 15, 2019
1 parent d27cfdb commit 927a355
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 38 deletions.
61 changes: 25 additions & 36 deletions parquet.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package parquet
import (
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
Expand Down Expand Up @@ -356,50 +355,40 @@ func StringType(se *sch.SchemaElement) {
}

func GetBools(r io.Reader, n int, pageSizes []int) ([]bool, error) {
sizes := getSizes(pageSizes)
var index int
var vals [8]uint32
data, _ := ioutil.ReadAll(r)
out := make([]bool, n)
var size int
for i := 0; i < n; i++ {
if index == 0 {
if len(data) == 0 {
return nil, errors.New("not enough data to decode all values")
}
vals = unpack8uint32(data[0])
data = data[1:]
size = sizes[0]
sizes = sizes[1:]
out := make([]bool, 0, n)
for _, nVals := range pageSizes {
if nVals == 0 {
continue
}

l := (nVals / 8)
if nVals%8 > 0 {
l++
}
out[i] = vals[index] == 1
if size > 0 {
index = (index + 1) % size
} else {
index = 0

var i int
chunk := data[:l]
data = data[l:]
for _, b := range chunk {
vals = unpack8uint32(b)
m := min(nVals, 8)
for j := 0; j < m; j++ {
out = append(out, vals[j] == 1)
}
i += m
nVals -= m
}
}
return out, nil
}

func getSizes(sizes []int) []int {
var out []int
for _, s := range sizes {
if s > 8 {
for s > 0 {
if s > 8 {
out = append(out, 8)
s -= 8
} else {
out = append(out, s)
s = 0
}
}
} else {
out = append(out, s)
}
func min(a, b int) int {
if a < b {
return a
}
return out
return b
}

func unpack8uint32(data byte) [8]uint32 {
Expand Down
2 changes: 0 additions & 2 deletions parquet_generated_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,6 @@ func (f *Int32OptionalField) Read(r io.ReadSeeker, meta *parquet.Metadata, pos p
if err != nil {
return err
}

v := make([]int32, f.Values()-len(f.vals))
err = binary.Read(rr, binary.LittleEndian, &v)
f.vals = append(f.vals, v...)
Expand Down Expand Up @@ -764,7 +763,6 @@ func (f *BoolOptionalField) Scan(r *Person) {
if len(f.Defs) == 0 {
return
}

var val *bool
if f.Defs[0] == 1 {
v := f.vals[0]
Expand Down
42 changes: 42 additions & 0 deletions parquet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package parquet_test
import (
"bytes"
"fmt"
"math/rand"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -264,6 +265,21 @@ func TestParquet(t *testing.T) {
},
},
},
{
name: "boolean optional large amount small page size",
pageSize: 2,
input: getOptBools(31),
},
{
name: "boolean optional really large amount small page size",
pageSize: 2,
input: getOptBools(3001),
},
{
name: "boolean optional really large amount large page size",
pageSize: 3000,
input: getOptBools(3001),
},
{
name: "boolean multiple row groups small page size",
pageSize: 2,
Expand Down Expand Up @@ -411,3 +427,29 @@ func pbool(b bool) *bool { return &b }
// pstring copies s into a freshly allocated string and returns its address,
// which is handy for filling optional (*string) fields in test fixtures.
func pstring(s string) *string {
	p := new(string)
	*p = s
	return p
}
// pfloat32 copies f into a freshly allocated float32 and returns its address,
// which is handy for filling optional (*float32) fields in test fixtures.
func pfloat32(f float32) *float32 {
	p := new(float32)
	*p = f
	return p
}
// pfloat64 copies f into a freshly allocated float64 and returns its address,
// which is handy for filling optional (*float64) fields in test fixtures.
func pfloat64(f float64) *float64 {
	p := new(float64)
	*p = f
	return p
}

// getOptBools builds count Person rows whose optional Keen field is randomly
// nil, true, or false, and splits them into row groups of at most 100 rows.
//
// Rows are returned in generation order. The last group holds the remainder
// and is only appended when it is non-empty, so a count of 0 yields nil.
func getOptBools(count int) [][]Person {
	var out [][]Person
	var rg []Person
	for i := 0; i < count; i++ {
		// Close the current row group every 100 rows and start a new one.
		if i > 0 && i%100 == 0 {
			out = append(out, rg)
			rg = nil
		}

		// rand.Intn(3) yields 0, 1 or 2. 0 leaves Keen nil, 1 sets it to
		// true and 2 sets it to false. (A case for 3 would be unreachable
		// and the generated data would never contain a non-nil false.)
		var b *bool
		switch rand.Intn(3) {
		case 1:
			x := true
			b = &x
		case 2:
			x := false
			b = &x
		}
		rg = append(rg, Person{Keen: b})
	}
	if len(rg) > 0 {
		out = append(out, rg)
	}
	return out
}

0 comments on commit 927a355

Please sign in to comment.