package goparquet

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"testing"

	"github.com/fraugster/parquet-go/parquet"
	"github.com/fraugster/parquet-go/parquetschema"
	"github.com/stretchr/testify/require"
)
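
// These tests exercise the reader's allocation tracker: a file with a known
// amount of data is written, then read back through NewFileReaderWithOptions
// with a WithMaximumMemorySize budget that is either too small (the error
// case below) or comfortably large (the no-error case).
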
func TestAllocTrackerTriggerError(t *testing.T) {
	var buf bytes.Buffer

	sd, err := parquetschema.ParseSchemaDefinition(`message test {
		required binary foo (STRING);
	}`)
	require.NoError(t, err)

	// The row group and page size limits (150 MiB each) are far larger than the
	// data written below, so everything lands in a single row group and page.
	wr := NewFileWriter(&buf,
		WithSchemaDefinition(sd),
		WithMaxRowGroupSize(150*1024*1024),
		WithMaxPageSize(150*1024*1024),
		WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
	)

	// Write 20*1024 rows of 1 KiB each (512 bytes, hex-encoded to 1024
	// characters), i.e. ~20 MiB of easily compressible data. The first two
	// bytes encode the row index so that every value is distinct.
	for i := 0; i < 20*1024; i++ {
		err := wr.AddData(map[string]interface{}{
			"foo": func() []byte {
				var data [512]byte
				data[0] = byte(i % 256)
				data[1] = byte(i / 256)
				return []byte(fmt.Sprintf("%x", data[:]))
			}(),
		})
		require.NoError(t, err)
	}

	require.NoError(t, wr.FlushRowGroup())
	require.NoError(t, wr.Close())

	t.Logf("buf size: %d", buf.Len())

	// The reader is limited to a maximum memory size of 10 MiB (10485760
	// bytes), so fully reading the ~20 MiB file created above should fail.
	r, err := NewFileReaderWithOptions(bytes.NewReader(buf.Bytes()), WithMaximumMemorySize(10*1024*1024))
	require.NoError(t, err)

	_, err = r.NextRow()
	require.Error(t, err)
	require.Contains(t, err.Error(), "bytes is greater than configured maximum of 10485760 bytes")
}
func TestAllocTrackerTriggerNoError(t *testing.T) {
	var buf bytes.Buffer

	sd, err := parquetschema.ParseSchemaDefinition(`message test {
		required binary foo (STRING);
	}`)
	require.NoError(t, err)

	// Unlike TestAllocTrackerTriggerError, the page size is capped at 1 MiB,
	// so the data is spread across many small pages.
	wr := NewFileWriter(&buf,
		WithSchemaDefinition(sd),
		WithMaxPageSize(1024*1024),
		WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
	)

	// Write the same ~20 MiB of easily compressible data as in the test above.
	for i := 0; i < 20*1024; i++ {
		err := wr.AddData(map[string]interface{}{
			"foo": func() []byte {
				var data [512]byte
				data[0] = byte(i % 256)
				data[1] = byte(i / 256)
				return []byte(fmt.Sprintf("%x", data[:]))
			}(),
		})
		require.NoError(t, err)
	}

	require.NoError(t, wr.FlushRowGroup())
	require.NoError(t, wr.Close())

	t.Logf("buf size: %d", buf.Len())

	// With a generous maximum memory size of 100 MiB, fully reading the
	// ~20 MiB file created above should succeed.
	r, err := NewFileReaderWithOptions(bytes.NewReader(buf.Bytes()), WithMaximumMemorySize(100*1024*1024))
	require.NoError(t, err)

	for i := 0; ; i++ {
		_, err := r.NextRow()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break // all rows were read within the configured budget
			}
			t.Fatalf("NextRow %d returned error: %v", i, err)
		}
	}
}
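
// readAllRowsCapped is a usage sketch added for illustration, not part of the
// original test suite: it shows how a caller might combine the same
// WithMaximumMemorySize option with a NextRow loop so that a corrupt or
// hostile file cannot force unbounded allocations. The function name and the
// 64 MiB budget are illustrative choices, not fixed APIs of this package.
func readAllRowsCapped(rs io.ReadSeeker) (int, error) {
	r, err := NewFileReaderWithOptions(rs, WithMaximumMemorySize(64*1024*1024))
	if err != nil {
		return 0, err
	}
	rows := 0
	for {
		if _, err := r.NextRow(); err != nil {
			if errors.Is(err, io.EOF) {
				return rows, nil // end of file: every row fit within the budget
			}
			return rows, err // includes allocation-tracker violations
		}
		rows++
	}
}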