Skip to content

Commit

Permalink
fix reading
Browse files Browse the repository at this point in the history
  • Loading branch information
Moelf committed Nov 13, 2024
1 parent 4c545e4 commit b0e5f29
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 62 deletions.
6 changes: 3 additions & 3 deletions src/RNTuple/Writing/Stubs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const dummy_padding2 = [
const RBlob1 = UnROOT.RBlob(; fNbytes = 0x00DC, fVersion = 0x0004, fObjLen = 0x000000BA, fDatime = WRITE_TIME, fKeyLen = 0x0022,
fCycle = 0x0001, fSeekKey = 244, fSeekPdir = 100, fClassName = "RBlob", fName = "", fTitle = "")
const rnt_header = UnROOT.RNTupleHeader(zero(UInt64), "myntuple", "", "ROOT v6.33.01", [
UnROOT.FieldRecord(zero(UInt32), zero(UInt32), zero(UInt32), zero(UInt16), zero(UInt16), 0, -1, -1, "one_uint", "std::uint32_t", "", ""),
UnROOT.FieldRecord(zero(UInt32), zero(UInt32), zero(UInt32), zero(UInt16), zero(UInt16), "one_uint", "std::uint32_t", "", "", 0, -1, -1),
], [UnROOT.ColumnRecord(0x14, 0x20, zero(UInt32), 0x00, 0x00, 0),], UnROOT.AliasRecord[], UnROOT.ExtraTypeInfo[])


Expand All @@ -49,14 +49,14 @@ UnROOT.RNTuplePageTopList([
const pagelink = UnROOT.PageLink(0x3dec59c009c67e28, cluster_summary.payload, nested_page_locations)

const RBlob4 = UnROOT.RBlob(0x00C2, 0x0004, 0x000000A0, WRITE_TIME, 0x0022, 0x0001, 0x029c, 100, "RBlob", "", "")
const rnt_footer = UnROOT.RNTupleFooter(0, 0x3dec59c009c67e28, UnROOT.RNTupleSchemaExtension([], [], [], []), [], [
const rnt_footer = UnROOT.RNTupleFooter(0, 0x3dec59c009c67e28, UnROOT.RNTupleSchemaExtension([], [], [], []), [
UnROOT.ClusterGroupRecord(0, 1, 1, UnROOT.EnvLink(0x000000000000007c, UnROOT.Locator(124, 0x0000000000000220, ))),
])
const tkey32_anchor = UnROOT.TKey32(134, 4, 70, WRITE_TIME, 64, 1, 866, 100, "ROOT::Experimental::RNTuple", "myntuple", "")
# these 6 bytes are between tkey32_anchor and the actual anchor
const magic_6bytes = [0x40, 0x00, 0x00, 0x42, 0x00, 0x06]

const rnt_anchor = UnROOT.ROOT_3a3a_Experimental_3a3a_RNTuple(0x0000, 0x0002, 0x0000, 0x0000, 0x0000000000000116, 0x00000000000000ba, 0x00000000000000ba, 0x00000000000002be, 0x00000000000000a0, 0x00000000000000a0, 0x0000000040000000, 0xdc495fd01479af1b)
const rnt_anchor = UnROOT.ROOT_3a3a_RNTuple(0x0000, 0x0002, 0x0000, 0x0000, 0x0000000000000116, 0x00000000000000ba, 0x00000000000000ba, 0x00000000000002be, 0x00000000000000a0, 0x00000000000000a0, 0x0000000040000000, 0xdc495fd01479af1b)
const tkey32_TDirectory = UnROOT.TKey32(121, 4, 68, WRITE_TIME, 53, 1, 0x000003ec, 100, "", "test_ntuple_minimal.root", "")
# 1 key, and it is the RNTuple Anchor
const n_keys = [
Expand Down
5 changes: 2 additions & 3 deletions src/RNTuple/Writing/TFileWriter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,6 @@ function rnt_write(io::IO, x::UnROOT.RNTupleFooter; envelope=true)
rnt_write(temp_io, x.feature_flag)
rnt_write(temp_io, x.header_checksum)
rnt_write(temp_io, x.extension_header_links)
rnt_write(temp_io, Write_RNTupleListFrame(x.column_group_records))
rnt_write(temp_io, Write_RNTupleListFrame(x.cluster_group_records))

# add id_length size and checksum size
Expand All @@ -415,7 +414,7 @@ function rnt_write(io::IO, x::UnROOT.RNTupleFooter; envelope=true)
end
end

function rnt_write(io::IO, x::UnROOT.ROOT_3a3a_Experimental_3a3a_RNTuple)
function rnt_write(io::IO, x::UnROOT.ROOT_3a3a_RNTuple)
temp_io = IOBuffer()
rnt_write(temp_io, x.fVersionEpoch; legacy=true)
rnt_write(temp_io, x.fVersionMajor; legacy=true)
Expand Down Expand Up @@ -597,7 +596,7 @@ function write_rntuple(file::IO, table; file_name="test_ntuple_minimal.root", rn

RBlob4_obs = rnt_write_observe(file, Stubs.RBlob4)
rntAnchor_update[:fSeekFooter] = UInt32(position(file))
rnt_footer = UnROOT.RNTupleFooter(0, _checksum(rnt_header_obs.object), UnROOT.RNTupleSchemaExtension([], [], [], []), [], [
rnt_footer = UnROOT.RNTupleFooter(0, _checksum(rnt_header_obs.object), UnROOT.RNTupleSchemaExtension([], [], [], []), [
UnROOT.ClusterGroupRecord(0, input_length, 1, UnROOT.EnvLink(pagelink_obs.len, UnROOT.Locator(pagelink_obs.len, pagelink_obs.position, ))),
])
rnt_footer_obs = rnt_write_observe(file, rnt_footer)
Expand Down
12 changes: 9 additions & 3 deletions src/RNTuple/bootstrap.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# https://github.com/root-project/root/blob/a4deb370c9b9870f0391036890981f648559ef68/tree/ntuple/v7/inc/ROOT/RNTupleAnchor.hxx#L69
Base.@kwdef struct ROOT_3a3a_Experimental_3a3a_RNTuple <: ROOTStreamedObject
Base.@kwdef struct ROOT_3a3a_RNTuple <: ROOTStreamedObject
fVersionEpoch::UInt16
fVersionMajor::UInt16
fVersionMinor::UInt16
Expand All @@ -14,13 +14,13 @@ Base.@kwdef struct ROOT_3a3a_Experimental_3a3a_RNTuple <: ROOTStreamedObject
fChecksum::UInt64
end

function ROOT_3a3a_Experimental_3a3a_RNTuple(io, tkey::TKey, refs)
function ROOT_3a3a_RNTuple(io, tkey::TKey, refs)
local_io = datastream(io, tkey)
skip(local_io, 6)
_before_anchor = position(local_io)
anchor_checksum = xxh3_64(read(local_io, 2*4 + 7*8))
seek(local_io, _before_anchor)
anchor = ROOT_3a3a_Experimental_3a3a_RNTuple(;
anchor = ROOT_3a3a_RNTuple(;
fVersionEpoch = readtype(local_io, UInt16),
fVersionMajor = readtype(local_io, UInt16),
fVersionMinor = readtype(local_io, UInt16),
Expand All @@ -36,6 +36,12 @@ function ROOT_3a3a_Experimental_3a3a_RNTuple(io, tkey::TKey, refs)
)

@assert anchor.fChecksum == anchor_checksum "RNtuple anchor checksum doesn't match"
# only specification versions 0.3.x.x and 1.0.x.x are supported for the moment.
if anchor.fVersionEpoch == 0 && anchor.fVersionMajor == 3
elseif anchor.fVersionEpoch == 1 && anchor.fVersionMajor == 0
else
error("RNTuple with specification version $(anchor.fVersionEpoch).$(anchor.fVersionMajor).x.x is not yet supported.")
end


header_bytes = decompress_bytes(read_seek_nb(io, anchor.fSeekHeader, anchor.fNBytesHeader), anchor.fLenHeader)
Expand Down
72 changes: 41 additions & 31 deletions src/RNTuple/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,49 @@
Base.promote_rule(::Type{Int64}, ::Type{Index64}) = Int64
Base.promote_rule(::Type{Index64}, ::Type{Int64}) = Int64

#https://github.com/root-project/root/blob/master/tree/ntuple/v7/doc/specifications.md
const rntuple_col_type_dict = (
Index64,
Index32,
Switch, # Switch
UInt8, # byte in blob
UInt8, # char
Bool, # it's actually `Bit` in ROOT, there's no byte bool in RNTuple spec
Float64,
Float32,
Float16,
UInt64,
UInt32,
UInt16,
UInt8,
Index64, # split delta
Index32, # split delta
Float64, # split
Float32, # split
Float16, # split
UInt64, # split
UInt32, # split
UInt16, # split
"""
    RNTuple_ColumnType(; type, nbits, name, jltype, issplit=false, isdelta=false, iszigzag=false)

Description of one RNTuple column type, used as the element type of
`rntuple_col_type_table`.

# Fields
- `type::UInt8`: numeric type code of the column type.
- `nbits::Int`: number of bits one element occupies.
- `name::Symbol`: name of the column type (e.g. `:SplitInt32`).
- `jltype::DataType`: Julia type the column elements are read as.
- `issplit::Bool`: whether the column uses split encoding.
- `isdelta::Bool`: whether the column uses delta encoding.
- `iszigzag::Bool`: whether the column uses zig-zag encoding.
"""
Base.@kwdef struct RNTuple_ColumnType
    type::UInt8
    nbits::Int
    name::Symbol
    jltype::DataType
    # encoding flags; plain (non-encoded) columns keep all three at `false`
    issplit::Bool = false
    isdelta::Bool = false
    iszigzag::Bool = false
end

Int64,
Int32,
Int16,
Int8,
Int64, # split + Zig-Zag encoding
Int32, # split + Zig-Zag encoding
Int16, # split + Zig-Zag encoding
#https://github.com/root-project/root/blob/1de46e89958fd3946d2d6995c810391b781d39ac/tree/ntuple/v7/doc/BinaryFormatSpecification.md?plain=1#L479
# Table of the known RNTuple column types. Entries are stored in increasing
# order of their type code starting at 0x00, so the entry for type code `t`
# is `rntuple_col_type_table[t + 1]`.
const rntuple_col_type_table = (
    RNTuple_ColumnType(type = 0x00, nbits = 1,  name = :Bit,          jltype = Bool),
    RNTuple_ColumnType(type = 0x01, nbits = 8,  name = :Byte,         jltype = UInt8),
    RNTuple_ColumnType(type = 0x02, nbits = 8,  name = :Char,         jltype = UInt8),
    RNTuple_ColumnType(type = 0x03, nbits = 8,  name = :Int8,         jltype = Int8),
    RNTuple_ColumnType(type = 0x04, nbits = 8,  name = :UInt8,        jltype = UInt8),
    RNTuple_ColumnType(type = 0x05, nbits = 16, name = :Int16,        jltype = Int16),
    RNTuple_ColumnType(type = 0x06, nbits = 16, name = :UInt16,       jltype = UInt16),
    RNTuple_ColumnType(type = 0x07, nbits = 32, name = :Int32,        jltype = Int32),
    RNTuple_ColumnType(type = 0x08, nbits = 32, name = :UInt32,       jltype = UInt32),
    RNTuple_ColumnType(type = 0x09, nbits = 64, name = :Int64,        jltype = Int64),
    RNTuple_ColumnType(type = 0x0A, nbits = 64, name = :UInt64,       jltype = UInt64),
    RNTuple_ColumnType(type = 0x0B, nbits = 16, name = :Real16,       jltype = Float16),
    RNTuple_ColumnType(type = 0x0C, nbits = 32, name = :Real32,       jltype = Float32),
    RNTuple_ColumnType(type = 0x0D, nbits = 64, name = :Real64,       jltype = Float64),
    RNTuple_ColumnType(type = 0x0E, nbits = 32, name = :Index32,      jltype = Index32),
    RNTuple_ColumnType(type = 0x0F, nbits = 64, name = :Index64,      jltype = Index64),
    RNTuple_ColumnType(type = 0x10, nbits = 96, name = :Switch,       jltype = Switch),
    RNTuple_ColumnType(type = 0x11, nbits = 16, name = :SplitInt16,   jltype = Int16,   issplit = true, iszigzag = true),
    RNTuple_ColumnType(type = 0x12, nbits = 16, name = :SplitUInt16,  jltype = UInt16,  issplit = true),
    # SplitInt32 elements are 32 bits wide (was mistakenly declared as 64)
    RNTuple_ColumnType(type = 0x13, nbits = 32, name = :SplitInt32,   jltype = Int32,   issplit = true, iszigzag = true),
    RNTuple_ColumnType(type = 0x14, nbits = 32, name = :SplitUInt32,  jltype = UInt32,  issplit = true),
    RNTuple_ColumnType(type = 0x15, nbits = 64, name = :SplitInt64,   jltype = Int64,   issplit = true, iszigzag = true),
    RNTuple_ColumnType(type = 0x16, nbits = 64, name = :SplitUInt64,  jltype = UInt64,  issplit = true),
    RNTuple_ColumnType(type = 0x17, nbits = 16, name = :SplitReal16,  jltype = Float16, issplit = true),
    RNTuple_ColumnType(type = 0x18, nbits = 32, name = :SplitReal32,  jltype = Float32, issplit = true),
    RNTuple_ColumnType(type = 0x19, nbits = 64, name = :SplitReal64,  jltype = Float64, issplit = true),
    RNTuple_ColumnType(type = 0x1A, nbits = 32, name = :SplitIndex32, jltype = Index32, issplit = true, isdelta = true),
    RNTuple_ColumnType(type = 0x1B, nbits = 64, name = :SplitIndex64, jltype = Index64, issplit = true, isdelta = true),
    # Not in the table yet (variable bit width):
    #   0x1C, 10-31 bits, :Real32Trunc
    #   0x1D,  1-32 bits, :Real32Quant
)
const rntuple_col_nbits_dict = Tuple([(sizeof.(rntuple_col_type_dict[1:5]) .* 8) ...; 1; (sizeof.(rntuple_col_type_dict[7:end]) .* 8)...])

const rntuple_role_leaf = 0x0000
const rntuple_role_vector = 0x0001
Expand Down
1 change: 0 additions & 1 deletion src/RNTuple/displays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ function Base.show(io::IO, footer::RNTupleFooter, indent=0)
println(io, "$ind feature_flag: $(footer.feature_flag)")
println(io, "$ind header_checksum: $(repr(footer.header_checksum))")
println(io, "$ind extension_header_links: $(footer.extension_header_links)")
println(io, "$ind column_group_records: $(footer.column_group_records)")
println(io, "$ind cluster_group_records: $(footer.cluster_group_records)")
end

Expand Down
7 changes: 4 additions & 3 deletions src/RNTuple/fieldcolumn_reading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,10 @@ function read_field(io, field::UnionField{S, T}, page_list) where {S, T}
end

"""
    _detect_encoding(typenum)

Look up the column type with type code `typenum` in `rntuple_col_type_table`
and return its encoding flags as the named tuple `(; split, zigzag, delta)`.
"""
function _detect_encoding(typenum)
    # type codes start at 0 while Julia tuples are 1-based
    col_type = rntuple_col_type_table[typenum + 1]
    return (;
        split = col_type.issplit,
        zigzag = col_type.iszigzag,
        delta = col_type.isdelta,
    )
end

Expand Down
6 changes: 3 additions & 3 deletions src/RNTuple/fieldcolumn_schema.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,18 +88,18 @@ end
isvoid(::Type{<:RNTupleCardinality}) = false

function _search_col_type(field_id, column_records, col_id::Int...)
if length(col_id) == 2 && column_records[col_id[2]].type == 5
if length(col_id) == 2 && column_records[col_id[2]].type == 0x02 #Char
index_record = column_records[col_id[1]]
char_record = column_records[col_id[2]]
index_typenum = index_record.type
LeafType = rntuple_col_type_dict[index_typenum]
LeafType = rntuple_col_type_table[index_typenum+0x01].jltype
return StringField(
LeafField{LeafType}(col_id[1],index_record),
LeafField{Char}(col_id[2], char_record)
)
elseif length(col_id) == 1
record = column_records[only(col_id)]
LeafType = rntuple_col_type_dict[record.type]
LeafType = rntuple_col_type_table[record.type+0x01].jltype
return LeafField{LeafType}(only(col_id), record)
else
error("un-handled RNTuple case, report issue to UnROOT.jl")
Expand Down
5 changes: 0 additions & 5 deletions src/RNTuple/footer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@ end
locator::Locator
end

@SimpleStruct struct ColumnGroupRecord
column_ids::Vector{UInt32}
end

@SimpleStruct struct ClusterGroupRecord
minimum_entry_number::Int64
entry_span::Int64
Expand Down Expand Up @@ -44,7 +40,6 @@ end
feature_flag::UInt64
header_checksum::UInt64
extension_header_links::RNTupleSchemaExtension
column_group_records::Vector{ColumnGroupRecord}
cluster_group_records::Vector{ClusterGroupRecord}
end

Expand Down
18 changes: 9 additions & 9 deletions src/RNTuple/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,38 @@ Base.@kwdef struct FieldRecord
parent_field_id::UInt32
struct_role::UInt16
flags::UInt16
repetition::Int64
source_field_id::Int32
root_streamer_checksum::Int32
field_name::String
type_name::String
type_alias::String
field_desc::String
repetition::Int64
source_field_id::Int32
root_streamer_checksum::Int32
end
"""
    _rntuple_read(io, ::Type{FieldRecord})

Read one `FieldRecord` from `io`.

The fixed-size members are read first (`field_version`, `type_version`,
`parent_field_id`, `struct_role`, `flags`), followed by four strings
(`field_name`, `type_name`, `type_alias`, `field_desc`). The remaining members
are only read from `io` when the corresponding bit of `flags` is set:

- `0x01`: `repetition` (`Int64`), otherwise `0`
- `0x02`: `source_field_id` (`Int32`), otherwise `-1`
- `0x04`: `root_streamer_checksum` (`Int32`), otherwise `-1`
"""
function _rntuple_read(io, ::Type{FieldRecord})
    field_version = read(io, UInt32)
    type_version = read(io, UInt32)
    parent_field_id = read(io, UInt32)
    struct_role = read(io, UInt16)
    flags = read(io, UInt16)
    # the four strings come before the flag-dependent optional members
    field_name, type_name, type_alias, field_desc =
        (_rntuple_read(io, String) for _ in 1:4)
    # defaults use the same type as the value read, keeping each local type-stable
    repetition = iszero(flags & 0x01) ? zero(Int64) : read(io, Int64)
    source_field_id = iszero(flags & 0x02) ? Int32(-1) : read(io, Int32)
    root_streamer_checksum = iszero(flags & 0x04) ? Int32(-1) : read(io, Int32)
    return FieldRecord(;
        field_version, type_version, parent_field_id, struct_role, flags,
        field_name, type_name, type_alias, field_desc,
        repetition, source_field_id, root_streamer_checksum,
    )
end

struct ColumnRecord
Expand Down
2 changes: 1 addition & 1 deletion src/RNTuple/highlevel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ julia> LazyTree(f, "ntuple")
"""
struct RNTuple{O}
io::O
anchor::ROOT_3a3a_Experimental_3a3a_RNTuple
anchor::ROOT_3a3a_RNTuple
header::RNTupleHeader
footer::RNTupleFooter
pagelinks::Dict{Int, PageLink}
Expand Down

0 comments on commit b0e5f29

Please sign in to comment.