diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61f30a11caf2..84396f6aeb79 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,6 +37,7 @@ Date: 2024-03-21
 - [[PR 996]](https://github.com/parthenon-hpc-lab/parthenon/pull/996) Remove dynamic allocations from swarm particle creation
 
 ### Changed (changing behavior/API/variables/...)
+- [[PR1019](https://github.com/parthenon-hpc-lab/parthenon/pull/1019) Enable output for non-cell-centered variables
 - [[PR 973]](https://github.com/parthenon-hpc-lab/parthenon/pull/973) Multigrid performance upgrades
 
 ### Fixed (not changing behavior/API/variables/...)
@@ -55,6 +56,7 @@ Date: 2024-03-21
 - [[PR 982]](https://github.com/parthenon-hpc-lab/parthenon/pull/982) add some gut check testing for parthenon-VIBE
 
 ### Incompatibilities (i.e. breaking changes)
+- [[PR1019](https://github.com/parthenon-hpc-lab/parthenon/pull/1019) Remove support for file formats < 3
 - [[PR 987]](https://github.com/parthenon-hpc-lab/parthenon/pull/987) Change the API for what was IterativeTasks
 - [[PR 974]](https://github.com/parthenon-hpc-lab/parthenon/pull/974) Change GetParentPointer to always return T*
 - [[PR 996]](https://github.com/parthenon-hpc-lab/parthenon/pull/996) Remove dynamic allocations from swarm particle creation
diff --git a/README.md b/README.md
index 3df6b6871cdb..ed6a1bb05a16 100644
--- a/README.md
+++ b/README.md
@@ -13,11 +13,12 @@ Parthenon -- a performance portable block-structured adaptive mesh refinement fr
 * High performance by
   * device first/device resident approach (work data only in device memory to prevent expensive transfers between host and device)
   * transparent packing of data across blocks (to reduce/hide kernel launch latency)
-  * direct device-to-device communication via asynchronous, one-sided  MPI communication
+  * direct device-to-device communication via asynchronous MPI communication
 * Intermediate abstraction layer to hide complexity of device kernel launches
 * Flexible, plug-in package system
 * Abstract variables controlled via metadata flags
 * Support for particles
+* Support for cell-, node-, face-, and edge-centered fields
 * Multi-stage drivers/integrators with support for task-based parallelism
 
 # Community
diff --git a/doc/sphinx/index.rst b/doc/sphinx/index.rst
index d78ca4d422dc..924cc3f7ce5e 100644
--- a/doc/sphinx/index.rst
+++ b/doc/sphinx/index.rst
@@ -15,11 +15,12 @@ Key Features
 
 * Device first/device resident approach (work data only in device memory to prevent expensive transfers between host and device)
 * Transparent packing of data across blocks (to reduce/hide kernel launch latency)
-* Direct device-to-device communication via asynchronous, one-sided  MPI communication
+* Direct device-to-device communication via asynchronous MPI communication
 * Intermediate abstraction layer to hide complexity of device kernel launches
 * Flexible, plug-in package system
 * Abstract variables controlled via metadata flags
 * Support for particles
+* Support for cell-, node-, face-, and edge-centered fields
 * Multi-stage drivers/integrators with support for task-based parallelism
 
 Community
diff --git a/doc/sphinx/src/outputs.rst b/doc/sphinx/src/outputs.rst
index 170c83ebac7e..2c274f04c078 100644
--- a/doc/sphinx/src/outputs.rst
+++ b/doc/sphinx/src/outputs.rst
@@ -398,6 +398,11 @@ capable of opening and visualizing Parthenon graphics dumps. In both
 cases, the ``.xdmf`` files should be opened. In ParaView, select the
 “XDMF Reader” when prompted.
 
+.. warning::
+   Currently parthenon face- and edge- centered data is not supported
+   for ParaView and VisIt. However, our python tooling does support
+   all mesh locations.
+
 Preparing outputs for ``yt``
 ----------------------------
 
diff --git a/scripts/python/packages/parthenon_tools/parthenon_tools/movie2d.py b/scripts/python/packages/parthenon_tools/parthenon_tools/movie2d.py
index b911eb0b88aa..363f4d00eb66 100644
--- a/scripts/python/packages/parthenon_tools/parthenon_tools/movie2d.py
+++ b/scripts/python/packages/parthenon_tools/parthenon_tools/movie2d.py
@@ -21,7 +21,12 @@
 
 from argparse import ArgumentParser
 from pathlib import Path
-from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
+from concurrent.futures import (
+    ThreadPoolExecutor,
+    ProcessPoolExecutor,
+    wait,
+    ALL_COMPLETED,
+)
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
@@ -208,17 +213,26 @@ def plot_dump(
         ye = yf
 
     # get tensor components
-    if len(q.shape) > 6:
-        raise ValueError(
-            "Tensor rank is higher than I can handle. "
-            + "Please revise the movie2d script"
-        )
-    if len(q.shape) == 6:
-        q = q[:, components[0], components[1], 0, :, :]
-    if len(q.shape) == 5:
-        q = q[:, components[-1], 0, :, :]
-    if len(q.shape) == 4:
-        q = q[:, 0, :, :]
+    ntensors = len(q.shape[1:-3])
+    if components:
+        if len(components) != ntensors:
+            raise ValueError(
+                "Tensor rank not the same as number of specified components: {}, {}, {}".format(
+                    len(components), ntensors, q.shape
+                )
+            )
+        # The first index of q is block index. Here we walk through
+        # the tensor components, slowest-first and, iteratively, fix
+        # q's slowest moving non-block index to the fixed tensor
+        # component. Then we move to the next index.
+        for c in components:
+            if c > (q.shape[1] - 1):
+                raise ValueError(
+                    "Component {} out of bounds. Shape = {}".format(c, q.shape)
+                )
+            q = q[:, c]
+    # move to 2d
+    q = q[..., 0, :, :]
 
     fig = plt.figure()
     p = fig.add_subplot(111, aspect=1)
@@ -299,15 +313,16 @@ def plot_dump(
     args.output_directory.mkdir(0o755, True, True)
     logger.info(f"Total files to process: {len(args.files)}")
 
-    components = [0, 0]
+    components = []
     if args.tc is not None:
         components = args.tc
     if args.vc is not None:
-        components = [0, args.vc]
+        components = [args.vc]
     do_swarm = args.swarm is not None
 
     _x = ProcessPoolExecutor if args.worker_type == "process" else ThreadPoolExecutor
     with _x(max_workers=args.workers) as pool:
+        futures = []
         for frame_id, file_name in enumerate(args.files):
             data = phdf(file_name)
 
@@ -376,40 +391,49 @@ def plot_dump(
             # NOTE: After doing 5 test on different precision, keeping 2 looks more promising
             current_time = format(round(data.Time, 2), ".2f")
             if args.debug_plot:
-                pool.submit(
-                    plot_dump,
-                    data.xg,
-                    data.yg,
-                    q,
-                    current_time,
-                    output_file,
-                    True,
-                    data.gid,
-                    data.xig,
-                    data.yig,
-                    data.xeg,
-                    data.yeg,
-                    components,
-                    swarmx,
-                    swarmy,
-                    swarmcolor,
-                    particlesize,
+                futures.append(
+                    pool.submit(
+                        plot_dump,
+                        data.xg,
+                        data.yg,
+                        q,
+                        current_time,
+                        output_file,
+                        True,
+                        data.gid,
+                        data.xig,
+                        data.yig,
+                        data.xeg,
+                        data.yeg,
+                        components,
+                        swarmx,
+                        swarmy,
+                        swarmcolor,
+                        particlesize,
+                    )
                 )
             else:
-                pool.submit(
-                    plot_dump,
-                    data.xng,
-                    data.yng,
-                    q,
-                    current_time,
-                    output_file,
-                    True,
-                    components=components,
-                    swarmx=swarmx,
-                    swarmy=swarmy,
-                    swarmcolor=swarmcolor,
-                    particlesize=particlesize,
+                futures.append(
+                    pool.submit(
+                        plot_dump,
+                        data.xng,
+                        data.yng,
+                        q,
+                        current_time,
+                        output_file,
+                        True,
+                        components=components,
+                        swarmx=swarmx,
+                        swarmy=swarmy,
+                        swarmcolor=swarmcolor,
+                        particlesize=particlesize,
+                    )
                 )
+        wait(futures, return_when=ALL_COMPLETED)
+        for future in futures:
+            exception = future.exception()
+            if exception:
+                raise exception
 
     if not ERROR_FLAG:
         logger.info("All frames produced.")
diff --git a/scripts/python/packages/parthenon_tools/parthenon_tools/phdf.py b/scripts/python/packages/parthenon_tools/parthenon_tools/phdf.py
index 39f87957c0c1..4e75439791b4 100644
--- a/scripts/python/packages/parthenon_tools/parthenon_tools/phdf.py
+++ b/scripts/python/packages/parthenon_tools/parthenon_tools/phdf.py
@@ -99,20 +99,28 @@ class phdf:
        BlockBounds[NumBlocks]: Bounds of all the blocks
 
     Class Functions:
-      Get(variable, flatten=True):
-            Gets data for the named variable. Returns None if variable
-            is not found in the file or the data if found.
-
-            If variable is a vector, each element of the returned
-            numpy array is a vector of that length.
-
-            Default is to return a flat array of length TotalCells.
-            However if flatten is set to False, a 4D (or 5D if vector)
-            array is returned that has dimensions [NumBlocks, Nz, Ny,
-            Nx] where NumBlocks is the total number of blocks, and Nz,
-            Ny, and Nx are the number of cells in the z, y, and x
-            directions respectively.
-
+      Get(variable, flatten=True, interior=False, average_to_cell_centers=True):
+           Reads data for the named variable from file.
+
+           Returns None if variable is not found in the file or the data
+           if found.
+
+           Default is to return a flat array of length
+           TotalCells*total tensor components. However if flatten is
+           set to False, a 4D (or 5D, or 6D if tensorial) array is
+           returned that has dimensions
+           [NumBlocks, tensor_components, Nz, Ny, Nx]
+           where NumBlocks is the total number of blocks, and Nz, Ny,
+           and Nx are the number of cells in the z, y, and x
+           directions respectively.
+
+           If flatten is False and interior is True, only non-ghost data
+           will be returned. This array will correspond to the coordinates
+           xg and xng, etc.
+
+           By default, node, face, and edge centered variables are
+           averaged to cell centers for visualization. This can be
+           disabled by setting average_to_cell_centers = False.
       ToLocation(index):
            Returns the location as an array [ib, bidx, iz, iy, ix]
            which convets the index into a block, index within that
@@ -296,6 +304,7 @@ def load_ghost_coords(coord_i):
 
         self.Variables = [k for k in f.keys()]
         self.varData = {}
+        self.varTopology = {}
         self.swarmData = {}
 
         # Construct a map of datasets to contained components,idx
@@ -492,46 +501,35 @@ def findBlockIdxInOther(self, other, ib, tol=1e-10, verbose=False):
             print(f"Block id: {ib} with bounds {myibBounds} not found in {other.file}")
         return None  # block index not found
 
-    def Get(self, variable, flatten=True, interior=False):
-        """
-        Reads data for the named variable from file.
+    def Get(self, variable, flatten=True, interior=False, average_to_cell_centers=True):
+        """Reads data for the named variable from file.
 
         Returns None if variable is not found in the file or the data
         if found.
 
-        If variable is a vector, each element of the returned numpy
-        array is a vector of that length.
-
-        Default is to return a flat array of length TotalCells.
-        However if flatten is set to False, a 4D (or 5D if vector)
-        array is returned that has dimensions [NumBlocks, Nz, Ny, Nx]
-        where NumBlocks is the total number of blocks, and Nz, Ny, and
-        Nx are the number of cells in the z, y, and x directions
-        respectively.
+        Default is to return a flat array of length
+        TotalCells*total tensor components. However if flatten is
+        set to False, a 4D (or 5D, or 6D if tensorial) array is
+        returned that has dimensions
+        [NumBlocks, tensor_components, Nz, Ny, Nx]
+        where NumBlocks is the total number of blocks, and Nz, Ny,
+        and Nx are the number of cells in the z, y, and x
+        directions respectively.
 
         If flatten is False and interior is True, only non-ghost data
         will be returned. This array will correspond to the coordinates
         xg and xng, etc.
+
+        By default, node, face, and edge centered variables are
+        averaged to cell centers for visualization. This can be
+        disabled by setting average_to_cell_centers = False.
         """
         try:
             if self.varData.get(variable) is None:
                 self.varData[variable] = self.fid[variable][:]
                 vShape = self.varData[variable].shape
                 if self.OutputFormatVersion < 3:
-                    if self.OutputFormatVersion == -1:
-                        vLen = vShape[-1]
-                    else:
-                        vLen = vShape[1]  # index 0 is the block, so we need to use 1
-                    # in versions < 3, if variable is a scalar remove the component index
-                    if vLen == 1:
-                        tmp = self.varData[variable].reshape(self.TotalCells)
-                        newShape = (
-                            self.NumBlocks,
-                            self.MeshBlockSize[2],
-                            self.MeshBlockSize[1],
-                            self.MeshBlockSize[0],
-                        )
-                        self.varData[variable] = tmp.reshape((newShape))
+                    raise ValueError("Unsupported output version")
 
         except KeyError:
             print(
@@ -542,114 +540,133 @@ def Get(self, variable, flatten=True, interior=False):
             )
             return None
 
+        try:
+            self.varTopology[variable] = (
+                self.fid[variable].attrs["TopologicalLocation"].astype(str)
+            )
+        except:
+            self.varTopology[variable] = "Cell"
+        average_able = (self.varTopology[variable] != "Cell") and (
+            self.varTopology[variable] != "None"
+        )
+
         vShape = self.varData[variable].shape
-        if flatten:
-            # TODO(tbd) remove legacy mode in next major rel.
-            if self.OutputFormatVersion == -1:
-                if np.prod(vShape) > self.TotalCells:
-                    return self.varData[variable][:].reshape(
-                        self.TotalCells, vShape[-1]
+        simdim = (vShape[-1] > 1) + (vShape[-2] > 1) + (vShape[-3] > 1)
+        if average_to_cell_centers and average_able:
+            v = self.varData[variable]
+            vnew = v.copy()
+            # Make vnew the proper shape to be averaged into
+            vnew = vnew[..., :-1]
+            if simdim >= 2:
+                vnew = vnew[..., :-1, :]
+            if simdim >= 3:
+                vnew = vnew[..., :-1, :, :]
+
+            # Average...
+            if self.varTopology[variable] == "Face":
+                if simdim == 1:
+                    vnew[:, 0] = 0.5 * (v[:, 0, ..., 1:] + v[:, 0, ..., :-1])
+                elif simdim == 2:
+                    vnew[:, 0] = 0.5 * (v[:, 0, ..., :-1, 1:] + v[:, 0, ..., :-1, :-1])
+                    vnew[:, 1] = 0.5 * (v[:, 1, ..., 1:, :-1] + v[:, 1, ..., :-1, :-1])
+                else:  # simdim == 3
+                    vnew[:, 0] = 0.5 * (
+                        v[:, 0, ..., :-1, :-1, 1:] + v[:, 0, ..., :-1, :-1, :-1]
                     )
-                else:
-                    return self.varData[variable][:].reshape(self.TotalCells)
-
-            elif self.OutputFormatVersion == 2:
-                if np.prod(vShape) > self.TotalCells:
-                    ret = np.empty(
-                        (vShape[1], self.TotalCells), dtype=self.varData[variable].dtype
+                    vnew[:, 1] = 0.5 * (
+                        v[:, 1, ..., :-1, 1:, :-1] + v[:, 1, ..., :-1, :-1, :-1]
                     )
-                    ret[:] = np.nan
-                    for i in range(vShape[1]):
-                        ret[i] = self.varData[variable][:, i, :, :, :].ravel()
-                    assert (ret != np.nan).all()
-                    return ret
-                else:
-                    return self.varData[variable][:].reshape(self.TotalCells)
-
-            elif self.OutputFormatVersion == 3:
-                # Check for cell-centered data
-                if (
-                    vShape[-1] == self.MeshBlockSize[0]
-                    and vShape[-2] == self.MeshBlockSize[1]
-                    and vShape[-3] == self.MeshBlockSize[2]
-                ):
-                    fieldShape = vShape[1:-3]
-                    totalFieldEntries = np.prod(fieldShape)
-                    ndim = len(fieldShape)
-                    if ndim == 0:
-                        return self.varData[variable].ravel()
-                    else:
-                        if ndim == 1:
-                            ret = np.empty(
-                                (vShape[1], self.TotalCells),
-                                dtype=self.varData[variable].dtype,
-                            )
-                            ret[:] = np.nan
-                            for i in range(vShape[1]):
-                                ret[i] = self.varData[variable][:, i, :, :, :].ravel()
-                            assert (ret != np.nan).all()
-                            return ret
-                        elif ndim == 2:
-                            ret = np.empty(
-                                (vShape[1] * vShape[2], self.TotalCells),
-                                dtype=self.varData[variable].dtype,
-                            )
-                            ret[:] = np.nan
-                            for i in range(vShape[1]):
-                                for j in range(vShape[2]):
-                                    ret[i + vShape[1] * j] = self.varData[variable][
-                                        :, i, j, :, :, :
-                                    ].ravel()
-                            assert (ret != np.nan).all()
-                            return ret
-                        else:
-                            ret = np.empty(
-                                (vShape[1] * vShape[2] * vShape[3], self.TotalCells),
-                                dtype=self.varData[variable].dtype,
-                            )
-                            ret[:] = np.nan
-                            for i in range(vShape[1]):
-                                for j in range(vShape[2]):
-                                    for k in range(vShape[3]):
-                                        ret[
-                                            i + vShape[1] * (j + vShape[2] * k)
-                                        ] = self.varData[variable][
-                                            :, i, j, k, :, :, :
-                                        ].ravel()
-                            assert (ret != np.nan).all()
-                            return ret
-                else:
-                    # Not cell-based variable
-                    raise Exception(
-                        f"Flattening only supported for cell-based variables but requested for {variable}"
+                    vnew[:, 2] = 0.5 * (
+                        v[:, 2, ..., 1:, :-1, :-1] + v[:, 2, ..., :-1, :-1, :-1]
+                    )
+            if self.varTopology[variable] == "Edge":
+                if simdim == 1:
+                    # nothing to do for E1 == :,0
+                    # E2 and E3 behave like node-centered data
+                    vnew[:, 1] = 0.5 * (v[:, 1, ..., 1:] + v[:, 1, ..., :-1])
+                    vnew[:, 2] = 0.5 * (v[:, 2, ..., 1:] + v[:, 2, ..., :-1])
+                if simdim == 2:
+                    # E1 and E2 behave like face centered data
+                    vnew[:, 0] = 0.5 * (v[:, 0, ..., 1:, :-1] + v[:, 0, ..., :-1, :-1])
+                    vnew[:, 1] = 0.5 * (v[:, 1, ..., :-1, 1:] + v[:, 1, ..., :-1, :-1])
+                    # E3 behaves like node-centered data
+                    vnew[:, 2] = 0.25 * (
+                        v[:, 2, ..., 1:, 1:]
+                        + v[:, 2, ..., 1:, :-1]
+                        + v[:, 2, ..., :-1, 1:]
+                        + v[:, 2, ..., :-1, :-1]
+                    )
+                else:  # simdim == 3
+                    vnew[:, 0] = 0.25 * (
+                        v[:, 0, ..., 1:, 1:, :-1]
+                        + v[:, 0, ..., 1:, :-1, :-1]
+                        + v[:, 0, ..., :-1, 1:, :-1]
+                        + v[:, 0, ..., :-1, :-1, :-1]
+                    )
+                    vnew[:, 1] = 0.25 * (
+                        v[:, 1, ..., 1:, :-1, 1:]
+                        + v[:, 1, ..., 1:, :-1, :-1]
+                        + v[:, 1, ..., :-1, :-1, 1:]
+                        + v[:, 1, ..., :-1, :-1, :-1]
                     )
+                    vnew[:, 2] = 0.25 * (
+                        v[:, 2, ..., :-1, 1:, 1:]
+                        + v[:, 2, ..., :-1, 1:, :-1]
+                        + v[:, 2, ..., :-1, :-1, 1:]
+                        + v[:, 2, ..., :-1, :-1, :-1]
+                    )
+            elif self.varTopology[variable] == "Node":
+                if simdim == 1:
+                    vnew = (1.0 / 2.0) * (v[..., 1:] + v[..., :-1])
+                elif simdim == 2:
+                    vnew = (1.0 / 4.0) * (
+                        v[..., 1:, 1:]
+                        + v[..., 1:, :-1]
+                        + v[..., :-1, 1:]
+                        + v[..., :-1, :-1]
+                    )
+                else:  # simdim == 3
+                    vnew = (1.0 / 8.0) * (
+                        v[..., 1:, 1:, 1:]
+                        + v[..., :-1, :-1, :-1]
+                        + v[..., 1:, 1:, :-1]
+                        + v[..., 1:, :-1, 1:]
+                        + v[..., :-1, 1:, 1:]
+                        + v[..., :-1, :-1, 1:]
+                        + v[..., :-1, 1:, :-1]
+                        + v[..., 1:, :-1, :-1]
+                    )
+            else:
+                raise ValueError(
+                    "Topology {} for var {} cannot be averaged.".format(
+                        self.varTopology[variable], variable
+                    )
+                )
+            v = vnew
+            self.varData[variable] = v
+
+        if flatten:
+            nblocks = vShape[0]
+            if self.varTopology[variable] == "None":
+                remaining_size = np.product(vShape[1:])
+                return self.varData[variable].reshape(nblocks, remaining_size)
+            else:
+                preserved_shape = vShape[:-3]
+                remaining_size = np.product(vShape[-3:])
+                return self.varData[variable].reshape(*preserved_shape, remaining_size)
 
         if self.IncludesGhost and interior:
             nghost = self.NGhost
-            # TODO(tbd) remove legacy mode in next major rel.
-            if self.OutputFormatVersion == -1:
-                if vShape[3] == 1:
-                    return self.varData[variable][:, :, :, :]
-                elif vShape[2] == 1:
-                    return self.varData[variable][:, :, :, nghost:-nghost]
-                elif vShape[1] == 1:
-                    return self.varData[variable][:, :, nghost:-nghost, nghost:-nghost]
-                else:
-                    return self.varData[variable][
-                        :, nghost:-nghost, nghost:-nghost, nghost:-nghost
-                    ]
+            if vShape[-1] == 1:
+                return self.varData[variable][:]
+            elif vShape[-2] == 1:
+                return self.varData[variable][..., nghost:-nghost]
+            elif vShape[-3] == 1:
+                return self.varData[variable][..., nghost:-nghost, nghost:-nghost]
             else:
-                print(vShape)
-                if vShape[-1] == 1:
-                    return self.varData[variable][:]
-                elif vShape[-2] == 1:
-                    return self.varData[variable][..., nghost:-nghost]
-                elif vShape[-3] == 1:
-                    return self.varData[variable][..., nghost:-nghost, nghost:-nghost]
-                else:
-                    return self.varData[variable][
-                        ..., nghost:-nghost, nghost:-nghost, nghost:-nghost
-                    ]
+                return self.varData[variable][
+                    ..., nghost:-nghost, nghost:-nghost, nghost:-nghost
+                ]
         return self.varData[variable][:]
 
     def GetSwarm(self, swarm):
@@ -720,16 +737,11 @@ def GetComponents(self, components, flatten=True):
                         # If dataset isn't a vector, just save dataset
                         component_data[component] = dataset
                     else:
-                        # TODO(tbd) remove legacy mode in next major rel.
-                        # Data is a vector, save only the component
-                        if self.OutputFormatVersion == -1:
-                            component_data[component] = dataset[..., idx]
-                        else:
-                            if flatten:
-                                component_data[component] = dataset[idx, ...]
+                        if flatten:
+                            component_data[component] = dataset[idx, ...]
                             # need to take leading block index into account
-                            else:
-                                component_data[component] = dataset[:, idx, ...]
+                        else:
+                            component_data[component] = dataset[:, idx, ...]
 
         return component_data
 
diff --git a/src/basic_types.hpp b/src/basic_types.hpp
index b36383aa2827..886788727d48 100644
--- a/src/basic_types.hpp
+++ b/src/basic_types.hpp
@@ -15,6 +15,7 @@
 
 #include <limits>
 #include <string>
+#include <tuple>
 #include <unordered_map>
 #include <vector>
 
@@ -208,6 +209,8 @@ struct SimTime {
 template <typename T>
 using Dictionary = std::unordered_map<std::string, T>;
 
+template <typename T>
+using Triple_t = std::tuple<T, T, T>;
 } // namespace parthenon
 
 #endif // BASIC_TYPES_HPP_
diff --git a/src/interface/metadata.hpp b/src/interface/metadata.hpp
index 707f8a6bd917..19c8b1482c5d 100644
--- a/src/interface/metadata.hpp
+++ b/src/interface/metadata.hpp
@@ -348,6 +348,21 @@ class Metadata {
   }
   static int num_flags;
 
+  static std::string LocationToString(MetadataFlag flag) {
+    if (flag == Cell) {
+      return "Cell";
+    } else if (flag == Face) {
+      return "Face";
+    } else if (flag == Edge) {
+      return "Edge";
+    } else if (flag == Node) {
+      return "Node";
+    } else if (flag == None) {
+      return "None";
+    }
+    PARTHENON_THROW("Unknown topology flag");
+  }
+
   // Sparse threshold routines
   void SetSparseThresholds(parthenon::Real alloc, parthenon::Real dealloc,
                            parthenon::Real default_val = 0.0) {
@@ -447,6 +462,7 @@ class Metadata {
 
     PARTHENON_THROW("No topology flag set");
   }
+  std::string WhereAsString() const { return LocationToString(Where()); }
 
   bool IsMeshTied() const { return (Where() != None); }
 
diff --git a/src/interface/variable.hpp b/src/interface/variable.hpp
index 6542d99e7e43..42c067efb9d7 100644
--- a/src/interface/variable.hpp
+++ b/src/interface/variable.hpp
@@ -81,14 +81,16 @@ class Variable {
   KOKKOS_FORCEINLINE_FUNCTION
   auto GetDim(const int i) const {
     // we can't query data.GetDim() here because data may be unallocated
-    assert(0 < i && i <= MAX_VARIABLE_DIMENSION && "ParArrayNDs are max 6D");
+    PARTHENON_DEBUG_REQUIRE(0 < i && i <= MAX_VARIABLE_DIMENSION, "Index out of bounds");
     return dims_[i - 1];
   }
 
+  auto GetDim() const { return dims_; }
+
   KOKKOS_FORCEINLINE_FUNCTION
   auto GetCoarseDim(const int i) const {
     // we can't query coarse_s.GetDim() here because it may be unallocated
-    assert(0 < i && i <= MAX_VARIABLE_DIMENSION && "ParArrayNDs are max 6D");
+    PARTHENON_DEBUG_REQUIRE(0 < i && i <= MAX_VARIABLE_DIMENSION, "Index out of bounds");
     return coarse_dims_[i - 1];
   }
 
@@ -212,7 +214,7 @@ class ParticleVariable {
 
   KOKKOS_FORCEINLINE_FUNCTION
   auto GetDim(const int i) const {
-    PARTHENON_DEBUG_REQUIRE(0 < i && i <= 6, "ParArrayNDGenerics are max 6D");
+    PARTHENON_DEBUG_REQUIRE(0 < i && i <= 6, "Index out of bounds");
     return dims_[i - 1];
   }
 
diff --git a/src/outputs/ascent.cpp b/src/outputs/ascent.cpp
index 13af3b50f613..bab4632485f1 100644
--- a/src/outputs/ascent.cpp
+++ b/src/outputs/ascent.cpp
@@ -175,7 +175,7 @@ void AscentOutput::WriteOutputFile(Mesh *pm, ParameterInput *pin, SimTime *tm,
       if (!var->IsSet(Metadata::Cell)) {
         continue;
       }
-      const auto var_info = VarInfo(var);
+      const auto var_info = VarInfo(var, bounds);
 
       for (int icomp = 0; icomp < var_info.num_components; ++icomp) {
         auto const data = Kokkos::subview(var->data, 0, 0, icomp, Kokkos::ALL(),
diff --git a/src/outputs/histogram.cpp b/src/outputs/histogram.cpp
index 88c6ced2ecd3..5944f6e8a889 100644
--- a/src/outputs/histogram.cpp
+++ b/src/outputs/histogram.cpp
@@ -472,12 +472,13 @@ void HistogramOutput::WriteOutputFile(Mesh *pm, ParameterInput *pin, SimTime *tm
     using namespace HDF5;
     H5P const pl_xfer = H5P::FromHIDCheck(H5Pcreate(H5P_DATASET_XFER));
 
-    // As we're reusing the interface from the existing hdf5 output, we have to define
-    // everything as 7D arrays.
-    // Counts will be set for each histogram individually below.
-    const std::array<hsize_t, H5_NDIM> local_offset({0, 0, 0, 0, 0, 0, 0});
-    std::array<hsize_t, H5_NDIM> local_count({0, 0, 0, 0, 0, 0, 0});
-    std::array<hsize_t, H5_NDIM> global_count({0, 0, 0, 0, 0, 0, 0});
+    // As we're reusing the interface from the existing hdf5 output,
+    // we have to define everything as H5_NDIM arrays. Counts will be
+    // set for each histogram individually below. All
+    // zero-initialized
+    const std::array<hsize_t, H5_NDIM> local_offset = {0};
+    std::array<hsize_t, H5_NDIM> local_count = {0};
+    std::array<hsize_t, H5_NDIM> global_count = {0};
 
     // create/open HDF5 file
     const std::string filename = GenerateFilename_(pin, tm, signal);
diff --git a/src/outputs/output_utils.cpp b/src/outputs/output_utils.cpp
index 52ced1ee631b..cd1e49241e1d 100644
--- a/src/outputs/output_utils.cpp
+++ b/src/outputs/output_utils.cpp
@@ -33,6 +33,109 @@
 namespace parthenon {
 namespace OutputUtils {
 
+Triple_t<int> VarInfo::GetNumKJI(const IndexDomain domain) const {
+  int nx3 = 1, nx2 = 1, nx1 = 1;
+  // TODO(JMM): I know that this could be done by hand, but I'd rather
+  // rely on the loop bounds machinery and this should be cheap.
+  for (auto el : topological_elements) {
+    nx3 = std::max(nx3, cellbounds.ncellsk(domain, el));
+    nx2 = std::max(nx2, cellbounds.ncellsj(domain, el));
+    nx1 = std::max(nx1, cellbounds.ncellsi(domain, el));
+  }
+  return std::make_tuple(nx3, nx2, nx1);
+}
+
+Triple_t<IndexRange> VarInfo::GetPaddedBoundsKJI(const IndexDomain domain) const {
+  // TODO(JMM): I know that this could be done by hand, but I'd rather
+  // rely on the loop bounds machinery and this should be cheap.
+  int ks = 0, ke = 0, js = 0, je = 0, is = 0, ie = 0;
+  for (auto el : topological_elements) {
+    auto kb = cellbounds.GetBoundsK(domain, el);
+    auto jb = cellbounds.GetBoundsJ(domain, el);
+    auto ib = cellbounds.GetBoundsI(domain, el);
+    ks = kb.s; // pads are only upper indices
+    js = jb.s;
+    is = ib.s;
+    ke = std::max(ke, kb.e);
+    je = std::max(je, jb.e);
+    ie = std::max(ie, ib.e);
+  }
+  IndexRange kb{ks, ke}, jb{js, je}, ib{is, ie};
+  return std::make_tuple(kb, jb, ib);
+}
+
+int VarInfo::Size() const {
+  return std::accumulate(nx_.begin(), nx_.end(), 1, std::multiplies<int>());
+}
+
+// Includes topological element shape
+int VarInfo::TensorSize() const {
+  if (where == MetadataFlag({Metadata::None})) {
+    return Size();
+  } else {
+    return std::accumulate(rnx_.begin() + 1, rnx_.end() - 3, 1, std::multiplies<int>());
+  }
+}
+
+int VarInfo::FillSize(const IndexDomain domain) const {
+  if (where == MetadataFlag({Metadata::None})) {
+    return Size();
+  } else {
+    auto [n3, n2, n1] = GetNumKJI(domain);
+    return ntop_elems * TensorSize() * n3 * n2 * n1;
+  }
+}
+
+// number of elements of data that describe variable shape
+int VarInfo::GetNDim() const {
+  // 3 cell indices, tensor rank, topological element index if needed
+  return (where == MetadataFlag({Metadata::None})) ? tensor_rank
+                                                   : (3 + tensor_rank + element_matters);
+}
+
+// Returns full shape as read to/written from I/O, with 1-padding.
+std::vector<int> VarInfo::GetPaddedShape(IndexDomain domain) const {
+  std::vector<int> out = GetRawShape();
+  if (where != MetadataFlag({Metadata::None})) {
+    auto [nx3, nx2, nx1] = GetNumKJI(domain);
+    out[0] = nx3;
+    out[1] = nx2;
+    out[2] = nx1;
+  }
+  return out;
+}
+std::vector<int> VarInfo::GetPaddedShapeReversed(IndexDomain domain) const {
+  std::vector<int> out(rnx_.begin(), rnx_.end());
+  if (where != MetadataFlag({Metadata::None})) {
+    auto [nx3, nx2, nx1] = GetNumKJI(domain);
+    out[VNDIM - 3] = nx3;
+    out[VNDIM - 2] = nx2;
+    out[VNDIM - 1] = nx1;
+  }
+  return out;
+}
+
+std::vector<int> VarInfo::GetRawShape() const {
+  return std::vector<int>(nx_.begin(), nx_.end());
+}
+
+int VarInfo::GetDim(int i) const {
+  PARTHENON_DEBUG_REQUIRE(0 < i && i < VNDIM, "Index out of bounds");
+  return nx_[i - 1];
+}
+
+std::vector<VarInfo> VarInfo::GetAll(const VariableVector<Real> &vars,
+                                     const IndexShape &cellbounds) {
+  std::vector<VarInfo> out;
+  for (const auto &v : vars) {
+    out.emplace_back(v, cellbounds);
+  }
+  std::sort(out.begin(), out.end(),
+            [](const VarInfo &a, const VarInfo &b) { return a.label < b.label; });
+
+  return out;
+}
+
 void SwarmInfo::AddOffsets(const SP_Swarm &swarm) {
   std::size_t count = swarm->GetNumActive();
   std::size_t offset = (offsets.size() > 0) ? offsets.back() : 0;
diff --git a/src/outputs/output_utils.hpp b/src/outputs/output_utils.hpp
index db6353090cbf..10cb8fa887e9 100644
--- a/src/outputs/output_utils.hpp
+++ b/src/outputs/output_utils.hpp
@@ -19,19 +19,24 @@
 #define OUTPUTS_OUTPUT_UTILS_HPP_
 
 // C++
+#include <algorithm>
 #include <array>
+#include <functional>
 #include <map>
 #include <memory>
+#include <numeric>
 #include <set>
 #include <sstream>
 #include <string>
 #include <tuple>
+#include <utility>
 #include <vector>
 
 // Parthenon
 #include "basic_types.hpp"
 #include "interface/metadata.hpp"
 #include "interface/variable.hpp"
+#include "kokkos_abstraction.hpp"
 #include "mesh/domain.hpp"
 #include "mesh/mesh.hpp"
 #include "mesh/meshblock.hpp"
@@ -41,31 +46,93 @@ namespace parthenon {
 namespace OutputUtils {
 // Helper struct containing some information about a variable
 struct VarInfo {
+ public:
+  static constexpr int VNDIM = MAX_VARIABLE_DIMENSION;
   std::string label;
   int num_components;
-  int nx6;
-  int nx5;
-  int nx4;
-  int nx3;
-  int nx2;
-  int nx1;
   int tensor_rank; // 0- to 3-D for cell-centered variables, 0- to 6-D for arbitrary shape
                    // variables
   MetadataFlag where;
   bool is_sparse;
   bool is_vector;
+  IndexShape cellbounds;
   std::vector<std::string> component_labels;
-  int Size() const { return nx6 * nx5 * nx4 * nx3 * nx2 * nx1; }
+  // list of topological elements in variable... e.g., Face1, Face2, etc
+  std::vector<TopologicalElement> topological_elements;
+  // how many topological elements are stored in variable, e.g., 3 for
+  // face and edge vars.
+  int ntop_elems;
+  // whether or not topological element matters.
+  bool element_matters;
+
+  Triple_t<int> GetNumKJI(const IndexDomain domain) const;
+  Triple_t<IndexRange> GetPaddedBoundsKJI(const IndexDomain domain) const;
+
+  int Size() const;
+  // Includes topological element shape
+  int TensorSize() const;
+  // Size of region that needs to be filled with 0s if not allocated
+  int FillSize(const IndexDomain domain) const;
+  // number of elements of data that describe variable shape
+  int GetNDim() const;
+
+  template <typename T>
+  int FillShape(const IndexDomain domain, T *data) const {
+    if (where == MetadataFlag({Metadata::None})) {
+      for (int i = 0; i < tensor_rank; ++i) {
+        data[i] = static_cast<T>(rnx_[rnx_.size() - tensor_rank + i]);
+      }
+    } else {
+      // For nx1,nx2,nx3 find max storage required in each direction
+      // accross topological elements. Unused indices will be written but
+      // empty.
+      auto [nx3, nx2, nx1] = GetNumKJI(domain);
+      // fill topological element, if relevant
+      if (element_matters) {
+        data[0] = ntop_elems;
+      }
+      // fill the tensor rank
+      for (int i = 0; i < tensor_rank; ++i) {
+        data[i + element_matters] =
+            static_cast<T>(rnx_[rnx_.size() - 3 - tensor_rank + i]);
+      }
+      // fill cell indices
+      data[tensor_rank + element_matters] = static_cast<T>(nx3);
+      data[tensor_rank + element_matters + 1] = static_cast<T>(nx2);
+      data[tensor_rank + element_matters + 2] = static_cast<T>(nx1);
+    }
+    return GetNDim();
+  }
+
+  template <typename T, typename... Args>
+  int FillShape(const IndexDomain domain, T *head, Args... args) const {
+    int ndim_head = FillShape(domain, head);
+    int ndim_tail = FillShape(domain, std::forward<Args>(args)...);
+    // this check should be impossible to trigger but... just to be safe
+    PARTHENON_DEBUG_REQUIRE(ndim_head == ndim_tail,
+                            "Shape can't change for different arrays");
+    return ndim_tail;
+  }
+
+  // Returns full shape as read to/written from I/O, with 1-padding.
+  std::vector<int> GetPaddedShape(IndexDomain domain) const;
+  std::vector<int> GetPaddedShapeReversed(IndexDomain domain) const;
+  // nx accessors
+  std::vector<int> GetRawShape() const;
+  int GetDim(int i) const;
 
   VarInfo() = delete;
 
   // TODO(JMM): Separate this into an implementation file again?
   VarInfo(const std::string &label, const std::vector<std::string> &component_labels_,
-          int num_components, int nx6, int nx5, int nx4, int nx3, int nx2, int nx1,
-          Metadata metadata, bool is_sparse, bool is_vector)
-      : label(label), num_components(num_components), nx6(nx6), nx5(nx5), nx4(nx4),
-        nx3(nx3), nx2(nx2), nx1(nx1), tensor_rank(metadata.Shape().size()),
-        where(metadata.Where()), is_sparse(is_sparse), is_vector(is_vector) {
+          int num_components, std::array<int, VNDIM> nx, Metadata metadata,
+          const std::vector<TopologicalElement> topological_elements, bool is_sparse,
+          bool is_vector, const IndexShape &cellbounds)
+      : label(label), num_components(num_components), nx_(nx),
+        tensor_rank(metadata.Shape().size()), where(metadata.Where()),
+        topological_elements(topological_elements), is_sparse(is_sparse),
+        is_vector(is_vector), cellbounds(cellbounds), rnx_(nx_.rbegin(), nx_.rend()),
+        ntop_elems(topological_elements.size()), element_matters(ntop_elems > 1) {
     if (num_components <= 0) {
       std::stringstream msg;
       msg << "### ERROR: Got variable " << label << " with " << num_components
@@ -96,11 +163,22 @@ struct VarInfo {
     }
   }
 
-  explicit VarInfo(const std::shared_ptr<Variable<Real>> &var)
+  explicit VarInfo(const std::shared_ptr<Variable<Real>> &var,
+                   const IndexShape &cellbounds)
       : VarInfo(var->label(), var->metadata().getComponentLabels(), var->NumComponents(),
-                var->GetDim(6), var->GetDim(5), var->GetDim(4), var->GetDim(3),
-                var->GetDim(2), var->GetDim(1), var->metadata(), var->IsSparse(),
-                var->IsSet(Metadata::Vector)) {}
+                var->GetDim(), var->metadata(), var->GetTopologicalElements(),
+                var->IsSparse(), var->IsSet(Metadata::Vector), cellbounds) {}
+
+  static std::vector<VarInfo> GetAll(const VariableVector<Real> &vars,
+                                     const IndexShape &cellbounds);
+
+  bool operator==(const std::string &other) const { return other == label; }
+
+ private:
+  // TODO(JMM): Probably nx_ and rnx_ both not necessary... but it was
+  // easiest for me to reason about it this way.
+  std::array<int, VNDIM> nx_;
+  std::vector<int> rnx_;
 };
 
 struct SwarmVarInfo {
@@ -215,32 +293,35 @@ std::vector<T> FlattenBlockInfo(Mesh *pm, int shape, Function_t f) {
 }
 
 // mirror must be provided because copying done externally
-template <typename Data_t, typename idx_t, typename Function_t>
-void PackOrUnpackVar(MeshBlock *pmb, Variable<Real> *pvar, bool do_ghosts, idx_t &idx,
-                     std::vector<Data_t> &data, Function_t f) {
-  const auto &Nt = pvar->GetDim(6);
-  const auto &Nu = pvar->GetDim(5);
-  const auto &Nv = pvar->GetDim(4);
+template <typename idx_t, typename Function_t>
+void PackOrUnpackVar(const VarInfo &info, bool do_ghosts, idx_t &idx, Function_t f) {
   const IndexDomain domain = (do_ghosts ? IndexDomain::entire : IndexDomain::interior);
-  IndexRange kb, jb, ib;
-  if (pvar->metadata().Where() == MetadataFlag(Metadata::Cell)) {
-    kb = pmb->cellbounds.GetBoundsK(domain);
-    jb = pmb->cellbounds.GetBoundsJ(domain);
-    ib = pmb->cellbounds.GetBoundsI(domain);
-    // TODO(JMM): Add topological elements here
-  } else { // metadata none
-    kb = {0, pvar->GetDim(3) - 1};
-    jb = {0, pvar->GetDim(2) - 1};
-    ib = {0, pvar->GetDim(1) - 1};
+  // shape as written to or read from. contains additional padding
+  // in orthogonal directions.
+  // e.g., Face1-centered var is shape (N1+1)x(N2+1)x(N3+1)
+  // format is
+  // topological_elems x tensor_elems x block_elems
+  const auto shape = info.GetPaddedShapeReversed(domain);
+  // TODO(JMM): Should I hide this inside VarInfo?
+  auto [kb, jb, ib] = info.GetPaddedBoundsKJI(domain);
+  if (info.where == MetadataFlag({Metadata::None})) {
+    kb.s = 0;
+    kb.e = shape[4];
+    jb.s = 0;
+    jb.e = shape[5];
+    ib.s = 0;
+    ib.e = shape[6];
   }
-  for (int t = 0; t < Nt; ++t) {
-    for (int u = 0; u < Nu; ++u) {
-      for (int v = 0; v < Nv; ++v) {
-        for (int k = kb.s; k <= kb.e; ++k) {
-          for (int j = jb.s; j <= jb.e; ++j) {
-            for (int i = ib.s; i <= ib.e; ++i) {
-              f(idx, t, u, v, k, j, i);
-              idx++;
+  for (int topo = 0; topo < shape[0]; ++topo) {
+    for (int t = 0; t < shape[1]; ++t) {
+      for (int u = 0; u < shape[2]; ++u) {
+        for (int v = 0; v < shape[3]; ++v) {
+          for (int k = kb.s; k <= kb.e; ++k) {
+            for (int j = jb.s; j <= jb.e; ++j) {
+              for (int i = ib.s; i <= ib.e; ++i) {
+                f(idx, topo, t, u, v, k, j, i);
+                idx++;
+              }
             }
           }
         }
diff --git a/src/outputs/parthenon_hdf5.cpp b/src/outputs/parthenon_hdf5.cpp
index a2c4bff31088..46413d3ea0f0 100644
--- a/src/outputs/parthenon_hdf5.cpp
+++ b/src/outputs/parthenon_hdf5.cpp
@@ -82,9 +82,10 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm
 
   auto const &first_block = *(pm->block_list.front());
 
-  const IndexRange out_ib = first_block.cellbounds.GetBoundsI(theDomain);
-  const IndexRange out_jb = first_block.cellbounds.GetBoundsJ(theDomain);
-  const IndexRange out_kb = first_block.cellbounds.GetBoundsK(theDomain);
+  const auto &cellbounds = first_block.cellbounds;
+  const IndexRange out_ib = cellbounds.GetBoundsI(theDomain);
+  const IndexRange out_jb = cellbounds.GetBoundsJ(theDomain);
+  const IndexRange out_kb = cellbounds.GetBoundsK(theDomain);
 
   auto const nx1 = out_ib.e - out_ib.s + 1;
   auto const nx2 = out_jb.e - out_jb.s + 1;
@@ -249,18 +250,9 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm
       return GetAnyVariables(var_vec, output_params.variables);
     }
   };
-
-  // get list of all vars, just use the first block since the list is the same for all
-  // blocks
-  std::vector<VarInfo> all_vars_info;
-  const auto vars = get_vars(pm->block_list.front());
-  for (auto &v : vars) {
-    all_vars_info.emplace_back(v);
-  }
-
-  // sort alphabetically
-  std::sort(all_vars_info.begin(), all_vars_info.end(),
-            [](const VarInfo &a, const VarInfo &b) { return a.label < b.label; });
+  // get list of all vars, just use the first block since the list is
+  // the same for all blocks
+  auto all_vars_info = VarInfo::GetAll(get_vars(pm->block_list.front()), cellbounds);
 
   // We need to add information about the sparse variables to the HDF5 file, namely:
   // 1) Which variables are sparse
@@ -308,74 +300,34 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm
     memset(tmpData.data(), 0, tmpData.size() * sizeof(OutT));
 
     const std::string var_name = vinfo.label;
-    const hsize_t nx6 = vinfo.nx6;
-    const hsize_t nx5 = vinfo.nx5;
-    const hsize_t nx4 = vinfo.nx4;
-
-    hsize_t local_offset[H5_NDIM] = {my_offset, 0, 0, 0, 0, 0, 0};
-    hsize_t local_count[H5_NDIM] = {static_cast<hsize_t>(num_blocks_local),
-                                    static_cast<hsize_t>(nx6),
-                                    static_cast<hsize_t>(nx5),
-                                    static_cast<hsize_t>(nx4),
-                                    static_cast<hsize_t>(nx3),
-                                    static_cast<hsize_t>(nx2),
-                                    static_cast<hsize_t>(nx1)};
-    hsize_t global_count[H5_NDIM] = {static_cast<hsize_t>(max_blocks_global),
-                                     static_cast<hsize_t>(nx6),
-                                     static_cast<hsize_t>(nx5),
-                                     static_cast<hsize_t>(nx4),
-                                     static_cast<hsize_t>(nx3),
-                                     static_cast<hsize_t>(nx2),
-                                     static_cast<hsize_t>(nx1)};
-
-    std::vector<hsize_t> alldims({nx6, nx5, nx4, static_cast<hsize_t>(vinfo.nx3),
-                                  static_cast<hsize_t>(vinfo.nx2),
-                                  static_cast<hsize_t>(vinfo.nx1)});
-
-    int ndim = -1;
-#ifndef PARTHENON_DISABLE_HDF5_COMPRESSION
-    // we need chunks to enable compression
-    std::array<hsize_t, H5_NDIM> chunk_size({1, 1, 1, 1, 1, 1, 1});
-#endif
-    if (vinfo.where == MetadataFlag(Metadata::Cell)) {
-      ndim = 3 + vinfo.tensor_rank + 1;
-      for (int i = 0; i < vinfo.tensor_rank; i++) {
-        local_count[1 + i] = global_count[1 + i] = alldims[3 - vinfo.tensor_rank + i];
-      }
-      local_count[vinfo.tensor_rank + 1] = global_count[vinfo.tensor_rank + 1] = nx3;
-      local_count[vinfo.tensor_rank + 2] = global_count[vinfo.tensor_rank + 2] = nx2;
-      local_count[vinfo.tensor_rank + 3] = global_count[vinfo.tensor_rank + 3] = nx1;
 
-#ifndef PARTHENON_DISABLE_HDF5_COMPRESSION
-      if (output_params.hdf5_compression_level > 0) {
-        for (int i = ndim - 3; i < ndim; i++) {
-          chunk_size[i] = local_count[i];
-        }
-      }
-#endif
-    } else if (vinfo.where == MetadataFlag(Metadata::None)) {
-      ndim = vinfo.tensor_rank + 1;
-      for (int i = 0; i < vinfo.tensor_rank; i++) {
-        local_count[1 + i] = global_count[1 + i] = alldims[6 - vinfo.tensor_rank + i];
-      }
+    hsize_t local_offset[H5_NDIM];
+    std::fill(local_offset + 1, local_offset + H5_NDIM, 0);
+    local_offset[0] = my_offset;
 
-#ifndef PARTHENON_DISABLE_HDF5_COMPRESSION
-      if (output_params.hdf5_compression_level > 0) {
-        int nchunk_indices = std::min<int>(vinfo.tensor_rank, 3);
-        for (int i = ndim - nchunk_indices; i < ndim; i++) {
-          chunk_size[i] = alldims[6 - nchunk_indices + i];
-        }
-      }
-#endif
-    } else {
-      PARTHENON_THROW("Only Cell and None locations supported!");
-    }
+    hsize_t local_count[H5_NDIM];
+    local_count[0] = static_cast<hsize_t>(num_blocks_local);
+
+    hsize_t global_count[H5_NDIM];
+    global_count[0] = static_cast<hsize_t>(max_blocks_global);
+
+    // block index + variable on block dimensions
+    int ndim = 1 + vinfo.FillShape(theDomain, &(local_count[1]), &(global_count[1]));
 
 #ifndef PARTHENON_DISABLE_HDF5_COMPRESSION
-    PARTHENON_HDF5_CHECK(H5Pset_chunk(pl_dcreate, ndim, chunk_size.data()));
-    // Do not run the pipeline if compression is soft disabled.
-    // By default data would still be passed, which may result in slower output.
+    // we need chunks to enable compression. Do not run the pipeline
+    // if compression is soft disabled.  By default data would still
+    // be passed, which may result in slower output.
     if (output_params.hdf5_compression_level > 0) {
+      std::array<hsize_t, H5_NDIM> chunk_size;
+      std::fill(chunk_size.begin(), chunk_size.end(), 1);
+      for (int i = 1; i < ndim; ++i) {
+        chunk_size[i] = local_count[i];
+      }
+      if (vinfo.where != MetadataFlag(Metadata::None)) {
+        std::fill(&(chunk_size[0]), &(chunk_size[0]) + ndim - 3, 1);
+      }
+      PARTHENON_HDF5_CHECK(H5Pset_chunk(pl_dcreate, ndim, chunk_size.data()));
       PARTHENON_HDF5_CHECK(
           H5Pset_deflate(pl_dcreate, std::min(9, output_params.hdf5_compression_level)));
     }
@@ -398,9 +350,9 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm
         if (v->IsAllocated() && (var_name == v->label())) {
           auto v_h = v->data.GetHostMirrorAndCopy();
           OutputUtils::PackOrUnpackVar(
-              pmb.get(), v.get(), output_params.include_ghost_zones, index, tmpData,
-              [&](auto index, int t, int u, int v, int k, int j, int i) {
-                tmpData[index] = static_cast<OutT>(v_h(t, u, v, k, j, i));
+              vinfo, output_params.include_ghost_zones, index,
+              [&](auto index, int topo, int t, int u, int v, int k, int j, int i) {
+                tmpData[index] = static_cast<OutT>(v_h(topo, t, u, v, k, j, i));
               });
 
           is_allocated = true;
@@ -415,14 +367,7 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm
 
       if (!is_allocated) {
         if (vinfo.is_sparse) {
-          hsize_t varSize{};
-          if (vinfo.where == MetadataFlag(Metadata::Cell)) {
-            varSize = vinfo.nx6 * vinfo.nx5 * vinfo.nx4 * (out_kb.e - out_kb.s + 1) *
-                      (out_jb.e - out_jb.s + 1) * (out_ib.e - out_ib.s + 1);
-          } else {
-            varSize =
-                vinfo.nx6 * vinfo.nx5 * vinfo.nx4 * vinfo.nx3 * vinfo.nx2 * vinfo.nx1;
-          }
+          hsize_t varSize = vinfo.FillSize(theDomain);
           auto fill_val =
               output_params.sparse_seed_nans ? std::numeric_limits<OutT>::quiet_NaN() : 0;
           std::fill(tmpData.data() + index, tmpData.data() + index + varSize, fill_val);
@@ -438,8 +383,13 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm
 
     Kokkos::Profiling::pushRegion("write variable data");
     // write data to file
-    HDF5WriteND(file, var_name, tmpData.data(), ndim, &local_offset[0], &local_count[0],
-                &global_count[0], pl_xfer, pl_dcreate);
+    { // scope so the dataset gets closed
+      HDF5WriteND(file, var_name, tmpData.data(), ndim, &local_offset[0], &local_count[0],
+                  &global_count[0], pl_xfer, pl_dcreate);
+      H5D dset = H5D::FromHIDCheck(H5Dopen2(file, var_name.c_str(), H5P_DEFAULT));
+      HDF5WriteAttribute("TopologicalLocation", Metadata::LocationToString(vinfo.where),
+                         dset);
+    }
     Kokkos::Profiling::popRegion(); // write variable data
     Kokkos::Profiling::popRegion(); // write variable loop
   }
@@ -550,7 +500,7 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm
   if (output_params.write_xdmf) {
     Kokkos::Profiling::pushRegion("genXDMF");
     // generate XDMF companion file
-    XDMF::genXDMF(filename, pm, tm, nx1, nx2, nx3, all_vars_info, swarm_info);
+    XDMF::genXDMF(filename, pm, tm, theDomain, nx1, nx2, nx3, all_vars_info, swarm_info);
     Kokkos::Profiling::popRegion(); // genXDMF
   }
 
diff --git a/src/outputs/parthenon_hdf5_types.hpp b/src/outputs/parthenon_hdf5_types.hpp
index e9f771d50619..6a3454b868d3 100644
--- a/src/outputs/parthenon_hdf5_types.hpp
+++ b/src/outputs/parthenon_hdf5_types.hpp
@@ -34,15 +34,17 @@
 #include <tuple>
 #include <vector>
 
+#include "kokkos_abstraction.hpp"
 #include "utils/error_checking.hpp"
 
 namespace parthenon {
 namespace HDF5 {
 
-// Number of dimension of HDF5 field data sets (block x nv x nu x nt x nz x ny x nx)
-static constexpr size_t H5_NDIM = 7;
+// Number of dimension of HDF5 field data sets:
+// (block x n_topological_elements x nv x nu x nt x nz x ny x nx)
+static constexpr size_t H5_NDIM = MAX_VARIABLE_DIMENSION + 1;
 
-static constexpr int OUTPUT_VERSION_FORMAT = 3;
+static constexpr int OUTPUT_VERSION_FORMAT = 4;
 
 /**
  * @brief RAII handles for HDF5. Use the typedefs directly (e.g. `H5A`, `H5D`, etc.)
diff --git a/src/outputs/parthenon_xdmf.cpp b/src/outputs/parthenon_xdmf.cpp
index 08424440eeb1..d16f3507ea00 100644
--- a/src/outputs/parthenon_xdmf.cpp
+++ b/src/outputs/parthenon_xdmf.cpp
@@ -3,7 +3,7 @@
 // Copyright(C) 2023 The Parthenon collaboration
 // Licensed under the 3-clause BSD License, see LICENSE file for details
 //========================================================================================
-// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved.
+// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved.
 //
 // This program was produced under U.S. Government contract 89233218CNA000001 for Los
 // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
@@ -59,7 +59,8 @@ static void writeXdmfSlabVariableRef(std::ofstream &fid, const std::string &name
                                      const std::vector<std::string> &component_labels,
                                      std::string &hdfFile, int iblock,
                                      const int &num_components, int &ndims, hsize_t *dims,
-                                     const std::string &dims321, bool isVector);
+                                     const std::string &dims321, bool isVector,
+                                     MetadataFlag where);
 static std::string ParticleDatasetRef(const std::string &prefix,
                                       const std::string &swmname,
                                       const std::string &varname,
@@ -69,10 +70,12 @@ static std::string ParticleDatasetRef(const std::string &prefix,
 static void ParticleVariableRef(std::ofstream &xdmf, const std::string &varname,
                                 const SwarmVarInfo &varinfo, const std::string &swmname,
                                 const std::string &hdffile, int particle_count);
+static std::string LocationToStringRef(MetadataFlag where);
 } // namespace impl
 
-void genXDMF(std::string hdfFile, Mesh *pm, SimTime *tm, int nx1, int nx2, int nx3,
-             const std::vector<VarInfo> &var_list, const AllSwarmInfo &all_swarm_info) {
+void genXDMF(std::string hdfFile, Mesh *pm, SimTime *tm, IndexDomain domain, int nx1,
+             int nx2, int nx3, const std::vector<VarInfo> &var_list,
+             const AllSwarmInfo &all_swarm_info) {
   using namespace HDF5;
   using namespace OutputUtils;
   using namespace impl;
@@ -85,7 +88,7 @@ void genXDMF(std::string hdfFile, Mesh *pm, SimTime *tm, int nx1, int nx2, int n
   }
   std::string filename_aux = hdfFile + ".xdmf";
   std::ofstream xdmf;
-  hsize_t dims[H5_NDIM] = {0, 0, 0, 0, 0, 0, 0};
+  hsize_t dims[H5_NDIM] = {0}; // zero-initialized
 
   // open file
   xdmf = std::ofstream(filename_aux.c_str(), std::ofstream::trunc);
@@ -114,9 +117,6 @@ void genXDMF(std::string hdfFile, Mesh *pm, SimTime *tm, int nx1, int nx2, int n
 
   // Now write Grid for each block
   dims[0] = pm->nbtotal;
-  std::string dims321 =
-      std::to_string(nx3) + " " + std::to_string(nx2) + " " + std::to_string(nx1);
-
   for (int ib = 0; ib < pm->nbtotal; ib++) {
     xdmf << "    <Grid GridType=\"Uniform\" Name=\"" << ib << "\">" << std::endl;
     xdmf << blockTopology;
@@ -150,26 +150,23 @@ void genXDMF(std::string hdfFile, Mesh *pm, SimTime *tm, int nx1, int nx2, int n
     // write graphics variables
     int ndim;
     for (const auto &vinfo : var_list) {
-      std::vector<hsize_t> alldims(
-          {static_cast<hsize_t>(vinfo.nx6), static_cast<hsize_t>(vinfo.nx5),
-           static_cast<hsize_t>(vinfo.nx4), static_cast<hsize_t>(vinfo.nx3),
-           static_cast<hsize_t>(vinfo.nx2), static_cast<hsize_t>(vinfo.nx1)});
-      // Only cell-based data currently supported for visualization
-      if (vinfo.where == MetadataFlag(Metadata::Cell)) {
-        ndim = 3 + vinfo.tensor_rank + 1;
-        for (int i = 0; i < vinfo.tensor_rank; i++) {
-          dims[1 + i] = alldims[3 - vinfo.tensor_rank + i];
-        }
-        dims[vinfo.tensor_rank + 1] = nx3;
-        dims[vinfo.tensor_rank + 2] = nx2;
-        dims[vinfo.tensor_rank + 3] = nx1;
-      } else {
+      // JMM: I can't figure out how to get faces/edges to work and
+      // I'm not going try any longer. More eyes appreciated.
+      if ((vinfo.where != MetadataFlag({Metadata::Cell})) &&
+          (vinfo.where != MetadataFlag({Metadata::Node}))) {
         continue;
       }
-
+      ndim = vinfo.FillShape<hsize_t>(domain, &(dims[1])) + 1;
       const int num_components = vinfo.num_components;
+      nx3 = dims[ndim - 3];
+      nx2 = dims[ndim - 2];
+      nx1 = dims[ndim - 1];
+      std::string dims321 =
+          std::to_string(nx3) + " " + std::to_string(nx2) + " " + std::to_string(nx1);
+
       writeXdmfSlabVariableRef(xdmf, vinfo.label, vinfo.component_labels, hdfFile, ib,
-                               num_components, ndim, dims, dims321, vinfo.is_vector);
+                               num_components, ndim, dims, dims321, vinfo.is_vector,
+                               vinfo.where);
     }
     xdmf << "    </Grid>" << std::endl;
   }
@@ -241,11 +238,14 @@ static void writeXdmfSlabVariableRef(std::ofstream &fid, const std::string &name
                                      const std::vector<std::string> &component_labels,
                                      std::string &hdfFile, int iblock,
                                      const int &num_components, int &ndims, hsize_t *dims,
-                                     const std::string &dims321, bool isVector) {
+                                     const std::string &dims321, bool isVector,
+                                     MetadataFlag where) {
   // writes a slab reference to file
   std::vector<std::string> names;
   int nentries = 1;
-  if (num_components == 1 || isVector) {
+  // TODO(JMM): this is not generic
+  bool is_cell_vector = isVector && (where == MetadataFlag({Metadata::Cell}));
+  if (num_components == 1 || is_cell_vector) {
     // we only make one entry, because either num_components == 1, or we write this as a
     // vector
     names.push_back(name);
@@ -256,10 +256,10 @@ static void writeXdmfSlabVariableRef(std::ofstream &fid, const std::string &name
     }
   }
   const int tensor_dims = ndims - 1 - 3;
-
+  auto wherestring = LocationToStringRef(where);
   if (tensor_dims == 0) {
     const std::string prefix = "      ";
-    fid << prefix << R"(<Attribute Name=")" << names[0] << R"(" Center="Cell")";
+    fid << prefix << R"(<Attribute Name=")" << names[0] << wherestring;
     fid << ">" << std::endl;
     fid << prefix << "  "
         << R"(<DataItem ItemType="HyperSlab" Dimensions=")";
@@ -282,8 +282,8 @@ static void writeXdmfSlabVariableRef(std::ofstream &fid, const std::string &name
   } else if (tensor_dims == 1) {
     const std::string prefix = "      ";
     for (int i = 0; i < nentries; i++) {
-      fid << prefix << R"(<Attribute Name=")" << names[i] << R"(" Center="Cell")";
-      if (isVector) {
+      fid << prefix << R"(<Attribute Name=")" << names[i] << wherestring;
+      if (is_cell_vector) {
         fid << R"( AttributeType="Vector")"
             << R"( Dimensions=")" << dims[1] << " " << dims321 << R"(")";
       }
@@ -410,6 +410,21 @@ static void ParticleVariableRef(std::ofstream &xdmf, const std::string &varname,
     }
   }
 }
+
+static std::string LocationToStringRef(MetadataFlag where) {
+  if (where == MetadataFlag({Metadata::Node})) {
+    return R"(" Center="Node")";
+  } else if (where == MetadataFlag({Metadata::Cell})) {
+    return R"(" Center="Cell")";
+  } else if (where == MetadataFlag({Metadata::Face})) {
+    return R"(" Center="Face")";
+  } else if (where == MetadataFlag({Metadata::Edge})) {
+    return R"(" Center="Edge")";
+  } else { // if (where == MetadataFlag({Metadata::None})) {
+    return R"(" Center="Other")";
+  }
+}
+
 } // namespace impl
 } // namespace XDMF
 } // namespace parthenon
diff --git a/src/outputs/parthenon_xdmf.hpp b/src/outputs/parthenon_xdmf.hpp
index ce747d1dfc7d..ef15b2a81a32 100644
--- a/src/outputs/parthenon_xdmf.hpp
+++ b/src/outputs/parthenon_xdmf.hpp
@@ -28,8 +28,8 @@
 namespace parthenon {
 // forward declarations
 namespace XDMF {
-void genXDMF(std::string hdfFile, Mesh *pm, SimTime *tm, int nx1, int nx2, int nx3,
-             const std::vector<OutputUtils::VarInfo> &var_list,
+void genXDMF(std::string hdfFile, Mesh *pm, SimTime *tm, IndexDomain domain, int nx1,
+             int nx2, int nx3, const std::vector<OutputUtils::VarInfo> &var_list,
              const OutputUtils::AllSwarmInfo &all_swarm_info);
 } // namespace XDMF
 } // namespace parthenon
diff --git a/src/outputs/restart.hpp b/src/outputs/restart.hpp
index ef8ed324d274..e986acbaf028 100644
--- a/src/outputs/restart.hpp
+++ b/src/outputs/restart.hpp
@@ -27,6 +27,7 @@
 
 #include "interface/metadata.hpp"
 #include "mesh/domain.hpp"
+#include "outputs/output_utils.hpp"
 #include "utils/error_checking.hpp"
 
 namespace parthenon {
@@ -87,9 +88,8 @@ class RestartReader {
   // Assumes blocks are contiguous
   // fills internal data for given pointer
   virtual void ReadBlocks(const std::string &name, IndexRange range,
-                          std::vector<Real> &dataVec, const std::vector<size_t> &bsize,
-                          int file_output_format_version, MetadataFlag where,
-                          const std::vector<int> &shape = {}) const = 0;
+                          const OutputUtils::VarInfo &info, std::vector<Real> &dataVec,
+                          int file_output_format_version) const = 0;
 
   // Gets the data from a swarm var on current rank. Assumes all
   // blocks are contiguous. Fills dataVec based on shape from swarmvar
diff --git a/src/outputs/restart_hdf5.cpp b/src/outputs/restart_hdf5.cpp
index cb54bd85288f..a5016bc68e8d 100644
--- a/src/outputs/restart_hdf5.cpp
+++ b/src/outputs/restart_hdf5.cpp
@@ -27,6 +27,7 @@
 #include "interface/params.hpp"
 #include "mesh/mesh.hpp"
 #include "mesh/meshblock.hpp"
+#include "outputs/output_utils.hpp"
 #include "outputs/outputs.hpp"
 #ifdef ENABLE_HDF5
 #include "outputs/parthenon_hdf5.hpp"
@@ -180,66 +181,34 @@ void RestartReaderHDF5::ReadParams(const std::string &name, Params &p) {
 #endif // ENABLE_HDF5
 }
 void RestartReaderHDF5::ReadBlocks(const std::string &name, IndexRange range,
+                                   const OutputUtils::VarInfo &info,
                                    std::vector<Real> &dataVec,
-                                   const std::vector<size_t> &bsize,
-                                   int file_output_format_version, MetadataFlag where,
-                                   const std::vector<int> &shape) const {
+                                   int file_output_format_version) const {
 #ifndef ENABLE_HDF5
   PARTHENON_FAIL("Restart functionality is not available because HDF5 is disabled");
 #else  // HDF5 enabled
   auto hdl = OpenDataset<Real>(name);
 
-  constexpr int CHUNK_MAX_DIM = 7;
+  const int VNDIM = info.VNDIM;
 
   /** Select hyperslab in dataset **/
-  hsize_t offset[CHUNK_MAX_DIM] = {static_cast<hsize_t>(range.s), 0, 0, 0, 0, 0, 0};
-  hsize_t count[CHUNK_MAX_DIM];
   int total_dim = 0;
-  if (file_output_format_version == -1) {
-    size_t vlen = 1;
-    for (int i = 0; i < shape.size(); i++) {
-      vlen *= shape[i];
-    }
-    count[0] = static_cast<hsize_t>(range.e - range.s + 1);
-    count[1] = bsize[2];
-    count[2] = bsize[1];
-    count[3] = bsize[0];
-    count[4] = vlen;
-    total_dim = 5;
-  } else if (file_output_format_version == 2) {
-    PARTHENON_REQUIRE(shape.size() <= 1,
-                      "Higher than vector datatypes are unstable in output versions < 3");
-    size_t vlen = 1;
-    for (int i = 0; i < shape.size(); i++) {
-      vlen *= shape[i];
-    }
-    count[0] = static_cast<hsize_t>(range.e - range.s + 1);
-    count[1] = vlen;
-    count[2] = bsize[2];
-    count[3] = bsize[1];
-    count[4] = bsize[0];
-    total_dim = 5;
-  } else if (file_output_format_version == HDF5::OUTPUT_VERSION_FORMAT) {
-    count[0] = static_cast<hsize_t>(range.e - range.s + 1);
-    const int ndim = shape.size();
-    if (where == MetadataFlag(Metadata::Cell)) {
-      for (int i = 0; i < ndim; i++) {
-        count[1 + i] = shape[ndim - i - 1];
-      }
-      count[ndim + 1] = bsize[2];
-      count[ndim + 2] = bsize[1];
-      count[ndim + 3] = bsize[0];
-      total_dim = 3 + ndim + 1;
-    } else if (where == MetadataFlag(Metadata::None)) {
-      for (int i = 0; i < ndim; i++) {
-        count[1 + i] = shape[ndim - i - 1];
-      }
-      total_dim = ndim + 1;
-    } else {
-      PARTHENON_THROW("Only Cell and None locations supported!");
-    }
+  hsize_t offset[VNDIM], count[VNDIM];
+  std::fill(offset + 1, offset + VNDIM, 0);
+  std::fill(count + 1, count + VNDIM, 1);
+
+  offset[0] = static_cast<hsize_t>(range.s);
+  count[0] = static_cast<hsize_t>(range.e - range.s + 1);
+  const IndexDomain domain = hasGhost ? IndexDomain::entire : IndexDomain::interior;
+
+  // Currently supports versions 3 and 4.
+  if (file_output_format_version >= HDF5::OUTPUT_VERSION_FORMAT - 1) {
+    total_dim = info.FillShape<hsize_t>(domain, &(count[1])) + 1;
   } else {
-    PARTHENON_THROW("Unknown output format version in restart file.")
+    std::stringstream msg;
+    msg << "File format version " << file_output_format_version << " not supported. "
+        << "Current format is " << HDF5::OUTPUT_VERSION_FORMAT << std::endl;
+    PARTHENON_THROW(msg)
   }
 
   hsize_t total_count = 1;
diff --git a/src/outputs/restart_hdf5.hpp b/src/outputs/restart_hdf5.hpp
index edce38825841..562b67daf78b 100644
--- a/src/outputs/restart_hdf5.hpp
+++ b/src/outputs/restart_hdf5.hpp
@@ -111,9 +111,9 @@ class RestartReaderHDF5 : public RestartReader {
   // Gets data for all blocks on current rank.
   // Assumes blocks are contiguous
   // fills internal data for given pointer
-  void ReadBlocks(const std::string &name, IndexRange range, std::vector<Real> &dataVec,
-                  const std::vector<size_t> &bsize, int file_output_format_version,
-                  MetadataFlag where, const std::vector<int> &shape = {}) const override;
+  void ReadBlocks(const std::string &name, IndexRange range,
+                  const OutputUtils::VarInfo &info, std::vector<Real> &dataVec,
+                  int file_output_format_version) const override;
 
   // Gets the data from a swarm var on current rank. Assumes all
   // blocks are contiguous. Fills dataVec based on shape from swarmvar
diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp
index 28a084bcd5e0..91a87c386e6c 100644
--- a/src/parthenon_manager.cpp
+++ b/src/parthenon_manager.cpp
@@ -239,18 +239,13 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) {
   std::cout << "Blocks assigned to rank " << Globals::my_rank << ": " << nbs << ":" << nbe
             << std::endl;
 
+  // Currently supports versions 3 and 4.
   const auto file_output_format_ver = resfile.GetOutputFormatVersion();
-  if (file_output_format_ver == -1) {
-    // Being extra stringent here so that we don't forget to update the machinery when
-    // another change happens.
-    PARTHENON_REQUIRE_THROWS(
-        HDF5::OUTPUT_VERSION_FORMAT == 2 || HDF5::OUTPUT_VERSION_FORMAT == 3,
-        "Auto conversion from original to format 2 or 3 not implemented yet.")
-
-    if (Globals::my_rank == 0) {
-      PARTHENON_WARN("Restarting from a old output file format. New outputs written with "
-                     "this binary will use new format.")
-    }
+  if (file_output_format_ver < HDF5::OUTPUT_VERSION_FORMAT - 1) {
+    std::stringstream msg;
+    msg << "File format version " << file_output_format_ver << " not supported. "
+        << "Current format is " << HDF5::OUTPUT_VERSION_FORMAT << std::endl;
+    PARTHENON_THROW(msg)
   }
 
   // Get an iterator on block 0 for variable listing
@@ -270,6 +265,8 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) {
   const auto indep_restart_vars =
       GetAnyVariables(mb.meshblock_data.Get()->GetVariableVector(),
                       {parthenon::Metadata::Independent, parthenon::Metadata::Restart});
+  const auto all_vars_info =
+      OutputUtils::VarInfo::GetAll(indep_restart_vars, mb.cellbounds);
 
   const auto sparse_info = resfile.GetSparseInfo();
   // create map of sparse field labels to index in the SparseInfo table
@@ -281,11 +278,11 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) {
   // Allocate space based on largest vector
   int max_vlen = 1;
   int num_sparse = 0;
-  for (auto &v_info : indep_restart_vars) {
-    const auto &label = v_info->label();
+  for (const auto &v_info : all_vars_info) {
+    const auto &label = v_info.label;
 
     // check that variable is in the list of sparse fields if and only if it is sparse
-    if (v_info->IsSparse()) {
+    if (v_info.is_sparse) {
       ++num_sparse;
       PARTHENON_REQUIRE_THROWS(sparse_idxs.count(label) == 1,
                                "Sparse field " + label +
@@ -295,7 +292,7 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) {
                                "Dense field " + label +
                                    " is marked as sparse in restart file");
     }
-    max_vlen = std::max(max_vlen, v_info->NumComponents());
+    max_vlen = std::max(max_vlen, v_info.num_components);
   }
 
   // make sure we have all sparse variables that are in the restart file
@@ -304,20 +301,16 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) {
       "Mismatch between sparse fields in simulation and restart file");
 
   std::vector<Real> tmp(static_cast<size_t>(nb) * nCells * max_vlen);
-  for (auto &v_info : indep_restart_vars) {
-    const auto vlen = v_info->NumComponents();
-    const auto &label = v_info->label();
-    const auto &Nv = v_info->GetDim(4);
-    const auto &Nu = v_info->GetDim(5);
-    const auto &Nt = v_info->GetDim(6);
+  for (const auto &v_info : all_vars_info) {
+    const auto vlen = v_info.num_components;
+    const auto &label = v_info.label;
 
     if (Globals::my_rank == 0) {
       std::cout << "Var: " << label << ":" << vlen << std::endl;
     }
     // Read relevant data from the hdf file, this works for dense and sparse variables
     try {
-      resfile.ReadBlocks(label, myBlocks, tmp, bsize, file_output_format_ver,
-                         v_info->metadata().Where(), v_info->metadata().Shape());
+      resfile.ReadBlocks(label, myBlocks, v_info, tmp, file_output_format_ver);
     } catch (std::exception &ex) {
       std::cout << "[" << Globals::my_rank << "] WARNING: Failed to read variable "
                 << label << " from restart file:" << std::endl
@@ -327,7 +320,7 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) {
 
     size_t index = 0;
     for (auto &pmb : rm.block_list) {
-      if (v_info->IsSparse()) {
+      if (v_info.is_sparse) {
         // check if the sparse variable is allocated on this block
         if (sparse_info.IsAllocated(pmb->gid, sparse_idxs.at(label))) {
           pmb->AllocateSparse(label);
@@ -343,25 +336,17 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) {
 
       // Double note that this also needs to be update in case
       // we update the HDF5 infrastructure!
-      if (file_output_format_ver == -1) {
-        PARTHENON_WARN("This file output format version is deprecrated and will be "
-                       "removed in a future release.");
-        for (int k = out_kb.s; k <= out_kb.e; ++k) {
-          for (int j = out_jb.s; j <= out_jb.e; ++j) {
-            for (int i = out_ib.s; i <= out_ib.e; ++i) {
-              for (int l = 0; l < vlen; ++l) {
-                v_h(l, k, j, i) = tmp[index++];
-              }
-            }
-          }
-        }
-      } else if (file_output_format_ver == 2 ||
-                 file_output_format_ver == HDF5::OUTPUT_VERSION_FORMAT) {
-        OutputUtils::PackOrUnpackVar(pmb.get(), v.get(), resfile.hasGhost, index, tmp,
-                                     [&](auto index, int t, int u, int v, int k, int j,
-                                         int i) { v_h(t, u, v, k, j, i) = tmp[index]; });
+      if (file_output_format_ver >= HDF5::OUTPUT_VERSION_FORMAT - 1) {
+        OutputUtils::PackOrUnpackVar(
+            v_info, resfile.hasGhost, index,
+            [&](auto index, int topo, int t, int u, int v, int k, int j, int i) {
+              v_h(topo, t, u, v, k, j, i) = tmp[index];
+            });
       } else {
-        PARTHENON_THROW("Unknown output format version in restart file.")
+        std::stringstream msg;
+        msg << "File format version " << file_output_format_ver << " not supported. "
+            << "Current format is " << HDF5::OUTPUT_VERSION_FORMAT << std::endl;
+        PARTHENON_THROW(msg)
       }
 
       v->data.DeepCopy(v_h);
diff --git a/tst/unit/CMakeLists.txt b/tst/unit/CMakeLists.txt
index bd3d2bf3b946..035e431c5c71 100644
--- a/tst/unit/CMakeLists.txt
+++ b/tst/unit/CMakeLists.txt
@@ -33,6 +33,7 @@ list(APPEND unit_tests_SOURCES
     test_meshblock_data_iterator.cpp
     test_mesh_data.cpp
     test_nan_tags.cpp
+    test_output_utils.cpp
     test_pararrays.cpp
     test_sparse_pack.cpp
     test_swarm.cpp
diff --git a/tst/unit/test_output_utils.cpp b/tst/unit/test_output_utils.cpp
new file mode 100644
index 000000000000..4b04f3851eba
--- /dev/null
+++ b/tst/unit/test_output_utils.cpp
@@ -0,0 +1,281 @@
+//========================================================================================
+// Parthenon performance portable AMR framework
+// Copyright(C) 2024 The Parthenon collaboration
+// Licensed under the 3-clause BSD License, see LICENSE file for details
+//========================================================================================
+// (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved.
+//
+// This program was produced under U.S. Government contract 89233218CNA000001 for Los
+// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
+// for the U.S. Department of Energy/National Nuclear Security Administration. All rights
+// in the program are reserved by Triad National Security, LLC, and the U.S. Department
+// of Energy/National Nuclear Security Administration. The Government is granted for
+// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
+// license in this material to reproduce, prepare derivative works, distribute copies to
+// the public, perform publicly and display publicly, and to permit others to do so.
+//========================================================================================
+
+#include <cstdio>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <catch2/catch.hpp>
+
+#include "basic_types.hpp"
+#include "globals.hpp"
+#include "interface/mesh_data.hpp"
+#include "interface/meshblock_data.hpp"
+#include "interface/metadata.hpp"
+#include "interface/state_descriptor.hpp"
+#include "interface/variable_pack.hpp"
+#include "kokkos_abstraction.hpp"
+#include "mesh/domain.hpp"
+#include "mesh/meshblock.hpp"
+#include "outputs/output_utils.hpp"
+
+using parthenon::BlockList_t;
+using parthenon::DevExecSpace;
+using parthenon::IndexDomain;
+using parthenon::IndexShape;
+using parthenon::loop_pattern_mdrange_tag;
+using parthenon::MeshBlock;
+using parthenon::MeshBlockData;
+using parthenon::MeshData;
+using parthenon::Metadata;
+using parthenon::PackIndexMap;
+using parthenon::par_for;
+using parthenon::Real;
+using parthenon::StateDescriptor;
+
+using namespace parthenon::OutputUtils;
+
+TEST_CASE("The VarInfo object produces appropriate ranges", "[VarInfo][OutputUtils]") {
+  GIVEN("A MeshBlock with some vars on it") {
+    constexpr int NG = 2;
+    constexpr int NSIDE = 16;
+    constexpr int NDIM = 3;
+    constexpr int NFULL = (NSIDE + 2 * NG);
+
+    constexpr auto interior = parthenon::IndexDomain::interior;
+    constexpr auto entire = parthenon::IndexDomain::entire;
+
+    // JMM: This needs to be reset to 0 when we're done, because other
+    // tests assume it's unset, thus zero-initialized.
+    parthenon::Globals::nghost = NG;
+
+    auto pkg = std::make_shared<StateDescriptor>("Test package");
+
+    const std::string scalar_cell = "scalar_cell";
+    Metadata m({Metadata::Cell, Metadata::Independent});
+    pkg->AddField(scalar_cell, m);
+
+    const std::string tensor_cell = "tensor_cell";
+    m = Metadata({Metadata::Cell, Metadata::Independent}, std::vector<int>{3, 4});
+    pkg->AddField(tensor_cell, m);
+
+    const std::string tensor_none = "tensor_none";
+    m = Metadata({Metadata::None, Metadata::Independent}, std::vector<int>{3, 4});
+    pkg->AddField(tensor_none, m);
+
+    // four-vector-valued var with a vector at each face
+    const std::string vector_face = "four_vector_face";
+    m = Metadata({Metadata::Face, Metadata::Independent}, std::vector<int>{4});
+    pkg->AddField(vector_face, m);
+
+    // vector-valued var with a single value at each edge
+    const std::string scalar_edge = "scalar_edge";
+    m = Metadata({Metadata::Edge, Metadata::Independent});
+    pkg->AddField(scalar_edge, m);
+
+    const std::vector<std::string> var_names = {scalar_cell, tensor_cell, tensor_none,
+                                                vector_face, scalar_edge};
+
+    auto pmb = std::make_shared<MeshBlock>(NSIDE, NDIM);
+    auto pmbd = pmb->meshblock_data.Get();
+    pmbd->Initialize(pkg, pmb);
+
+    IndexShape cellbounds = pmb->cellbounds;
+    THEN("The CellBounds object is reasonable") {
+      REQUIRE(cellbounds.ncellsk(entire) == NSIDE + 2 * NG);
+      REQUIRE(cellbounds.ncellsk(interior) == NSIDE);
+      REQUIRE(cellbounds.ncellsj(entire) == NSIDE + 2 * NG);
+      REQUIRE(cellbounds.ncellsj(interior) == NSIDE);
+      REQUIRE(cellbounds.ncellsi(entire) == NSIDE + 2 * NG);
+      REQUIRE(cellbounds.ncellsi(interior) == NSIDE);
+    }
+
+    WHEN("We initialize VarInfo on a scalar cell var") {
+      auto v = pmbd->GetVarPtr(scalar_cell);
+      VarInfo info(v, cellbounds);
+
+      THEN("The shape is correct over both interior and entire") {
+        std::vector<int> shape(10);
+        int ndim = info.FillShape<int>(interior, shape.data());
+        REQUIRE(ndim == 3);
+        for (int i = 0; i < ndim; ++i) {
+          REQUIRE(shape[i] == NSIDE);
+        }
+
+        ndim = info.FillShape<int>(entire, shape.data());
+        REQUIRE(ndim == 3);
+        for (int i = 0; i < ndim; ++i) {
+          REQUIRE(shape[i] == NSIDE + 2 * NG);
+        }
+      }
+      THEN("The size and tensorsize are correct") {
+        REQUIRE(info.Size() == NFULL * NFULL * NFULL);
+        REQUIRE(info.TensorSize() * info.ntop_elems == 1);
+      }
+    }
+
+    WHEN("We initialize VarInfo on a tensor cell var") {
+      auto v = pmbd->GetVarPtr(tensor_cell);
+      VarInfo info(v, cellbounds);
+
+      THEN("The shape is correct over both interior and entire") {
+        std::vector<int> shape(10);
+        int ndim = info.FillShape<int>(interior, shape.data());
+        REQUIRE(ndim == 5);
+        REQUIRE(shape[0] == 4);
+        REQUIRE(shape[1] == 3);
+        for (int i = 2; i < ndim; ++i) {
+          REQUIRE(shape[i] == NSIDE);
+        }
+
+        ndim = info.FillShape<int>(entire, shape.data());
+        REQUIRE(ndim == 5);
+        REQUIRE(shape[0] == 4);
+        REQUIRE(shape[1] == 3);
+        for (int i = 2; i < ndim; ++i) {
+          REQUIRE(shape[i] == NSIDE + 2 * NG);
+        }
+      }
+      THEN("The size and tensorsize are correct") {
+        REQUIRE(info.Size() == 3 * 4 * NFULL * NFULL * NFULL);
+        REQUIRE(info.TensorSize() * info.ntop_elems == 3 * 4);
+      }
+    }
+
+    WHEN("We initialize VarInfo on a tensor no-centering var") {
+      auto v = pmbd->GetVarPtr(tensor_none);
+      VarInfo info(v, cellbounds);
+
+      THEN("The shape is correct over both interior and entire") {
+        std::vector<int> shape(10);
+        int ndim = info.FillShape<int>(interior, shape.data());
+        REQUIRE(ndim == 2);
+        REQUIRE(shape[0] == 4);
+        REQUIRE(shape[1] == 3);
+
+        ndim = info.FillShape<int>(entire, shape.data());
+        REQUIRE(ndim == 2);
+        REQUIRE(shape[0] == 4);
+        REQUIRE(shape[1] == 3);
+      }
+      THEN("The size and tensorsize are correct") {
+        REQUIRE(info.Size() == 3 * 4);
+        REQUIRE(info.TensorSize() * info.ntop_elems == 3 * 4);
+      }
+    }
+
+    WHEN("We initialize VarInfo on a vector face var") {
+      auto v = pmbd->GetVarPtr(vector_face);
+      VarInfo info(v, cellbounds);
+
+      THEN("The shape is correct over both interior and entire") {
+        std::vector<int> shape(10);
+        int ndim = info.FillShape<int>(interior, shape.data());
+        REQUIRE(ndim == 5);
+        REQUIRE(shape[0] == 3);
+        REQUIRE(shape[1] == 4);
+        for (int i = 2; i < ndim; ++i) {
+          REQUIRE(shape[i] == NSIDE + 1);
+        }
+        info.FillShape<int>(entire, shape.data());
+        for (int i = 2; i < ndim; ++i) {
+          REQUIRE(shape[i] == NSIDE + 2 * NG + 1);
+        }
+      }
+      THEN("The size and tensorsize are correct") {
+        REQUIRE(info.Size() == 3 * 4 * (NFULL + 1) * (NFULL + 1) * (NFULL + 1));
+        REQUIRE(info.TensorSize() * info.ntop_elems == 3 * 4);
+      }
+      THEN("Requesting reversed padded shape provides correctly shaped object") {
+        constexpr int ND = VarInfo::VNDIM;
+        auto padded_shape = info.GetPaddedShapeReversed(interior);
+        REQUIRE(padded_shape.size() == ND);
+        REQUIRE(padded_shape[ND - 1] == NSIDE + 1);
+        REQUIRE(padded_shape[ND - 2] == NSIDE + 1);
+        REQUIRE(padded_shape[ND - 3] == NSIDE + 1);
+        REQUIRE(padded_shape[ND - 4] == 4);
+        REQUIRE(padded_shape[0] == 3);
+        for (int i = 1; i < ND - 5; ++i) {
+          REQUIRE(padded_shape[i] == 1);
+        }
+      }
+      THEN("Requesting padded shape provides correctly shaped object") {
+        constexpr int ND = VarInfo::VNDIM;
+        auto padded_shape = info.GetPaddedShape(interior);
+        REQUIRE(padded_shape.size() == ND);
+        REQUIRE(padded_shape[0] == NSIDE + 1);
+        REQUIRE(padded_shape[1] == NSIDE + 1);
+        REQUIRE(padded_shape[2] == NSIDE + 1);
+        REQUIRE(padded_shape[3] == 4);
+        for (int i = 4; i < ND - 1; ++i) {
+          REQUIRE(padded_shape[i] == 1);
+        }
+        REQUIRE(padded_shape[ND - 1] == 3);
+      }
+
+      THEN("The padded bounds are correct") {
+        auto [kb, jb, ib] = info.GetPaddedBoundsKJI(interior);
+        REQUIRE(kb.s == NG);
+        REQUIRE(kb.e == NSIDE + NG);
+        REQUIRE(jb.s == NG);
+        REQUIRE(jb.e == NSIDE + NG);
+        REQUIRE(ib.s == NG);
+        REQUIRE(ib.e == NSIDE + NG);
+      }
+    }
+
+    WHEN("We initialize VarInfo on a scaler edge var") {
+      auto v = pmbd->GetVarPtr(scalar_edge);
+      VarInfo info(v, cellbounds);
+
+      THEN("The shape is correct over both interior and entire") {
+        std::vector<int> shape(10);
+        int ndim = info.FillShape<int>(interior, shape.data());
+        REQUIRE(ndim == 4);
+        REQUIRE(shape[0] == 3);
+        for (int i = 1; i < ndim; ++i) {
+          REQUIRE(shape[i] == NSIDE + 1);
+        }
+        info.FillShape<int>(entire, shape.data());
+        for (int i = 1; i < ndim; ++i) {
+          REQUIRE(shape[i] == NSIDE + 2 * NG + 1);
+        }
+      }
+      THEN("The size and tensorsize are correct") {
+        REQUIRE(info.Size() == 3 * (NFULL + 1) * (NFULL + 1) * (NFULL + 1));
+        REQUIRE(info.TensorSize() * info.ntop_elems == 3);
+      }
+    }
+
+    WHEN("We request info from all vars") {
+      auto vars = parthenon::GetAnyVariables(pmbd->GetVariableVector(),
+                                             {parthenon::Metadata::Independent});
+      auto all_info = VarInfo::GetAll(vars, cellbounds);
+      THEN("The labels are all present") {
+        for (const std::string &name : var_names) {
+          auto pinfo = std::find(all_info.begin(), all_info.end(), name);
+          REQUIRE(pinfo != all_info.end());
+        }
+      }
+    }
+
+    // JMM: This needs to be reset to 0 when we're done, because other
+    // tests assume it's unset, thus zero-initialized.
+    parthenon::Globals::nghost = 0;
+  }
+}