feat: skeletonize (most) autapses and improve cross section analysis (#179)

* wip(skeletonizing): add support for fixing autapses in graphene volumes

* feat: add support for graphene timestamp

* wip: ensure shape matches atomic chunks

* fix: add back in xyzrange

* fix(SkeletonTask): generalize frag_path for windows

Also interpret bare paths as "file://"

* fix(SkeletonTask): use agglomerate and timestamp correctly

* fix: merge error removed cross_sectional_area params

* fix(SkeletonTask): apply timestamp in correct location

* fix: fixup the edges of the vcg with root vcg

* feat: remove requirements for a queue parameter

* redesign: use vol.info to get skeleton path

* fix: several errors

* feat: add fix_autapses to cli

* fix: compensate for mip levels

* fix: wrong path joining

* fix: ensure mesh bounds are rectified after expansion

* fix: wrong variable name in shard downsample

* fix(shards/image): fix situation where chunk.z == dataset.z

* fix: don't import view from CloudVolume

* fix: don't import view from cloudvolume

* fix: intelligent frag path resolution

* fix: handle binarization issue in repairing

* fix: smart path

* perf: skip downloading the "huge" bbox if no skeletons

* fix: ensure info file uploaded to frag path

* fix: check for key

* feat: support agglomeration for graphene for cross sections

* feat: support static root_ids for a graphene volume

* fix: regression in LuminanceLevelsTask due to CloudVolume upgrade

* refactor: simplify vcg code

* feat: add encoding level support for jpegxl

* fix: don't print error if max_mips is non-zero

---------

Co-authored-by: William Silversmith <[email protected]>
william-silversmith and William Silversmith authored Aug 14, 2024
1 parent 5f9f18f commit 2df3415
Showing 6 changed files with 214 additions and 52 deletions.
2 changes: 2 additions & 0 deletions igneous/task_creation/common.py
@@ -226,6 +226,8 @@ def set_encoding(cv, mip, encoding, encoding_level):

if encoding == "jpeg":
scale["jpeg_quality"] = encoding_level
elif encoding == "jpegxl":
scale["jpegxl_quality"] = encoding_level
elif encoding == "png":
scale["png_level"] = encoding_level
elif encoding == "fpzip":
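
As a hedged illustration only (not part of this PR): how a jpegxl quality level might end up in the precomputed info, mirroring the existing jpeg_quality / png_level fields. The volume path and quality value below are placeholders.

from cloudvolume import CloudVolume

# Sketch: record a JPEG XL quality level the same way set_encoding() does above.
vol = CloudVolume("gs://example-bucket/image", mip=0)   # placeholder path
scale = vol.info["scales"][0]                           # the mip 0 scale dict

scale["encoding"] = "jpegxl"
scale["jpegxl_quality"] = 90                            # corresponds to encoding_level
vol.commit_info()                                       # upload the modified info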
30 changes: 20 additions & 10 deletions igneous/task_creation/image.py
@@ -406,14 +406,6 @@ def create_sharded_image_info(
# maximum amount of information in the morton codes
grid_size = np.ceil(Vec(*dataset_size) / Vec(*chunk_size)).astype(np.int64)
max_bits = sum([ math.ceil(math.log2(size)) for size in grid_size ])
if max_bits > 64:
raise ValueError(
f"{max_bits}, more than a 64-bit integer, "
"would be required to describe the chunk positions "
"in this dataset. Try increasing the chunk size or "
"increasing dataset bounds."
f"Dataset Size: {dataset_size} Chunk Size: {chunk_size}"
)

chunks_per_shard = math.ceil(uncompressed_shard_bytesize / (chunk_voxels * byte_width))
chunks_per_shard = 2 ** int(math.log2(chunks_per_shard))
@@ -423,7 +415,7 @@

# approximate, would need to account for rounding effects to be exact
# rounding is corrected for via max_bits - pre - mini below.
num_shards = num_chunks / chunks_per_shard
num_shards = num_chunks / chunks_per_shard

def update_bits():
shard_bits = int(math.ceil(math.log2(num_shards)))
@@ -465,7 +457,25 @@ def update_bits():
# in the morton codes, so if there's any slack from rounding, the
# remainder goes into shard bits.
preshift_bits = preshift_bits - minishard_bits
shard_bits = max_bits - preshift_bits - minishard_bits
if dataset_size[2] == chunk_size[2]:
additional_bits = (preshift_bits // 3)
i = 0
while i < additional_bits:
max_bits += 1
preshift_bits += 1
if preshift_bits % 3 != 0:
i += 1

shard_bits = max(max_bits - preshift_bits - minishard_bits, 0)

if max_bits > 64:
raise ValueError(
f"{max_bits}, more than a 64-bit integer, "
"would be required to describe the chunk positions "
"in this dataset. Try increasing the chunk size or "
"increasing dataset bounds."
f"Dataset Size: {dataset_size} Chunk Size: {chunk_size}"
)

if preshift_bits < 0:
raise ValueError(f"Preshift bits cannot be negative. ({shard_bits}, {minishard_bits}, {preshift_bits}), total info: {max_bits} bits")
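
A rough, hedged sketch of the bit budget create_sharded_image_info balances (all numbers below are invented, not taken from the PR): the preshift, minishard, and shard fields must fit inside the morton-code width implied by the grid size, which is why shard_bits is now clamped at zero and the overflow check moved after the adjustment.

import math

# Illustrative dataset that is exactly one chunk thick in z (the newly handled case).
dataset_size = (40960, 40960, 64)
chunk_size   = (128, 128, 64)

# chunks per axis, and the morton code width needed to address them
grid_size = [math.ceil(d / c) for d, c in zip(dataset_size, chunk_size)]
max_bits  = sum(math.ceil(math.log2(g)) for g in grid_size)

preshift_bits, minishard_bits = 6, 3   # made-up split
shard_bits = max(max_bits - preshift_bits - minishard_bits, 0)

print(grid_size, max_bits, (preshift_bits, minishard_bits, shard_bits))
# [320, 320, 1] 18 (6, 3, 9)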
47 changes: 45 additions & 2 deletions igneous/task_creation/skeleton.py
@@ -15,7 +15,7 @@
from cloudvolume import CloudVolume
from cloudvolume.lib import Vec, Bbox, max2, min2, xyzrange, find_closest_divisor, yellow, jsonify
from cloudvolume.datasource.precomputed.sharding import ShardingSpecification
from cloudfiles import CloudFiles
from cloudfiles import CloudFiles, CloudFile

from igneous.tasks import (
SkeletonTask, UnshardedSkeletonMergeTask,
@@ -58,6 +58,7 @@ def bounds_from_mesh(
bbxes.append(bounds)

bounds = Bbox.expand(*bbxes)
bounds = bounds.expand_to_chunk_size(shape, offset=vol.voxel_offset)
return Bbox.clamp(bounds, vol.bounds)

def create_skeletonizing_tasks(
@@ -71,9 +72,11 @@
parallel=1, fill_missing=False,
sharded=False, frag_path=None, spatial_index=True,
synapses=None, num_synapses=None,
dust_global=False,
dust_global=False, fix_autapses=False,
cross_sectional_area=False,
cross_sectional_area_smoothing_window=5,
timestamp=None,
root_ids_cloudpath=None,
):
"""
Assign tasks with one voxel overlap in a regular grid
@@ -121,6 +124,17 @@
fix_borders: Allows trivial merging of single overlap tasks. You'll only
want to set this to false if you're working on single or non-overlapping
volumes.
fix_autapses: Only possible for graphene volumes. Uses PyChunkGraph (PCG) information
to fix autapses (where a neuron synapses onto itself). This requires splitting
contacts between the edges of two touching voxels, so the algorithm needs
much more memory.
This works by comparing the PCG L2 and root layers. L1 is the watershed. L2 contains
only the connections within an atomic chunk. The root layer provides the global
connectivity. Autapses can be distinguished at the L2 level; above that they
may not be (and certainly not at the root level). We extract the voxel connectivity
graph from L2 and perform the overall trace at root connectivity.
dust_threshold: don't skeletonize labels smaller than this number of voxels
as seen by a single task.
dust_global: Use global voxel counts for the dust threshold instead of from
@@ -155,10 +169,24 @@
to the total computation.)
cross_sectional_area_smoothing_window: Perform a rolling average of the
normal vectors across these many vectors.
timestamp: for graphene volumes only, you can specify the timepoint to use
root_ids_cloudpath: for graphene volumes, if you have a materialized archive
of your desired timepoint, you can use this path for fetching the root ID
segmentation, as it is far more efficient.
"""
shape = Vec(*shape)
vol = CloudVolume(cloudpath, mip=mip, info=info)

if fix_autapses:
if vol.meta.path.format != "graphene":
raise ValueError("fix_autapses can only be performed on graphene volumes.")

if not np.all(shape % vol.meta.graph_chunk_size == 0):
raise ValueError(
f"shape must be a multiple of the graph chunk size. Got: {shape}, "
f"{vol.meta.graph_chunk_size}"
)

if dust_threshold > 0 and dust_global:
cf = CloudFiles(cloudpath)
vxctfile = cf.join(vol.key, 'stats', 'voxel_counts.mb')
@@ -201,6 +229,15 @@

vol.skeleton.meta.commit_info()

if frag_path:
frag_info_path = CloudFiles(frag_path).join(frag_path, "info")
frag_info = CloudFile(frag_info_path).get_json()
if not frag_info:
CloudFile(frag_info_path).put_json(vol.skeleton.meta.info)
elif 'scales' in frag_info:
frag_info_path = CloudFiles(frag_path).join(frag_path, vol.info["skeletons"], "info")
CloudFile(frag_info_path).put_json(vol.skeleton.meta.info)

will_postprocess = bool(np.any(vol.bounds.size3() > shape))
bounds = vol.bounds.clone()

@@ -247,8 +284,11 @@ def task(self, shape, offset):
spatial_grid_shape=shape.clone(), # used for writing index filenames
synapses=bbox_synapses,
dust_global=dust_global,
fix_autapses=bool(fix_autapses),
timestamp=timestamp,
cross_sectional_area=bool(cross_sectional_area),
cross_sectional_area_smoothing_window=int(cross_sectional_area_smoothing_window),
root_ids_cloudpath=root_ids_cloudpath,
)

def synapses_for_bbox(self, shape, offset):
@@ -292,8 +332,11 @@ def on_finish(self):
'spatial_index': bool(spatial_index),
'synapses': bool(synapses),
'dust_global': bool(dust_global),
'fix_autapses': bool(fix_autapses),
'timestamp': timestamp,
'cross_sectional_area': bool(cross_sectional_area),
'cross_sectional_area_smoothing_window': int(cross_sectional_area_smoothing_window),
'root_ids_cloudpath': root_ids_cloudpath,
},
'by': operator_contact(),
'date': strftime('%Y-%m-%d %H:%M %Z'),
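
To tie the new parameters together, a hedged usage sketch based on the docstring above. The cloudpath, timestamp, bucket, and shape are placeholders, not values from the PR; the shape must be a multiple of the volume's graph chunk size per the new check.

from taskqueue import LocalTaskQueue
import igneous.task_creation as tc

# Hypothetical graphene segmentation path.
cloudpath = "graphene://https://example.org/segmentation/table/my_dataset"

tasks = tc.create_skeletonizing_tasks(
  cloudpath, mip=2,
  shape=(512, 512, 512),            # must be a multiple of the graph chunk size
  sharded=True,
  fix_autapses=True,                # graphene volumes only
  timestamp=1718000000,             # proofreading timepoint (placeholder)
  root_ids_cloudpath="gs://example-bucket/root_ids",  # optional materialized roots
  cross_sectional_area=True,
  cross_sectional_area_smoothing_window=5,
)

tq = LocalTaskQueue(parallel=4)
tq.insert(tasks)
tq.execute()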
8 changes: 6 additions & 2 deletions igneous/tasks/image/image.py
@@ -70,7 +70,7 @@ def downsample_and_upload(
if max_mips is not None:
factors = factors[:max_mips]

if len(factors) == 0:
if len(factors) == 0 and max_mips:
print("No factors generated. Image Shape: {}, Downsample Shape: {}, Volume Shape: {}, Bounds: {}".format(
image.shape, ds_shape, vol.volume_size, bounds)
)
@@ -327,6 +327,9 @@ def execute(self):
cts = np.bincount(img2d)
levels[0:len(cts)] += cts.astype(np.uint64)

if len(bboxes) == 0:
return

covered_area = sum([bbx.volume() for bbx in bboxes])

bboxes = [(bbox.volume(), bbox.size3()) for bbox in bboxes]
@@ -376,7 +379,8 @@ def select_bounding_boxes(self, dataset_bounds):
patch_start += self.offset
bbox = Bbox(patch_start, patch_start + sample_shape.size3())
bbox = Bbox.clamp(bbox, dataset_bounds)
bboxes.append(bbox)
if not bbox.subvoxel():
bboxes.append(bbox)
return bboxes

@queueable
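
The new subvoxel() guard in select_bounding_boxes filters out degenerate patches, which is also why execute() now returns early when no boxes survive. A hedged sketch of the situation it protects against (bounds are invented):

from cloudvolume.lib import Bbox

dataset_bounds = Bbox((0, 0, 0), (1024, 1024, 64))
patch = Bbox((2048, 2048, 0), (2560, 2560, 64))   # sampled entirely outside the dataset in x/y

# Clamping collapses the patch to a zero-volume box, which subvoxel() detects.
clamped = Bbox.clamp(patch, dataset_bounds)
print(clamped.volume(), clamped.subvoxel())       # 0 True -> dropped, so bboxes may end up empty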