"""Source code for ImageLayer.HDF5"""

from Datasource import Datasource
import numpy as np
import h5py
import json
import os

class HDF5(Datasource):
    """ Loads images from hdf5 files

    Attributes
    ------------
    _read: list
        All load functions for :data:`_meta_files`
    """
    # All readers for _meta_files
    _read = [json.load]
    #: All extensions of files pointing to h5
    _meta_files = ['.json']

    @staticmethod
    def dtype(n):
        """ Return the dtype of ``n``, mapped to unsigned if signed

        Signed integer dtypes are converted to the unsigned dtype
        of the same width; any other dtype passes through unchanged.
        """
        signed = ('int64', 'int32', 'int16', 'int8')
        unsigned = ('uint64', 'uint32', 'uint16', 'uint8')
        # Build the signed -> unsigned lookup table
        to_unsigned = dict(
            (np.dtype(s), np.dtype(u)) for s, u in zip(signed, unsigned)
        )
        # Unsigned / non-integer dtypes fall through unchanged
        return to_unsigned.get(n.dtype, n.dtype)
    @staticmethod
    def load_tile(t_query):
        """load a single tile (image)

        Gets the image path from the
        :data:`TileQuery.RUNTIME`. ``IMAGE`` attribute.

        Gets the position of the image within the whole
        volume from :meth:`TileQuery.all_scales`,
        :meth:`TileQuery.tile_origin`, and
        :meth:`TileQuery.blocksize`.

        Arguments
        -----------
        t_query: :class:`TileQuery`
            With file path and image position

        Returns
        -----------
        np.ndarray
            An image array that may be as large
            as an entire full resolution slice of
            the whole hdf5 volume. Based on the value
            of :meth:`TileQuery.all_scales`, this array
            will likely be downsampled by to a small fraction
            of the full tile resolution.
        """
        # call superclass
        Datasource.load_tile(t_query)
        # Load data for all the h5 files
        # assumes each entry is [path, dataset, z_offset] — see get_details
        h5_files = t_query.RUNTIME.IMAGE.SOURCE.HDF5.VALUE
        # Get all the z indices and coordinates
        # NOTE(review): zip(...)[-1] requires Python 2 (zip returns a list);
        # under Python 3 this raises TypeError — confirm targeted version
        z_stops = list(enumerate(zip(*h5_files)[-1]))
        # Same pairs in descending index order, for the start search below
        z_starts = z_stops[::-1]
        # Find the region to crop
        sk,sj,si = t_query.all_scales
        [z0,y0,x0],[z1,y1,x1] = t_query.source_tile_bounds
        # Get the scaled blocksize for the output array
        zb,yb,xb = t_query.blocksize
        # get the right h5 files for the current z index:
        # last file starting at/before z0, first file starting at/after z1
        start_z = next((i for i, z in z_starts if z <= z0), 0)
        stop_z = next((i for i, z in z_stops if z >= z1), len(z_stops))
        needed_files = [h5_files[zi] for zi in range(start_z, stop_z)]
        ####
        # Load from all needed files
        ####
        dtype = getattr(np, t_query.OUTPUT.INFO.TYPE.VALUE)
        # Make the full volume for all needed file volumes
        full_vol = np.zeros([zb, yb, xb], dtype = dtype)
        # Get the first offset; output z positions are relative to it
        offset_0 = needed_files[0][-1]
        # Loop through all needed h5 files
        for h5_file in needed_files:
            # Offset for this file
            z_offset = h5_file[-1]
            # Get input start (clamped to the file's first slice)
            iz0 = max(z0 - z_offset, 0)
            # Scale output bounds by z-scale
            oz0 = (z_offset - offset_0) // sk
            # Load the image region from the h5 file
            with h5py.File(h5_file[0]) as fd:
                # read from one file
                vol = fd[h5_file[1]]
                # Get the input end-bound (clamped to the file's extent)
                iz1 = min(z1 - z_offset, vol.shape[0])
                # Scale the output bounds by the z-scale
                dz = iz1 - iz0
                oz1 = oz0 + dz // sk
                # Get the (strided, downsampled) volume from one file
                file_vol = vol[iz0:iz1:sk, y0:y1:sj, x0:x1:si]
                yf, xf = file_vol.shape[1:]
                # Add the volume to the full volume
                full_vol[oz0:oz1,:yf,:xf] = file_vol
        # Combined from all files
        return full_vol
    @staticmethod
    def load_file(h5_file):
        """ Load the needed volume from a single h5 File

        .. note:: Stub — has no body and returns ``None``;
            loading is currently done inline by :meth:`load_tile`.

        Arguments
        -----------
        h5_file: list
            One [path, dataset, z-offset] entry, as produced
            by :meth:`get_details`
        """
[docs] @staticmethod def preload_source(t_query): """load info from example tile (image) Calls :meth:`valid_path` to get filename and \ inner dataset path for the full h5 image volume. Then gets three needed values from the given \ path from the :class:`TileQuery` t_query Arguments ----------- t_query: :class:`TileQuery` Only the file path is needed Returns -------- dict Will be empty if :meth:`valid_path` finds\ this filname to not give a valid h5 volume. * :class:`RUNTIME` ``.IMAGE.BLOCK.NAME`` (numpy.ndarray) -- 3x1 for any give tile shape * :class:`OUTPUT` ``.INFO.TYPE.NAME`` (str) -- numpy dtype of any given tile * :class:`OUTPUT` ``.INFO.SIZE.NAME`` (numpy.ndarray) -- 3x1 for full volume shape """ # Keyword names output = t_query.OUTPUT.INFO runtime = t_query.RUNTIME.IMAGE k_h5 = runtime.SOURCE.HDF5.NAME # Get the max block size in bytes for a single tile max_bytes = t_query.RUNTIME.CACHE.MAX_BLOCK.VALUE max_bytes = int(max_bytes/64) # Check if path is valid keywords = HDF5.valid_path(t_query) if not keywords: return {} # Validate highest in z file name and dataset filename = keywords[k_h5][-1][0] dataset = keywords[k_h5][-1][1] offset = keywords[k_h5][-1][2] # Load properties from H5 dataset with h5py.File(filename,'r') as fd: # Get the volume vol = fd[dataset] # Get a shape for all the files shape = np.uint32(vol.shape) shape[0] += offset #### # Get a blockshape as a flat section #### # Get the bytes for a full slice voxel_bytes = np.uint32(vol.dtype.itemsize) slice_bytes = voxel_bytes *[1:]) # Get the nearest tile size under cache limit square_overage = np.ceil(slice_bytes / max_bytes) side_scalar = np.ceil(np.sqrt(square_overage)) # Set the actual blocksize to be under the cache limit plane_shape = np.ceil(shape[1:] / side_scalar) max_block = np.r_[[64], plane_shape] #### # Get max blocksizes for different resolutions #### lo_res = 1 # Get all block sizes by halving the max block size all_blocks = [shape/(2**res) for res in range(lo_res)] block_array = 
np.clip(np.ceil(all_blocks), 1, max_block) # return named keywords keywords.update({ runtime.BLOCK.NAME: np.uint32(block_array), output.SIZE.NAME: np.uint32(shape), output.TYPE.NAME: str(HDF5.dtype(vol)), }) # Combine results with parent method common = Datasource.preload_source(t_query) return dict(common, **keywords)
[docs] @staticmethod def valid_path(t_query): """ Check if filename can access h5 data The filename can be a path to a json file \ that lists an h5 file and dataset path, or \ the filename can be a direct path to an h5 \ file. In either case the 'outer' file path \ directly to the h5 file and the 'inner' \ dataset path will be returned. Arguments ----------- t_query: :class:`TileQuery` Only the file path is needed Returns -------- dict Empty if not a valid h5 volume * :class:`RUNTIME` ``.IMAGE.SOURCE.HDF5.OUTER.NAME`` (str) -- The direct filename to an hdf5 file * :class:`RUNTIME` ``.IMAGE.SOURCE.HDF5.INNER.NAME`` (str) -- The datset in the file with image data """ # Dereference path to hdf5 data k_h5 = t_query.RUNTIME.IMAGE.SOURCE.HDF5.NAME h5_list = HDF5.load_info(t_query) # load all the files for h5_file in h5_list: try: # Try to load one file with h5py.File(h5_file[0],'r') as fd: if h5_file[1] not in fd.keys(): h5_file[1] = fd.keys()[0] except: return {} # sort by z start def z_sort(h_file): return h_file[-1] # return reverse sorted files return { k_h5: sorted(h5_list, key=z_sort) }
[docs] @staticmethod def get_details(h5_info, file_dict): """ Get all needed h5 file info from a pointer file Arguments ---------- file_dict: dict Contains keys for INNER, OUTER, and OFF values Returns -------- list All INNER, OUTER, OFF values in a flat list """ # Get values for actual hdf5 file outer_path = file_dict.get(h5_info.OUTER.NAME) inner_path = file_dict.get(h5_info.INNER.NAME) z_offset = file_dict.get(h5_info.OFF.NAME, 0) return [outer_path, inner_path, z_offset]
[docs] @staticmethod def load_info(t_query): """ Gets the h5 volume filename and datapath If the t_query path has an extension in \ the :data:`_meta_files` and the file contains \ ``RUNTIME.IMAGE.SOURCE.HDF5.OUTER.NAME`` \ and ``RUNTIME.IMAGE.SOURCE.HDF5.INNER.NAME`` \ keys, then the values of those keys are returned. \ If any of those statements is not true, then the \ original t_query path is returned along with the \ default dataset given by \ ``RUNTIME.IMAGE.SOURCE.HDF5.INNER.VALUE``. Arguments ----------- t_query: :class:`TileQuery` Only the file path is needed Returns -------- list * The direct filename to an hdf5 file * The datset in the file with image data """ # Load information about full hdf5 h5_info = t_query.RUNTIME.IMAGE.SOURCE.HDF5 filename = t_query.OUTPUT.INFO.PATH.VALUE dataset = h5_info.INNER.VALUE # Get all details for info def get_details(info): return HDF5.get_details(h5_info, info) # Load path if ends with json ending = os.path.splitext(filename)[1] if ending in HDF5._meta_files: # Get function to read the metainfo file order = HDF5._meta_files.index(ending) try: with open(filename) as infile: # Read the metainfo file info = HDF5._read[order](infile) except IOError: return [[filename, dataset, 0]] ###### ## Handle references to multiple h5 files ## Get first item in list ###### if isinstance(info, list): return map(get_details, info) # Get the inner dataset and the new path return [get_details(info)] return [[filename, dataset, 0]]