Commit 27d49cda authored by inhuszar's avatar inhuszar
Browse files

Multi-CPU support for loading histology files

parent d7f3e358
......@@ -152,36 +152,52 @@ class OpenSlideLoader(GenericLoader):
super(OpenSlideLoader, self).__init__(
storage=storage, dtype=dtype, **kwargs)
self.level = level
self.cpu = ts.CPU_CORES
def __call__(self, f):
# Load the histology image at `self.level` into either an in-memory array
# (storage == MEM) or an on-disk memmap (HDD mode), filling it tile-by-tile
# with a thread pool (MEM) or a process pool (HDD). Returns (arr, hdr).
# NOTE(review): this span is a diff rendered without +/- markers — several
# old lines and their replacements both appear below; see inline notes.
super(OpenSlideLoader, self).__call__(f)
import openslide
import multiprocessing as mp
from functools import partial
import psutil
import tempfile
# Cap the working-set size at the available RAM, or at a user-supplied
# "memlimit" kwarg if that is smaller.
memlimit = psutil.virtual_memory().available
memlimit = min(memlimit, self.kwargs.get("memlimit", memlimit))
# Bytes per pixel: 4 channels (RGBA); the extra factor 2 presumably leaves
# headroom for the RGBA tile plus its converted copy — TODO confirm.
itemsize = 4 * 2 * np.dtype(self.dtype).itemsize
# Square tile edge such that one tile per CPU core fits in the limit.
batchsize = int(np.sqrt(memlimit / itemsize / self.cpu))
# Load the slide object
obj = openslide.open_slide(f)
w, h = obj.level_dimensions[self.level]
# If the full level cannot fit in memory, fall back to memmap (HDD) mode.
if h * w * itemsize >= memlimit:
import warnings
warnings.warn(f"The current memory limit ({memlimit / 1024 ** 2}) "
f"is lower than needed to load the object "
f"({h * w * itemsize / 1024 ** 2}). Switching to "
f"HDD mode.")
self.storage = HDD
# Create storage space and populate it with the histology image data
if self.storage == MEM:
# NOTE(review): the next two statements look like the *removed* side of
# the diff (single whole-region read); `arr` is immediately rebound by
# the np.empty line that follows, so they are dead as shown here.
arr = obj.read_region(
(0, 0), self.level, obj.level_dimensions[self.level])
arr = np.asarray(arr, dtype=self.dtype)
# New path: preallocate the full (h, w, RGBA) array and let a thread
# pool copy tiles into it in place (threads share `arr` directly).
arr = np.empty(shape=(h, w, 4), dtype=self.dtype, order="C")
with mp.pool.ThreadPool(processes=self.cpu) as pool:
jobfunc = partial(openslide_worker_thread, obj=obj,
level=self.level, arr=arr)
jobs = openslide_generate_jobs(h, w, batchsize)
_ = pool.map(jobfunc, jobs)
else:
# NOTE(review): duplicate imports and the recomputed memlimit/itemsize/
# batchsize/w/h below appear to be leftovers of the removed serial
# HDD-mode implementation (the explicit double loop further down).
import psutil
import tempfile
memlimit = psutil.virtual_memory().available
itemsize = 4 * (np.dtype(self.dtype).itemsize + 8)
batchsize = int(np.sqrt(memlimit / itemsize))
w, h = obj.level_dimensions[self.level]
# Back the array with a temp file in the project working directory.
fileno, fname = tempfile.mkstemp(prefix="tiff_", dir=ts.TWD)
arr = np.memmap(fname, dtype=self.dtype, mode="r+", offset=0,
shape=(h, w, 4), order="C")
# NOTE(review): old serial tile loop — superseded by the process pool
# below in the new revision; x/y here are floats (no int() cast),
# unlike the worker functions.
for row in range(0, h, batchsize):
endrow = min(row + batchsize, h)
for col in range(0, w, batchsize):
endcol = min(col + batchsize, w)
x = col * obj.level_downsamples[self.level]
y = row * obj.level_downsamples[self.level]
arr[row:endrow, col:endcol] = \
obj.read_region((x, y), level=self.level,
size=(endcol - col, endrow - row))
# New path: workers are separate processes, so the memmap cannot be
# passed directly — ship (filename, dtype-string, shape) instead and
# let each worker reopen its own view (see openslide_worker_process).
m = (fname, np.dtype(self.dtype).str, (h, w, 4))
with mp.Pool(processes=ts.CPU_CORES) as pool:
# Manager-backed RLock so it can be pickled into worker processes.
lock = mp.Manager().RLock()
jobfunc = partial(openslide_worker_process, slide=f,
level=self.level, arr=m, lock=lock)
jobs = openslide_generate_jobs(h, w, batchsize)
_ = pool.map(jobfunc, jobs)
# Minimal header; richer metadata was apparently planned (see below).
hdr = dict(input_file=f)
# hdr = {
......@@ -191,6 +207,43 @@ class OpenSlideLoader(GenericLoader):
return arr, hdr
def openslide_worker_thread(job, obj, level, arr):
    """Copy one tile of an open slide into a shared in-memory array.

    Runs inside a thread pool: all workers share ``obj`` (the open slide
    handle) and write disjoint tiles of ``arr`` in place.

    Parameters
    ----------
    job : tuple
        (row, endrow, col, endcol) tile bounds in level-``level`` pixels.
    obj : slide handle
        Object exposing ``level_downsamples`` and ``read_region``.
    level : int
        Pyramid level to read from.
    arr : numpy.ndarray
        Destination (h, w, 4) array; only the tile slice is written.
    """
    r0, r1, c0, c1 = job
    # read_region expects the origin in level-0 coordinates, hence the
    # downsample-factor scaling of the tile's top-left corner.
    scale = obj.level_downsamples[level]
    origin = (int(c0 * scale), int(r0 * scale))
    tile = obj.read_region(origin, level=level, size=(c1 - c0, r1 - r0))
    arr[r0:r1, c0:c1] = np.asarray(tile, dtype=arr.dtype)
def openslide_worker_process(job, slide, level, arr, lock):
    """Read one tile of a slide and write it into a shared on-disk memmap.

    Runs inside a process pool, so nothing is shared directly: each worker
    reopens the slide from its path and reopens the destination memmap from
    its (filename, dtype, shape) description.

    Parameters
    ----------
    job : tuple
        (row, endrow, col, endcol) tile bounds in level-``level`` pixels.
    slide : str
        Path of the histology file to open with OpenSlide.
    level : int
        Pyramid level to read from.
    arr : tuple
        (filename, dtype string, (h, w, c)) describing the destination memmap.
    lock : multiprocessing lock
        Guards concurrent writes to the backing file across processes.
    """
    row, endrow, col, endcol = job
    import openslide
    obj = openslide.open_slide(slide)
    try:
        # read_region expects the tile origin in level-0 coordinates.
        x = int(col * obj.level_downsamples[level])
        y = int(row * obj.level_downsamples[level])
        region = obj.read_region(
            (x, y), level=level, size=(endcol - col, endrow - row))
    finally:
        # BUGFIX: the per-process slide handle was never closed.
        obj.close()
    fname, dtype, (h, w, c) = arr
    # Map only the rows this job touches: offset to the first byte of `row`.
    offset = row * w * c * np.dtype(dtype).itemsize
    # BUGFIX: lock was acquired/released without try/finally — an exception
    # while writing left the shared lock held and deadlocked all workers.
    with lock:
        mm = np.memmap(fname, dtype=dtype, mode="r+", offset=offset,
                       shape=(endrow - row, w, c), order="C")
        mm[:, col:endcol] = np.asarray(region, dtype=dtype)
        # Flush explicitly before dropping the view so the write is durable
        # regardless of when the mmap is finalized.
        mm.flush()
        del mm
def openslide_generate_jobs(h, w, batchsize):
    """Yield (row, endrow, col, endcol) tile bounds covering an h-by-w image.

    Tiles are ``batchsize`` pixels on a side, emitted row-major; edge tiles
    are clipped to the image bounds. A progress line is printed per tile.
    """
    for top in range(0, h, batchsize):
        bottom = top + batchsize if top + batchsize < h else h
        for left in range(0, w, batchsize):
            right = left + batchsize if left + batchsize < w else w
            print(f"Loading ({top}, {left}) - ({bottom}, {right})")
            yield top, bottom, left, right
class NiBabelLoader(GenericLoader):
SUPPORTED_IMAGE_TYPES = (".nii", ".nii.gz")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment