Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Istvan N. Huszar, MD
tirl
Commits
27d49cda
Commit
27d49cda
authored
Jul 26, 2020
by
inhuszar
Browse files
Multi-CPU support for loading histology files
parent
d7f3e358
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/tirl/loader.py
View file @
27d49cda
...
...
@@ -152,36 +152,52 @@ class OpenSlideLoader(GenericLoader):
super
(
OpenSlideLoader
,
self
).
__init__
(
storage
=
storage
,
dtype
=
dtype
,
**
kwargs
)
self
.
level
=
level
self
.
cpu
=
ts
.
CPU_CORES
def
__call__
(
self
,
f
):
super
(
OpenSlideLoader
,
self
).
__call__
(
f
)
import
openslide
import
multiprocessing
as
mp
from
functools
import
partial
import
psutil
import
tempfile
memlimit
=
psutil
.
virtual_memory
().
available
memlimit
=
min
(
memlimit
,
self
.
kwargs
.
get
(
"memlimit"
,
memlimit
))
itemsize
=
4
*
2
*
np
.
dtype
(
self
.
dtype
).
itemsize
batchsize
=
int
(
np
.
sqrt
(
memlimit
/
itemsize
/
self
.
cpu
))
# Load the slide object
obj
=
openslide
.
open_slide
(
f
)
w
,
h
=
obj
.
level_dimensions
[
self
.
level
]
if
h
*
w
*
itemsize
>=
memlimit
:
import
warnings
warnings
.
warn
(
f
"The current memory limit (
{
memlimit
/
1024
**
2
}
) "
f
"is lower than needed to load the object "
f
"(
{
h
*
w
*
itemsize
/
1024
**
2
}
). Switching to "
f
"HDD mode."
)
self
.
storage
=
HDD
# Create storage space and populate it with the histology image data
if
self
.
storage
==
MEM
:
arr
=
obj
.
read_region
(
(
0
,
0
),
self
.
level
,
obj
.
level_dimensions
[
self
.
level
])
arr
=
np
.
asarray
(
arr
,
dtype
=
self
.
dtype
)
arr
=
np
.
empty
(
shape
=
(
h
,
w
,
4
),
dtype
=
self
.
dtype
,
order
=
"C"
)
with
mp
.
pool
.
ThreadPool
(
processes
=
self
.
cpu
)
as
pool
:
jobfunc
=
partial
(
openslide_worker_thread
,
obj
=
obj
,
level
=
self
.
level
,
arr
=
arr
)
jobs
=
openslide_generate_jobs
(
h
,
w
,
batchsize
)
_
=
pool
.
map
(
jobfunc
,
jobs
)
else
:
import
psutil
import
tempfile
memlimit
=
psutil
.
virtual_memory
().
available
itemsize
=
4
*
(
np
.
dtype
(
self
.
dtype
).
itemsize
+
8
)
batchsize
=
int
(
np
.
sqrt
(
memlimit
/
itemsize
))
w
,
h
=
obj
.
level_dimensions
[
self
.
level
]
fileno
,
fname
=
tempfile
.
mkstemp
(
prefix
=
"tiff_"
,
dir
=
ts
.
TWD
)
arr
=
np
.
memmap
(
fname
,
dtype
=
self
.
dtype
,
mode
=
"r+"
,
offset
=
0
,
shape
=
(
h
,
w
,
4
),
order
=
"C"
)
for
row
in
range
(
0
,
h
,
batchsize
):
endrow
=
min
(
row
+
batchsize
,
h
)
for
col
in
range
(
0
,
w
,
batchsize
):
endcol
=
min
(
col
+
batchsize
,
w
)
x
=
col
*
obj
.
level_downsamples
[
self
.
level
]
y
=
row
*
obj
.
level_downsamples
[
self
.
level
]
arr
[
row
:
endrow
,
col
:
endcol
]
=
\
obj
.
read_region
((
x
,
y
),
level
=
self
.
level
,
size
=
(
endcol
-
col
,
endrow
-
row
))
m
=
(
fname
,
np
.
dtype
(
self
.
dtype
).
str
,
(
h
,
w
,
4
))
with
mp
.
Pool
(
processes
=
ts
.
CPU_CORES
)
as
pool
:
lock
=
mp
.
Manager
().
RLock
()
jobfunc
=
partial
(
openslide_worker_process
,
slide
=
f
,
level
=
self
.
level
,
arr
=
m
,
lock
=
lock
)
jobs
=
openslide_generate_jobs
(
h
,
w
,
batchsize
)
_
=
pool
.
map
(
jobfunc
,
jobs
)
hdr
=
dict
(
input_file
=
f
)
# hdr = {
...
...
@@ -191,6 +207,43 @@ class OpenSlideLoader(GenericLoader):
return
arr
,
hdr
def openslide_worker_thread(job, obj, level, arr):
    """
    Thread-pool worker: reads one rectangular tile from an already opened
    slide object and stores it in the matching window of the output array.

    :param job:
        Tile bounds as (row, endrow, col, endcol), given in pixel indices of
        the requested level; end indices are exclusive.
    :param obj:
        Slide object providing ``level_downsamples`` and ``read_region``.
    :param level:
        Index of the slide level to read from.
    :param arr:
        Output array; the tile is written to arr[row:endrow, col:endcol]
        after conversion to the dtype of this array.

    """
    top, bottom, left, right = job

    # read_region is given the tile origin multiplied by the downsampling
    # factor of the level and truncated to an integer.
    downsample = obj.level_downsamples[level]
    origin = (int(left * downsample), int(top * downsample))

    tile = obj.read_region(
        origin, level=level, size=(right - left, bottom - top))
    arr[top:bottom, left:right] = np.asarray(tile, dtype=arr.dtype)
def openslide_worker_process(job, slide, level, arr, lock):
    """
    Process-pool worker: opens the slide, reads one rectangular tile and
    writes it into a memory-mapped array file.

    :param job:
        Tile bounds as (row, endrow, col, endcol), given in pixel indices of
        the requested level; end indices are exclusive.
    :param slide:
        Argument passed on to ``openslide.open_slide`` to open the slide
        inside the worker.
    :param level:
        Index of the slide level to read from.
    :param arr:
        Descriptor of the target array as (fname, dtype, (h, w, c)): the file
        to map, its data type, and the full array shape.
    :param lock:
        Object with ``acquire()`` and ``release()`` methods; it is held while
        the memory-mapped file is being written.

    """
    import openslide

    row, endrow, col, endcol = job
    # Unpack the descriptor before taking the lock, so that a malformed
    # descriptor cannot leave the lock held.
    fname, dtype, (_, w, c) = arr
    dtype = np.dtype(dtype)

    # Every job opens its own slide handle; close it as soon as the tile has
    # been copied into an array so that handles do not pile up in the worker.
    obj = openslide.open_slide(slide)
    try:
        downsample = obj.level_downsamples[level]
        region = obj.read_region(
            (int(col * downsample), int(row * downsample)),
            level=level, size=(endcol - col, endrow - row))
        region = np.asarray(region, dtype=dtype)
    finally:
        obj.close()

    # Map only the rows [row, endrow) of the C-ordered (h, w, c) array by
    # skipping the bytes of the preceding rows.
    offset = row * w * c * dtype.itemsize

    lock.acquire()
    try:
        target = np.memmap(
            fname, dtype=dtype, mode="r+", offset=offset,
            shape=(endrow - row, w, c), order="C")
        target[:, col:endcol] = region
        target.flush()
        del target
    finally:
        # Always release, otherwise one failed write would block every
        # other worker waiting for the lock.
        lock.release()
def openslide_generate_jobs(h, w, batchsize):
    """
    Generates tile bounds that cover an image of h rows and w columns with
    tiles of at most batchsize x batchsize pixels, in row-major order.

    A progress line is printed for every tile just before it is yielded.

    :param h: number of rows to cover
    :param w: number of columns to cover
    :param batchsize: maximum edge length of a tile; must be at least 1

    :returns:
        Generator of (row, endrow, col, endcol) tuples; end indices are
        exclusive and clipped to h and w.
    :raises ValueError:
        If batchsize is smaller than 1. As this is a generator, the error is
        raised when the first item is requested.

    """
    # A zero step makes range() fail with an unrelated message, and a
    # negative step would silently produce no jobs at all.
    if batchsize < 1:
        raise ValueError(
            f"batchsize must be a positive integer, got {batchsize!r}")

    for row in range(0, h, batchsize):
        endrow = min(row + batchsize, h)
        for col in range(0, w, batchsize):
            endcol = min(col + batchsize, w)
            print(f"Loading ({row}, {col}) - ({endrow}, {endcol})")
            yield row, endrow, col, endcol
class
NiBabelLoader
(
GenericLoader
):
SUPPORTED_IMAGE_TYPES
=
(
".nii"
,
".nii.gz"
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment