Saad Jbabdi / CellCounting / Commits

Commit a27e8595, authored Feb 07, 2020 by Saad Jbabdi

cleanup

parent d82d6b81

Changes 30
.gitignore (new file, 0 → 100644) @ a27e8595

*.npz
test
*~
__pycache__
.DS_Store
*.ipynb
build
\ No newline at end of file
CellCounting/click_cells/.DS_Store (deleted, 100644 → 0) @ d82d6b81
File deleted
CellCounting/models/.DS_Store (deleted, 100644 → 0) @ d82d6b81
File deleted
CellCounting/models/create_model.py → CellCounting/models/model_utils.py @ a27e8595
...
@@ -9,13 +9,36 @@
from keras.models import Sequential, Model
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization, Input, Concatenate
from keras import optimizers
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
import time
import os

# Model names
# convnet_1 : our first stab at the problem

-def from_name(shape, arch='convnet_1'):
+def create_from_name(shape, arch='convnet_1'):
    """
    Basic deep CNN

    Parameters
    ----------
    shape : tuple
        Typically NxNx3 for 3 channels
    arch : str
        'convnet_1' is a succession of conv2D, maxPool, Batchnorm with softmax at the end

    Returns
    -------
    keras Model object
    """
    if (arch == 'convnet_1'):
        print('BUILDING A CONVNET')
        model = Sequential()
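The actual layer stack of 'convnet_1' is hidden in the collapsed part of this hunk. For orientation only, here is a minimal sketch of the conv/pool/batchnorm-then-softmax pattern the docstring describes, using the layers imported above; the filter counts, depths, and dense sizes are assumptions, not the repository's real architecture:

from keras.models import Sequential
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization
from keras.layers.convolutional import Convolution2D, MaxPooling2D

def convnet_sketch(shape=(64, 64, 3)):
    # Hypothetical stack; the real convnet_1 layers are collapsed in the diff
    model = Sequential()
    model.add(Convolution2D(32, (3, 3), padding='same', input_shape=shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Convolution2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(2, activation='softmax'))  # softmax head, as per the docstring
    return model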
...
@@ -71,3 +94,66 @@ def from_name(shape,arch='convnet_1'):
        return model


def build_model(shape, arch='convnet_1'):
    """
    Creates and builds a model
    """
    model = create_from_name(shape, arch)
    # compile model
    adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                           epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    return model
def train_model(model, data, gpu=False, augment=False, verbose=False, epochs=100, batch_size=32):
    """
    Prespecified training regime
    """
    # train model
    X_train, y_train, X_test, y_test = data
    start = time.time()
    if gpu is True:
        K.tensorflow_backend._get_available_gpus()
        print('* Running forward pass on GPU (CUDA_VISIBLE_DEVICES)')
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        print('* Running forward pass on CPU')
    if augment is True:
        print('* Using data augmentation')
        datagen = ImageDataGenerator(
            rotation_range=90,        # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=.25,    # randomly shift images horizontally (fraction of total width)
            height_shift_range=.25,   # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,     # randomly flip images horizontally
            vertical_flip=True)       # randomly flip images vertically
        datagen.fit(X_train)
        info = model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                                   epochs=epochs, verbose=1, shuffle=True,
                                   validation_data=(X_test, y_test))
    else:
        print('* NOT using data augmentation')
        info = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                         verbose=1, shuffle=True, validation_data=(X_test, y_test))
    end = time.time()
    print('Model took {:.2f} seconds to train'.format(end - start))
    return info
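For context, this is how the two refactored helpers above might be called together. The array shapes, label encoding, and hyperparameters here are invented for illustration; only the build_model/train_model signatures come from the diff:

import numpy as np
from CellCounting.models import model_utils as mod

# Dummy data standing in for a real cell database (shapes assumed)
X_train = np.random.rand(100, 64, 64, 3).astype('float32')
y_train = np.eye(2)[np.random.randint(0, 2, 100)]  # one-hot labels
X_test = np.random.rand(20, 64, 64, 3).astype('float32')
y_test = np.eye(2)[np.random.randint(0, 2, 20)]

model = mod.build_model(shape=X_train.shape[1:], arch='convnet_1')
info = mod.train_model(model=model,
                       data=[X_train, y_train, X_test, y_test],
                       gpu=False, augment=True, epochs=5, batch_size=16)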
CellCounting/models/train_model.py @ a27e8595
...
@@ -7,24 +7,27 @@
# import modules
# General
import numpy as np
import time, os, sys
import argparse
# DL stuff
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization
from keras import backend as K
from keras import optimizers
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
# Other
import numpy as np
import time, os, sys
import pandas as pd
from CellCounting.models import model_utils as mod
from CellCounting.utils import db
from CellCounting.models import create_model
from keras.utils import np_utils
# ------------------------------ DATA ------------------------------ #

def prepare_data(celldb, args):
    """
    Split data in celldb into train and test sets,
    divide by 255, normalise, and save the training
    mean and std to be applied to the test data.
    Return X_train, y_train, X_test, y_test
    """
    X_train, y_train, X_test, y_test = celldb.split_train_test(split=args.split)
    # Normalise images
...
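The normalisation body itself is collapsed in the diff. A plausible sketch matching the docstring (scale to [0,1], then standardise with training statistics that are reused on the test set) would be; none of these lines are copied from the repository:

# Hypothetical completion of the elided normalisation step
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
mu = X_train.mean(axis=0)
sigma = X_train.std(axis=0) + 1e-8   # avoid division by zero
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma       # test data uses the *training* mean/std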
@@ -54,18 +57,12 @@ def prepare_data(celldb, args):
# ------------------------------ MODEL ------------------------------ #
def build_model(shape, arch='convnet_1'):
    model = create_model.from_name(shape, arch)
    # compile model
    adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                           epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    return model
def accuracy(test_x, test_y, model):
-    pred = model.predict(test_x)
    """
    Precision and recall
    """
+    pred = mod.predict(test_x)
    TP = ((pred[:, 1] > pred[:, 0]) & (test_y[:, 1] > test_y[:, 0])).sum()
    FP = ((pred[:, 1] > pred[:, 0]) & (test_y[:, 1] < test_y[:, 0])).sum()
...
@@ -77,132 +74,95 @@ def accuracy(test_x, test_y, model):
    return prec, recall
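Most of accuracy() is collapsed above. Given the visible TP/FP lines, a self-contained version of the computation would look like the sketch below; the FN line and the final formulas are inferred from the standard definitions, not copied from the repository:

import numpy as np

def precision_recall(pred, test_y):
    # Column 1 is the positive class in the one-hot encoding used above
    pos_pred = pred[:, 1] > pred[:, 0]
    pos_true = test_y[:, 1] > test_y[:, 0]
    TP = (pos_pred & pos_true).sum()
    FP = (pos_pred & ~pos_true).sum()
    FN = (~pos_pred & pos_true).sum()
    prec = TP / max(TP + FP, 1)     # guard against empty denominators
    recall = TP / max(TP + FN, 1)
    return prec, recall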
def train_model(model, celldb, args):
    # train model
    X_train, y_train, X_test, y_test = prepare_data(celldb, args)
    start = time.time()
    if args.gpu == True:
        K.tensorflow_backend._get_available_gpus()
        print('* Running forward pass on GPU (CUDA_VISIBLE_DEVICES)')
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        print('* Running forward pass on CPU')
    # data augmentation
    DataAugment = args.augment
    # Fitting params
    epochs = args.epochs
    batch_size = args.batch_size
    if (DataAugment):
        print('* Using data augmentation')
        datagen = ImageDataGenerator(
            rotation_range=90,        # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=.25,    # randomly shift images horizontally (fraction of total width)
            height_shift_range=.25,   # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,     # randomly flip images horizontally
            vertical_flip=True)       # randomly flip images vertically
        datagen.fit(X_train)
        info = model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                                   epochs=epochs, verbose=1, shuffle=True,
                                   validation_data=(X_test, y_test))
    else:
        print('* NOT using data augmentation')
        info = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                         verbose=1, shuffle=True, validation_data=(X_test, y_test))
    end = time.time()
    print('Model took {:.2f} seconds to train'.format(end - start))
    prec, recall = accuracy(X_test, y_test, model)
    print('Accuracy on test data: precision = {:.2f}, recall = {:.2f}'.format(prec, recall))
    return info
# SAVE MODEL AND FITTING HISTORY
def save_results(model, info, args):
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    outfile = os.path.join(args.out, 'model.h5')
    model.save(outfile)
    outhist = os.path.join(args.out, 'model_hist.csv')
    df = pd.DataFrame(info.history)
    df.to_csv(outhist)
    return
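save_results() writes two artefacts: the Keras model and the per-epoch fitting history. A quick, hypothetical round-trip shows how they would be read back; the 'out' directory name is assumed for illustration:

from keras.models import load_model
import pandas as pd

model = load_model('out/model.h5')        # path assumed for illustration
hist = pd.read_csv('out/model_hist.csv')  # one row per epoch: loss, accuracy, ...
print(hist.columns.tolist())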
# Input
#  - List of DBs
#  - Basename folder for output
#  - options for the fitting
#    - GPU/Augmentation/ModelType?/train-test split/etc.
# Output
#  - model.h5
#  - model history
#  - normalisation sub-folder
def main():
    p = argparse.ArgumentParser(description='Train model on some data')
    # Optional arguments
    p.add_argument('--gpu', default=False,
                   type=lambda s: s.lower() in ['true', 't', 'yes', '1'],
                   metavar='<bool>',
                   help='try to use GPU if True (default), use CPU if False')
    p.add_argument('--epochs', default=100, type=int, metavar='<int>',
                   help='number of training epochs (default=100)')
    p.add_argument('--batch_size', default=32, type=int, metavar='<int>',
                   help='batch size (default=32)')
    p.add_argument('--split', default=0.1, type=float, metavar='<float>',
                   help='train/test split (default=0.1)')
    p.add_argument('--model', default='convnet', type=str, metavar='<str>',
                   help='choose model amongst [convnet,...] (default=convnet)')
    p.add_argument('--load_model', default=None, type=str, metavar='<str>.h5',
                   help='load pretrained model')
    p.add_argument('--augment', default=False,
                   type=lambda s: s.lower() in ['true', 't', 'yes', '1'],
                   metavar='<bool>',
                   help='use data augmentation (default=False)')
    required = p.add_argument_group('Required arguments')
    optional = p.add_argument_group('Optional arguments')
    # Required arguments
    required = p.add_argument_group('Required arguments')
    required.add_argument('-d', '--data', required=True, type=str, nargs='+',
                          metavar='<str>npz', help='input databases')
    required.add_argument('-o', '--out', required=True, type=str,
                          metavar='<str>', help='output basename')
    # Parse arguments
    # Optional arguments
    optional.add_argument('--use_gpu', action="store_true",
                          help='try to use GPU')
    optional.add_argument('--epochs', default=100, type=int, metavar='<int>',
                          help='number of training epochs (default=100)')
    optional.add_argument('--batch_size', default=32, type=int, metavar='<int>',
                          help='batch size (default=32)')
    optional.add_argument('--split', default=0.1, type=float, metavar='<float>',
                          help='train/test split (default=0.1)')
    optional.add_argument('--model', default='convnet', type=str, metavar='<str>',
                          help='choose model amongst [convnet,...] (default=convnet)')
    optional.add_argument('--load_model', default=None, type=str, metavar='<str>.h5',
                          help='load pretrained model')
    optional.add_argument('--augment', action="store_true",
                          help='use data augmentation (default=False)')
    optional.add_argument('--verbose', action="store_true",
                          help='spit out verbose info')
    # ---------- Parse arguments ----------- #
    args = p.parse_args()

    # Do the work
    print('* Preparing image database')
    # ---------- Prepare the data ----------- #
    if args.verbose:
        print('* Preparing image database')
    celldb = db.CellDB()
    celldb.load_from_files(args.data)
    celldb.equalise_classes()
    celldb.summary()
    print('* Preparing and training model')
    if args.verbose:
        print('* Preparing data')
    X_train, y_train, X_test, y_test = prepare_data(celldb, args)
    # ---------- Begin training ----------- #
    if args.verbose:
        print('* Preparing and training model')
    shape = celldb.images.shape[1:]
    if args.load_model is not None:
        print('** Loading pretrained model')
        if args.verbose:
            print('** Loading pretrained model')
        from keras.models import load_model
        model = load_model(args.load_model)
    else:
        model = build_model(shape, args.model)
        info = train_model(model, celldb, args)
        model = mod.build_model(shape, args.model)
    TrainingArgs = {'gpu': args.gpu,
                    'augment': args.augment,
                    'verbose': args.verbose}
    info = mod.train_model(model=model,
                           data=[X_train, y_train, X_test, y_test],
                           **TrainingArgs)
    prec, recall = accuracy(X_test, y_test, model)
    if args.verbose:
        print('Accuracy on test data: precision = {:.2f}, recall = {:.2f}'.format(prec, recall))
    if args.verbose:
        print('* Saving results')
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    outfile = os.path.join(args.out, 'model.h5')
    model.save(outfile)
    outhist = os.path.join(args.out, 'model_hist.csv')
    df = pd.DataFrame(info.history)
    df.to_csv(outhist)
    print('* Saving results')
    save_results(model, info, args)
    print('Done')
    if args.verbose:
        print('Done')


if __name__ == '__main__':
    main()
...
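A side note on the argument-parsing change in this diff: the old parser coerced booleans with type=lambda s: s.lower() in ['true', 't', 'yes', '1'] (argparse's type=bool is a trap, since any non-empty string is truthy), while the new parser uses plain store_true flags. A minimal, standalone illustration of the new style:

import argparse

q = argparse.ArgumentParser()
q.add_argument('--use_gpu', action='store_true')  # flag present -> True, absent -> False

print(q.parse_args(['--use_gpu']).use_gpu)  # True
print(q.parse_args([]).use_gpu)             # False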
CellCounting/utils/__pycache__/db.cpython-36.pyc @ a27e8595
No preview for this file type
build/lib/CellCounting/ClickCells/__init__.py (deleted, 100644 → 0) @ d82d6b81
build/lib/CellCounting/ClickCells/click_cells.py (deleted, 100644 → 0) @ d82d6b81
#!/usr/bin/env python
#from PIL import Image
import tifffile as tif
import numpy as np
import matplotlib.pyplot as plt
import os
import os.path as op
import re
import glob
import argparse
import shutil
import random


class CellPicker(object):

    def __init__(self, figure, axis):
        self.points = []
        self.figure = figure
        self.axis = axis
        self.figure.canvas.mpl_connect('button_press_event', self.on_press)
        self.figure.canvas.mpl_connect('key_press_event', self.on_key_undo)
        self.figure.canvas.mpl_connect('key_press_event', self.on_key_quit)
        self.figure.canvas.mpl_connect('key_press_event', self.on_key_dunno)
    def on_press(self, ev):
        self.points.append((ev.xdata, ev.ydata))
        points_array = np.array(self.points)
        self.axis.plot(points_array[:, 0], points_array[:, 1],
                       marker='o', linestyle='None', markersize=5, color="red")
        self.figure.canvas.draw()

    def on_key_quit(self, event):
        if event.key == 'q':
            plt.close()
            exit()

    def on_key_undo(self, event):
        if event.key == 'u':
            self.points.pop(-1)
            points_array = np.array(self.points)
            self.axis.clear()
            self.axis.plot(points_array[:, 0], points_array[:, 1],
                           marker='o', linestyle='None', markersize=5, color="red")
            self.figure.canvas.draw()

    def on_key_dunno(self, event):
        if event.key == 'n':
            plt.close()
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser("Click on cells and save to txt file")
    parser.add_argument("-i", "--input_folder", required=True,
                        help="Input folder (_splitted).")
    parser.add_argument("-o", "--output_cell_coordinates", required=True,
                        help="Output file name.")
    parser.add_argument("--shuffle", action='store_true', default=False, dest='shuffle',
                        help="Load sub-images in random order.")
    parser.add_argument("--append", action='store_true', default=False, dest='append',
                        help="Append results to output file.")
    parser.add_argument("--empty_zone", action='store_true', default=False, dest="empty_zone",
                        help="Entire zone is empty")
    args = parser.parse_args()

    # Find Numpy array files
    infolder = args.input_folder
    outfile = args.output_cell_coordinates
    files = glob.glob(op.join(infolder, '*_w_*_h_*.npy'))
    if args.shuffle == True:
        random.shuffle(files)

    create_header = True
    if op.exists(outfile):
        if args.append == True:
            create_header = False
        else:
            print("File {} exists. Overwrite/Append/Exit?[O,A,E]".format(outfile))
            response = input()
            if response.upper() == "O":
                os.remove(outfile)
            elif response.upper() == "E":
                print("Exiting without doing anything")
                exit()
            elif response.upper() == "A":
                create_header = False
    if create_header == True:
        with open(outfile, 'w') as f:
            f.write('Sub-Image-File\tXcoord\tYcoord\n')
    counter = 0
    for file in files:
        # First, get h and w values corresponding to sub-image from filename.
        # This is in order to export x,y coordinates relative to original brain_slice image.
        res = re.findall("w_(\d+).(\d+)_h_(\d+).(\d+)", file)[0]
        w = round(float(res[0] + "." + res[1]))
        h = round(float(res[2] + "." + res[3]))
        if args.empty_zone == True:
            with open(outfile, 'a') as f:
                f.write('%s\tNaN\tNaN\n' % file)
        else:
            # prepare figure and load .npy files in as an image, ready to show
            fig = plt.figure()
            ax = fig.add_subplot(111)
            im = ax.imshow(np.load(file))
            ax.set_xlim(ax.get_xlim())
            ax.set_ylim(ax.get_ylim())
            # Instantiate CellPicker class
            p = CellPicker(fig, ax)
            counter += 1
            ax.set_title("Click on cells.\n"
                         "'q'=quit, 'u'=undo, 'n'=dunno\n"
                         "Close if you can't see any cells\n"
                         "Processed {}/{}".format(counter, len(files)))
            # Once past this we move to the next image...
            plt.show()
            # Write x,y coordinates to the output file,
            # adding h and w values in order to translate into coordinates
            # of original image.
            with open(outfile, 'a') as f:
                if not p.points:
                    f.write('%s\tNaN\tNaN\n' % file)
                else:
                    for point in p.points:
                        f.write('%20s\t%12f\t%12f\n' % (os.path.abspath(file),
                                                        point[1] + w, point[0] + h))


if __name__ == '__main__':
    main()
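The click_cells script encodes each sub-image's offset in its filename and recovers it with a regular expression. A small worked example of that parsing, with an invented filename (note the original code uses a non-raw pattern string; a raw string is used here to silence Python's escape-sequence warning):

import re

fname = 'slice1_w_1024.0_h_2048.0.npy'   # hypothetical sub-image file
res = re.findall(r"w_(\d+).(\d+)_h_(\d+).(\d+)", fname)[0]
w = round(float(res[0] + "." + res[1]))  # 1024: horizontal offset of the tile
h = round(float(res[2] + "." + res[3]))  # 2048: vertical offset of the tile
print(w, h)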
build/lib/CellCounting/ClickCells/create_db.py (deleted, 100644 → 0) @ d82d6b81
#!/usr/bin/env python3
import argparse
import numpy as np
import pandas as pd
import re

DB_IMAGE_RES = 64


def check_imshape(shape):
    sx, sy, _ = shape
    if sx != sy:
        return False
    if (sx % DB_IMAGE_RES != 0) or (sy % DB_IMAGE_RES != 0):
        return False
    return True
def append_file_content(fname, image_list, count_list):
    df = pd.read_table(fname)
    udf = df.groupby('Sub-Image-File').count()
    for f in udf.index:
        # Load Numpy array
        im = np.load(f.strip())
        if not check_imshape(im.shape):
            print("Error: Bad Image dimensions. Must be square and multiple of {}".format(DB_IMAGE_RES))
        sizx, sizy, _ = im.shape
        # Split into sub-zones
        size_ratio = sizx // DB_IMAGE_RES
        im2 = im.reshape(size_ratio, sizx // size_ratio,
                         size_ratio, sizy // size_ratio, 3)
        im3 = im2.transpose(0, 2, 1, 3, 4).reshape(size_ratio ** 2,
                                                   sizx // size_ratio,
                                                   sizy // size_ratio, 3)
        image_list.append(im3)
        res = re.findall("w_(\d+).(\d+)_h_(\d+).(\d+)", f)[0]
        W = round(float(res[0] + "." + res[1]))
        H = round(float(res[2] + "." + res[3]))
        count_cells = np.zeros((size_ratio, size_ratio), dtype=int)
        for indiv_cells in df.values[df['Sub-Image-File'] == f]:
            if np.isnan(indiv_cells[1]):
                pass
            else:
                w = float(indiv_cells[1]) - W
                h = float(indiv_cells[2]) - H
                count_cells[int(w // (sizx / size_ratio)),
                            int(h // (sizy / size_ratio))] += 1
        count_list.append(count_cells.flatten())
    return len(udf.index)
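The reshape/transpose pair in append_file_content() is the standard NumPy trick for cutting an image into a grid of tiles. A quick sanity check of the same pattern on a 128x128 image (sizes chosen for illustration):

import numpy as np

im = np.arange(128 * 128 * 3).reshape(128, 128, 3)
r = 128 // 64  # size_ratio = 2 for DB_IMAGE_RES = 64
tiles = im.reshape(r, 64, r, 64, 3).transpose(0, 2, 1, 3, 4).reshape(r * r, 64, 64, 3)
assert (tiles[0] == im[:64, :64]).all()  # first tile is the top-left block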
def create_db(file_list, outfile):
    image_list = []
    count_list = []
    total = 0
    for f in file_list:
        total += append_file_content(f, image_list, count_list)
    shape = image_list[0].shape[1:]
    count_list = np.array(count_list).flatten()