Commit 5ae044fd
authored Sep 16, 2018 by Saad Jbabdi
turn into script
parent 0e115986

Changes: 1
CellCounting/models/train_model.py
#!/bin/ipython
#coding: utf8
# Train convnet for cell recognition

#!/usr/bin/env python
# Train model for cell recognition
# Oiwi, 07/2018
# Saad, 09/2018
# import modules
# General
import numpy as np
# ------------------------------ DATA ------------------------------ #
# Create DB with equal numbers of cell/no cells images
def get_balanced_data(data, labels, split=0.0):
    classes, counts = np.unique(labels, return_counts=True)
    nPerClass = counts.min()
    nClasses = len(classes)
    idx_train = []
    idx_test = []
    for cl in classes:
        cIdxs = np.where(labels == cl)[0]
        cIdxs = np.random.choice(cIdxs, nPerClass, replace=False)
        n = int((1.0 - split) * len(cIdxs))
        idx_train.extend(cIdxs[:n])
        idx_test.extend(cIdxs[n:])
    data_train = data[idx_train, ...].astype(float)
    labels_train = labels[idx_train]
    data_test = data[idx_test, ...].astype(float)
    labels_test = labels[idx_test]
    return data_train, labels_train, data_test, labels_test
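# A minimal usage sketch for get_balanced_data (synthetic arrays, names invented
# for illustration only; commented out so it does not run as part of training):
#   demo_data   = np.random.rand(40, 28, 28)
#   demo_labels = np.array([0] * 30 + [1] * 10)
#   tr_x, tr_y, te_x, te_y = get_balanced_data(demo_data, demo_labels, split=0.1)
# The minority class has 10 examples, so 10 per class are kept; with split=0.1,
# 9 per class go to train and 1 per class to test.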
# LOAD DATA
datafile1 = '/vols/Data/sj/Haber_Digitisation/Images/celldb_001.npz'
celldb1 = np.load(datafile1)
datafile2 = '/vols/Data/sj/Haber_Digitisation/Images/celldb_002.npz'
celldb2 = np.load(datafile2)
images = np.concatenate((celldb1['images'], celldb2['images']))
counts = np.concatenate((celldb1['counts'], celldb2['counts']))
# Equal number of exemplars per class
# Split train/test
img_train, lab_train, img_val, lab_val = get_balanced_data(images, counts > 0, .1)
# NORMALISE
img_avg = img_train.mean(axis=0)
img_std = img_train.std(axis=0)
img_train = (img_train - img_avg) / img_std
img_val = (img_val - img_avg) / img_std
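# Note on shapes: mean/std over axis=0 give per-pixel statistics of shape
# img_train.shape[1:], which NumPy broadcasts across the sample axis, e.g.
#   img_train: (N, H, W[, C])  ->  img_avg, img_std: (H, W[, C])
# The validation set is deliberately normalised with the *training* statistics.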
# One-hot labels
from keras.utils import np_utils
n_classes = len(np.unique(lab_train))
lab_train = np_utils.to_categorical(lab_train, n_classes)
lab_val = np_utils.to_categorical(lab_val, n_classes)
# ------------------------------ MODEL ------------------------------ #
import numpy as np
import time
import os
# DL stuff
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization
from keras import backend as K
from keras import optimizers
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
# Other
import pandas as pd
from CellCounting.utils import db
import argparse

# The below swaps image dimensions / why is this needed?
#if K.backend()=='tensorflow':
#    K.set_image_dim_ordering('th')
# matconvnet model
model = Sequential()
model.add(Convolution2D(20, (5, 5), strides=(1, 1), padding='valid',
                        input_shape=img_train.shape[1:]))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(50, (5, 5), strides=(1, 1), padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=4, padding='valid'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(500, (5, 5), strides=(2, 2), padding='valid'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(2, (2, 2), strides=1, padding='valid'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(2, (1, 1), strides=(1, 1), padding='valid'))
model.add(Flatten())
model.add(BatchNormalization())
model.add(Activation('softmax'))
model.summary()
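# Shape walk-through for a hypothetical 64x64 single-channel input (the real
# patch size comes from img_train and may differ; 'valid' means no padding):
#   conv 5x5/1 -> 60x60x20;  pool 2x2/2 -> 30x30x20
#   conv 5x5/1 -> 26x26x50;  pool 2x2/4 -> 7x7x50
#   conv 5x5/2 -> 2x2x500
#   conv 2x2/1 -> 1x1x2;  conv 1x1/1 -> 1x1x2;  flatten -> 2 -> softmax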
# compile model
adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
# log settings
from keras.callbacks import CSVLogger
from keras.callbacks import ModelCheckpoint
OutputDir = '/vols/Data/sj/Haber_Digitisation/Images/results'
csv_logger = CSVLogger(OutputDir + '/haber1_loss.log')
checkpointer = ModelCheckpoint(filepath=OutputDir + '/haber1_weights.hdf5',
                               verbose=1, save_best_only=True)
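# With save_best_only=True, ModelCheckpoint keeps the weights from the epoch
# with the best monitored quantity (val_loss by default in Keras), so fit must
# be given validation_data for the checkpointing to have anything to monitor.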
# ------------------------------ DATA ------------------------------ #
def prepare_data(celldb, args):
    X_train, y_train, X_test, y_test = celldb.split_train_test(split=args.split)
    # Normalise images
    #X_train = X_train.astype(np.float32) / 255.0
    #X_test = X_test.astype(np.float32) / 255.0
    img_avg = X_train.mean(axis=0)
    img_std = X_train.std(axis=0)
    X_train = (X_train - img_avg) / img_std
    X_test = (X_test - img_avg) / img_std
    # One-hot labels
    # train model
    import time
    start = time.time()
    n_classes = len(np.unique(y_train))
    y_train = np_utils.to_categorical(y_train, n_classes)
    y_test = np_utils.to_categorical(y_test, n_classes)
    # Save mean/std
    np.save(os.path.join(args.out, 'image_normalise', 'img_avg.npy'), img_avg)
    np.save(os.path.join(args.out, 'image_normalise', 'img_std.npy'), img_std)
    return X_train, y_train, X_test, y_test
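# Note: np.save does not create intermediate directories, so args.out/image_normalise
# must already exist; a sketch of the usual guard (not in the original commit):
#   os.makedirs(os.path.join(args.out, 'image_normalise'), exist_ok=True)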
# train model
import os
gpu = True
if gpu == True:
    K.tensorflow_backend._get_available_gpus()
    print('* Running forward pass on GPU (CUDA_VISIBLE_DEVICES)')
else:
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    print('* Running forward pass on CPU')
# ------------------------------ MODEL ------------------------------ #
def create_model(shape, arch='convnet'):
    if (arch == 'convnet'):
        # matconvnet model
        model = Sequential()
        model.add(Convolution2D(20, (5, 5), strides=(1, 1), padding='valid',
                                input_shape=shape))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Convolution2D(50, (5, 5), strides=(1, 1), padding='valid'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=4, padding='valid'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Convolution2D(500, (5, 5), strides=(2, 2), padding='valid'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Convolution2D(2, (2, 2), strides=1, padding='valid'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Convolution2D(2, (1, 1), strides=(1, 1), padding='valid'))
        model.add(Flatten())
        model.add(BatchNormalization())
        model.add(Activation('softmax'))
    else:
        print('Error: Unknown architecture {}'.format(arch))
        exit(1)  # abort on unknown architecture
    # compile model
    adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    return model
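# Usage sketch (hypothetical 64x64 RGB patches; the real shape comes from the data):
#   model = create_model((64, 64, 3), arch='convnet')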
def accuracy(test_x, test_y, model):
    result = model.predict(test_x)
    predicted_class = np.argmax(result, axis=1)
    true_class = np.argmax(test_y, axis=1)
    num_correct = np.sum(predicted_class == true_class)
    accuracy = float(num_correct) / result.shape[0]
    return (accuracy * 100)
    pred = model.predict(test_x)
# data augmentation
DataAugment = True
if (DataAugment):
    from keras.preprocessing.image import ImageDataGenerator
    datagen = ImageDataGenerator(
        rotation_range=90,       # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=.25,   # randomly shift images horizontally (fraction of total width)
        height_shift_range=.25,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,    # randomly flip images
        vertical_flip=True)      # randomly flip images
    datagen.fit(img_train)
    model_info = model.fit_generator(datagen.flow(img_train, lab_train, batch_size=32),
                                     epochs=200, verbose=1, shuffle=True,
                                     validation_data=(img_val, lab_val),
                                     callbacks=[csv_logger, checkpointer])
else:
    model_info = model.fit(img_train, lab_train, batch_size=32, epochs=100,
                           verbose=1, shuffle=True,
                           validation_data=(img_val, lab_val),
                           callbacks=[csv_logger, checkpointer])
TP = ((pred[:, 1] > pred[:, 0]) & (test_y[:, 1] > test_y[:, 0])).sum()
FP = ((pred[:, 1] > pred[:, 0]) & (test_y[:, 1] < test_y[:, 0])).sum()
TN = ((pred[:, 1] < pred[:, 0]) & (test_y[:, 1] < test_y[:, 0])).sum()
FN = ((pred[:, 1] < pred[:, 0]) & (test_y[:, 1] > test_y[:, 0])).sum()
prec = TP / (TP + FP) * 100
recall = TP / (TP + FN) * 100
end = time.time()
return prec, recall
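# Reading of the comparisons above: pred[:, 1] > pred[:, 0] means the "cell"
# class scored higher than "no cell", and test_y is one-hot ground truth, so
# prec = TP/(TP+FP) and recall = TP/(TP+FN), both expressed as percentages.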
print("Model took %0.2f seconds to train" % (end - start))
print("Accuracy on test data is: %0.2f" % accuracy(img_val, lab_val, model))
def train_model(model, celldb, args):
    # train model
    X_train, y_train, X_test, y_test = prepare_data(celldb, args)
    start = time.time()
    if args.gpu == True:
        K.tensorflow_backend._get_available_gpus()
        print('* Running forward pass on GPU (CUDA_VISIBLE_DEVICES)')
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        print('* Running forward pass on CPU')
    # data augmentation
    DataAugment = args.augment
    # Fitting params
    epochs = args.epochs
    batch_size = args.batch_size
    if (DataAugment):
        print('* Using data augmentations')
        datagen = ImageDataGenerator(
            rotation_range=90,       # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=.25,   # randomly shift images horizontally (fraction of total width)
            height_shift_range=.25,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,    # randomly flip images
            vertical_flip=True)      # randomly flip images
        datagen.fit(X_train)
        info = model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                                   epochs=epochs, verbose=1, shuffle=True,
                                   validation_data=(X_test, y_test))
    else:
        info = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                         verbose=1, shuffle=True, validation_data=(X_test, y_test))
    end = time.time()
    print('Model took {:.2f} seconds to train'.format(end - start))
    prec, recall = accuracy(X_test, y_test, model)
    print('Accuracy on test data: precision = {:.2f}, recall = {:.2f}'.format(prec, recall))
    return info
# SAVE MODEL AND FITTING HISTORY
import pandas as pd
def save_results(model, info, args):
    outfile = OutputDir + '/model_db1+2_augment.h5'
    model.save(outfile)
    outhist = OutputDir + '/model_db1+2_augment_hist.csv'
    df = pd.DataFrame(model_info.history)
    df.to_csv(outhist)
    outfile = os.path.join(args.out, 'model.h5')
    model.save(outfile)
    outhist = os.path.join(args.out, 'model_hist.csv')
    df = pd.DataFrame(info.history)
    df.to_csv(outhist)
    return
# done
print("Done")
# import matplotlib.pyplot as plt
# df = pd.read_csv(OutputDir + '/tmp/tmp_model_hist.csv')
# plt.plot(df['loss'])
# plt.plot(df['val_loss'])
# plt.show()
# Input
# - Predefined model
# - List of DBs
# - Basename folder for output
# - options for the fitting (use GPU, batches, augmentation, etc.)
# - options for the fitting
# - GPU/Augmentation/ModelType?/
# Output
# - model.h5
# - model history
...
...
@@ -209,26 +172,44 @@ print("Done")
def main():
    p = argparse.ArgumentParser(description='Train model on some data')
    p.add_argument('-o', '--out', default=None, type=str, metavar='<str>',
                   help='output basename (default derived from infile)')
    p.add_argument('-s', '--stride', default=None, type=int, metavar='<int>',
                   help='stride (default: width of model input filter)')
    p.add_argument('-t', '--timer', default=True, type=bool, metavar='<bool>',
                   help='time how fast the forward model is')
    p.add_argument('-n', '--normdir', default=None, type=str, metavar='<dir>',
                   help='path to directory that the normalising images live in, img_avg.npy and img_std.npy')
    # Optional arguments
    p.add_argument('--gpu', default=False, type=bool, metavar='<bool>',
                   help='use GPU if True, use CPU if False (default=False)')
    p.add_argument('--epochs', default=100, type=int, metavar='<int>',
                   help='number of training epochs (default=100)')
    p.add_argument('--batch_size', default=32, type=int, metavar='<int>',
                   help='batch size (default=32)')
    p.add_argument('--model', default='convnet', type=str, metavar='<str>',
                   help='choose model amongst [convnet,...] (default=convnet)')
    p.add_argument('--augment', default=False, type=bool, metavar='<bool>',
                   help='use data augmentation (default=False)')
    # Required arguments
    required = p.add_argument_group('Required arguments')
    required.add_argument('-m', '--inmodel', required=True, type=str, metavar='<str>.h5',
                          help='model (e.g. convolutional neural network)')
    required.add_argument('infile', type=str, metavar='<str>.tif',
                          help='input image file')
    required.add_argument('-d', '--data', required=True, type=str, nargs='*',
                          metavar='<str>.npz <str>.npz ...',
                          help='input databases')
    required.add_argument('-o', '--out', required=True, type=str, metavar='<str>',
                          help='output basename')
    # Parse arguments
    args = p.parse_args()
    ffm(args.infile, basename=args.out, inmodel=args.inmodel, stride=args.stride,
        gpu=args.gpu, timer=args.timer, normdir=args.normdir)
    # Do the work
    print('Preparing image database')
    celldb = db.CellDB()
    celldb.load(args.data)
    celldb.equalise_classes()
    print('Preparing and training model')
    model = create_model(args.model)
    info = train_model(model, celldb, args)
    print('Saving results')
    save_results(model, info, args)
    print('Done')

if __name__ == '__main__':
    main()
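# Example invocation (all file names are placeholders, not taken from the repo):
#   python train_model.py -m model.h5 -d celldb_001.npz celldb_002.npz \
#       -o results/run1 --epochs 100 --batch_size 32 --model convnet input.tif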
...
...