Commit 5ae044fd authored by Saad Jbabdi's avatar Saad Jbabdi

turn into script

parent 0e115986
#!/usr/bin/env python
# coding: utf8
# Train model for cell recognition
# Oiwi, 07/2018
# Saad, 09/2018
# import modules
# General
import numpy as np
import time
import os
import argparse
# DL stuff
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization
from keras import backend as K
from keras import optimizers
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import CSVLogger, ModelCheckpoint
# Other
import pandas as pd
from CellCounting.utils import db
# The below swaps image dimensions / why is this needed?
# if K.backend() == 'tensorflow':
#     K.set_image_dim_ordering('th')
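# (On the question above: 'th' means channels-first arrays of shape
# (N, channels, rows, cols), while the TensorFlow default is channels-last
# (N, rows, cols, channels). As far as I can tell, the call is only needed if
# the image databases were saved channels-first, so it can stay commented out
# for channels-last data.)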
# ------------------------------ DATA ------------------------------ #
# Create DB with equal numbers of cell/no-cell images
def get_balanced_data(data, labels, split=0.0):
    classes, counts = np.unique(labels, return_counts=True)
    nPerClass = counts.min()
    nClasses = len(classes)
    idx_train = []
    idx_test = []
    for cl in classes:
        cIdxs = np.where(labels == cl)[0]
        cIdxs = np.random.choice(cIdxs, nPerClass, replace=False)
        n = int((1.0 - split) * len(cIdxs))
        idx_train.extend(cIdxs[:n])
        idx_test.extend(cIdxs[n:])
    data_train = data[idx_train, ...].astype(float)
    labels_train = labels[idx_train]
    data_test = data[idx_test, ...].astype(float)
    labels_test = labels[idx_test]
    return data_train, labels_train, data_test, labels_test
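# Quick sanity check of the balancing logic (made-up shapes and counts): with
# 300 "cell" and 100 "no cell" labels and split=0.1, both classes are
# subsampled to 100 examples, 90 per class for training and 10 for testing:
#   data   = np.random.rand(400, 64, 64, 3)
#   labels = np.concatenate((np.ones(300, dtype=bool), np.zeros(100, dtype=bool)))
#   X_tr, y_tr, X_te, y_te = get_balanced_data(data, labels, split=0.1)
#   # X_tr.shape -> (180, 64, 64, 3), X_te.shape -> (20, 64, 64, 3)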
def prepare_data(celldb, args):
    X_train, y_train, X_test, y_test = celldb.split_train_test(split=args.split)
    # Normalise images
    #X_train = X_train.astype(np.float32) / 255.0
    #X_test = X_test.astype(np.float32) / 255.0
    img_avg = X_train.mean(axis=0)
    img_std = X_train.std(axis=0)
    X_train = (X_train - img_avg) / img_std
    X_test = (X_test - img_avg) / img_std
    # One-hot labels
    n_classes = len(np.unique(y_train))
    y_train = np_utils.to_categorical(y_train, n_classes)
    y_test = np_utils.to_categorical(y_test, n_classes)
    # Save mean/std so new images can be normalised identically at test time
    norm_dir = os.path.join(args.out, 'image_normalise')
    if not os.path.isdir(norm_dir):
        os.makedirs(norm_dir)
    np.save(os.path.join(norm_dir, 'img_avg.npy'), img_avg)
    np.save(os.path.join(norm_dir, 'img_std.npy'), img_std)
    return X_train, y_train, X_test, y_test
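# A prediction script would re-apply the saved statistics to new images before
# calling model.predict, e.g. (sketch, assuming the same --out folder):
#   img_avg = np.load(os.path.join(args.out, 'image_normalise', 'img_avg.npy'))
#   img_std = np.load(os.path.join(args.out, 'image_normalise', 'img_std.npy'))
#   new_images = (new_images.astype(float) - img_avg) / img_std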
# ------------------------------ MODEL ------------------------------ #
def create_model(shape, arch='convnet'):
    if arch == 'convnet':
        # matconvnet-style model
        model = Sequential()
        model.add(Convolution2D(20, (5, 5), strides=(1, 1), padding='valid', input_shape=shape))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Convolution2D(50, (5, 5), strides=(1, 1), padding='valid'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=4, padding='valid'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Convolution2D(500, (5, 5), strides=(2, 2), padding='valid'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Convolution2D(2, (2, 2), strides=(1, 1), padding='valid'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Convolution2D(2, (1, 1), strides=(1, 1), padding='valid'))
        model.add(Flatten())
        model.add(BatchNormalization())
        model.add(Activation('softmax'))
    else:
        raise ValueError('Unknown architecture {}'.format(arch))
    # compile model
    adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    return model
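# Spatial-size bookkeeping for the 'convnet' architecture (assuming 64x64
# input patches; every layer is 'valid', so out = (in - kernel)//stride + 1):
#   64 -> conv 5x5/1 -> 60 -> pool 2x2/2 -> 30 -> conv 5x5/1 -> 26
#      -> pool 2x2/4 -> 7 -> conv 5x5/2 -> 2 -> conv 2x2/1 -> 1 -> conv 1x1/1 -> 1
# so Flatten sees a 1x1x2 map, matching the two softmax outputs; other input
# sizes would need the kernel/stride choices adjusted.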
def accuracy(test_x, test_y, model):
    # Confusion-matrix entries, treating column 1 ("cell") as the positive class
    pred = model.predict(test_x)
    TP = ((pred[:, 1] > pred[:, 0]) & (test_y[:, 1] > test_y[:, 0])).sum()
    FP = ((pred[:, 1] > pred[:, 0]) & (test_y[:, 1] < test_y[:, 0])).sum()
    TN = ((pred[:, 1] < pred[:, 0]) & (test_y[:, 1] < test_y[:, 0])).sum()
    FN = ((pred[:, 1] < pred[:, 0]) & (test_y[:, 1] > test_y[:, 0])).sum()
    prec = 100.0 * TP / (TP + FP)
    recall = 100.0 * TP / (TP + FN)
    return prec, recall
def train_model(celldb, args):
    # Select device before any data/model work touches the backend
    if args.gpu:
        K.tensorflow_backend._get_available_gpus()
        print('* Running forward pass on GPU (CUDA_VISIBLE_DEVICES)')
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        print('* Running forward pass on CPU')
    # Prepare data and create the model (input shape comes from the data)
    X_train, y_train, X_test, y_test = prepare_data(celldb, args)
    model = create_model(X_train.shape[1:], arch=args.model)
    # Logging callbacks
    csv_logger = CSVLogger(os.path.join(args.out, 'loss.log'))
    checkpointer = ModelCheckpoint(filepath=os.path.join(args.out, 'weights.hdf5'),
                                   verbose=1, save_best_only=True)
    # Fitting params
    epochs = args.epochs
    batch_size = args.batch_size
    start = time.time()
    if args.augment:
        # Data augmentation
        print('* Using data augmentation')
        datagen = ImageDataGenerator(
            rotation_range=90,       # randomly rotate images by up to 90 degrees
            width_shift_range=.25,   # randomly shift images horizontally (fraction of total width)
            height_shift_range=.25,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,    # randomly flip images left/right
            vertical_flip=True)      # randomly flip images up/down
        datagen.fit(X_train)
        info = model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                                   epochs=epochs, verbose=1, shuffle=True,
                                   validation_data=(X_test, y_test),
                                   callbacks=[csv_logger, checkpointer])
    else:
        info = model.fit(X_train, y_train, batch_size=batch_size,
                         epochs=epochs, verbose=1, shuffle=True,
                         validation_data=(X_test, y_test),
                         callbacks=[csv_logger, checkpointer])
    end = time.time()
    print('Model took {:.2f} seconds to train'.format(end - start))
    prec, recall = accuracy(X_test, y_test, model)
    print('Accuracy on test data: precision = {:.2f}, recall = {:.2f}'.format(prec, recall))
    return model, info
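# A note on the augmentation above: datagen.fit() is only strictly required
# when featurewise_center, featurewise_std_normalization or zca_whitening are
# enabled; with only rotations/shifts/flips the fitted statistics are never
# used, so the call is harmless but optional.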
# SAVE MODEL AND FITTING HISTORY
def save_results(model, info, args):
    outfile = os.path.join(args.out, 'model.h5')
    model.save(outfile)
    outhist = os.path.join(args.out, 'model_hist.csv')
    df = pd.DataFrame(info.history)
    df.to_csv(outhist)
    return
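# The saved model can be restored later for inference (sketch):
#   from keras.models import load_model
#   model = load_model(os.path.join(args.out, 'model.h5'))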
# To inspect the fitting history afterwards:
# import matplotlib.pyplot as plt
# df = pd.read_csv(os.path.join(args.out, 'model_hist.csv'))
# plt.plot(df['loss'])
# plt.plot(df['val_loss'])
# plt.show()
# Input
# - Predefined model
# - List of DBs
# - Basename folder for output
# - Options for the fitting (GPU, batch size, epochs, augmentation, model type)
# Output
# - model.h5
# - model history (csv)
def main():
    p = argparse.ArgumentParser(description='Train model on some data')
    # Optional arguments
    p.add_argument('--gpu', action='store_true',
                   help='run on the GPU (default: CPU)')
    p.add_argument('--epochs', default=100, type=int, metavar='<int>',
                   help='number of training epochs (default=100)')
    p.add_argument('--batch_size', default=32, type=int, metavar='<int>',
                   help='batch size (default=32)')
    p.add_argument('--split', default=0.1, type=float, metavar='<float>',
                   help='fraction of data held out for testing (default=0.1)')
    p.add_argument('--model', default='convnet', type=str, metavar='<str>',
                   help='choose model amongst [convnet,...] (default=convnet)')
    p.add_argument('--augment', action='store_true',
                   help='use data augmentation (default: off)')
    # Required arguments
    required = p.add_argument_group('Required arguments')
    required.add_argument('-d', '--data', required=True, type=str, nargs='+',
                          metavar='<str>.npz',
                          help='input databases')
    required.add_argument('-o', '--out', required=True, type=str, metavar='<str>',
                          help='output basename')
    # Parse arguments
    args = p.parse_args()
    # Do the work
    print('Preparing image database')
    celldb = db.CellDB()
    celldb.load(args.data)
    celldb.equalise_classes()
    print('Preparing and training model')
    model, info = train_model(celldb, args)
    print('Saving results')
    save_results(model, info, args)
    print('Done')

if __name__ == '__main__':
    main()
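# Example invocation (script name is illustrative):
#   python train_cellcount.py \
#       -d /vols/Data/sj/Haber_Digitisation/Images/celldb_001.npz \
#          /vols/Data/sj/Haber_Digitisation/Images/celldb_002.npz \
#       -o /vols/Data/sj/Haber_Digitisation/Images/results \
#       --gpu --augment --epochs 200 --batch_size 32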