Commit b24fa1c2 authored by Saad Jbabdi's avatar Saad Jbabdi
Browse files

Updates

parents eab2606e 09a5b52d
#!/usr/bin/env python3
import argparse
import numpy as np
import pandas as pd
import re
from CellCounting.Utils.db import DataBase
DB_IMAGE_RES = 64
def check_imshape(shape):
    """Tell whether an image shape can be cut into DB-sized square tiles.

    ``shape`` must unpack into exactly three entries. The first two have to
    be equal and a multiple of ``DB_IMAGE_RES``; the third entry is ignored.

    Returns True when both conditions hold, False otherwise.
    """
    rows, cols, _ = shape
    if rows != cols:
        return False
    # rows == cols at this point, so checking one side covers both.
    return rows % DB_IMAGE_RES == 0
def append_file_content(fname, image_list, count_list):
    """Append the tiles and per-tile cell counts of every sub-image in a click table.

    Parameters
    ----------
    fname : str
        Path of a table readable by ``pandas.read_table``. It must have a
        'Sub-Image-File' column; the columns at positions 1 and 2 are read
        as the two coordinates of each clicked cell.
    image_list : list
        Extended in place with one array per sub-image, of shape
        ``(size_ratio**2, DB_IMAGE_RES, DB_IMAGE_RES, channels)``.
    count_list : list
        Extended in place with one flat integer array per sub-image, of
        length ``size_ratio**2`` (number of cells per tile).

    Returns
    -------
    int
        Number of distinct sub-image files referenced in the table.

    Raises
    ------
    ValueError
        If a sub-image is not square or its side is not a multiple of
        ``DB_IMAGE_RES``. Tiling such an image would either fail or silently
        produce tiles of the wrong size.
    """
    df = pd.read_table(fname)
    # One index entry per distinct sub-image file.
    udf = df.groupby('Sub-Image-File').count()
    for f in udf.index:
        # Load Numpy array
        im = np.load(f.strip())
        if not check_imshape(im.shape):
            raise ValueError(
                "Error: Bad Image dimensions. Must be square and multiple of {}"
                " (file {!r}, shape {})".format(DB_IMAGE_RES, f, im.shape))
        sizx, sizy, nchan = im.shape

        # Split into sub-zones: a size_ratio x size_ratio grid of square tiles.
        size_ratio = sizx // DB_IMAGE_RES
        tile_x = sizx // size_ratio
        tile_y = sizy // size_ratio
        im2 = im.reshape(size_ratio, tile_x, size_ratio, tile_y, nchan)
        im3 = im2.transpose(0, 2, 1, 3, 4).reshape(
            size_ratio**2, tile_x, tile_y, nchan)
        image_list.append(im3)

        # The file name carries two decimal numbers ("w_<int>.<int>_h_<int>.<int>");
        # they are rounded and subtracted from the clicked coordinates below.
        res = re.findall(r"w_(\d+).(\d+)_h_(\d+).(\d+)", f)[0]
        W = round(float(res[0] + "." + res[1]))
        H = round(float(res[2] + "." + res[3]))

        count_cells = np.zeros((size_ratio, size_ratio), dtype=int)
        in_this_file = (df['Sub-Image-File'] == f).values
        for indiv_cells in df.values[in_this_file]:
            # Rows whose first coordinate is NaN carry no click; skip them.
            if np.isnan(indiv_cells[1]):
                continue
            w = float(indiv_cells[1]) - W
            h = float(indiv_cells[2]) - H
            # NOTE(review): a coordinate below the offset yields a negative
            # index, which wraps to the last tile; one past the image raises
            # IndexError. Confirm clicks always fall inside the sub-image.
            count_cells[int(w // tile_x), int(h // tile_y)] += 1
        count_list.append(count_cells.flatten())
    return len(udf.index)
def create_db(file_list, outfile):
    """Build an ``.npz`` database of image tiles and cell counts from click tables.

    Parameters
    ----------
    file_list : iterable of str
        Click-table files, each handed to ``append_file_content``.
    outfile : str
        Destination passed to ``numpy.savez``. The archive holds ``counts``
        (one integer per tile) and ``images`` (one tile per row).

    Raises
    ------
    ValueError
        If no image was collected from ``file_list``.
    """
    image_list = []
    count_list = []
    total = 0
    for f in file_list:
        total += append_file_content(f, image_list, count_list)

    if not image_list:
        raise ValueError("No images found in the given click files")

    # Each list entry holds the tiles of one sub-image. Sub-images may differ
    # in size and hence in tile count, so concatenate along the first axis
    # rather than stacking into a (possibly ragged) array.
    images = np.concatenate(image_list, axis=0)
    counts = np.concatenate(count_list)

    np.savez(outfile, counts=counts, images=images)
    print("Created DB with {} Images from a list of {} with {} containing cells.".format(len(counts),total,(counts>0).sum()))
def main():
    """Command-line entry point: parse arguments and build the database."""
    # Parse command line arguments
    # The text is a description; passed positionally it would be taken as
    # the program name shown in the usage line.
    parser = argparse.ArgumentParser(
        description="Create DB from clicked textfiles"
    )
    parser.add_argument("outfile",
                        help="Output file name")
    parser.add_argument("file",
                        help="Clicky text file",
                        nargs='+')
    args = parser.parse_args()
    create_db(args.file, args.outfile)


if __name__ == '__main__':
    main()
saad@jalapeno00.fmrib.ox.ac.uk.24091:1532960442
\ No newline at end of file
import glymur
import PIL
from PIL import Image
import matplotlib.pyplot as plt
# Quick look at a large image: read it subsampled and display the thumbnail.
# The second assignment overrides the first; reorder the lines to switch input.
# NOTE(review): the active path ends in .tif while it is opened with
# glymur.Jp2k -- confirm this file is readable that way.
fname = "/Users/saad/Desktop/FromJulia/mn96FS_c10_s4.jp2"
fname = "/Users/saad/grot/mn38c23LY_Neu13_10d_L.tif"
jp2 = glymur.Jp2k(fname)
# Keep every 8th pixel along both axes.
step = 2**3
thumbnail = jp2[::step, ::step]
# imshow draws the array; show() only opens the window and takes no image.
plt.imshow(thumbnail)
plt.show()
# Convert previous Matlab database to new format
import scipy.io as sio
import numpy as np

db = sio.loadmat("/Users/saad/data/Haber_CellCounting/celldb.mat",struct_as_record=False, squeeze_me=True)
images = db['celldb'].images.data
# Remap the stored labels with (2 - label), i.e. 1 -> 1 and 2 -> 0.
labels = -db['celldb'].images.label + 2

# Move the last axis to the front so the first axis indexes images.
# (The original referenced an undefined name `data` here.)
images = images.transpose(3,0,1,2)

outfile="/Users/saad/data/Haber_CellCounting/celldb.npz"
shape = images.shape[1:]
np.savez(outfile,counts=np.array(labels).flatten(),
         images=np.array(images).reshape(-1,*shape))
#!/usr/bin/enc python
#!/usr/bin/env python
# Train model for cell recognition
# Oiwi, 07/2018
......@@ -8,8 +8,8 @@
# General
import numpy as np
import time
import os
import time, os, sys
import argparse
# DL stuff
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
......@@ -21,7 +21,7 @@ from keras.preprocessing.image import ImageDataGenerator
# Other
import pandas as pd
from CellCounting.utils import db
import argparse
# ------------------------------ DATA ------------------------------ #
def prepare_data(celldb, args):
......
#!/bin/ipython
#coding: utf8
# Train convnet for cell recognition
# Oiwi, 07/2018
# Saad, 09/2018
# import modules
import numpy as np
# ------------------------------ DATA ------------------------------ #
# Create DB with equal numbers of cell/no cells images
def get_balanced_data(data,labels,split=0.0):
    """Draw a class-balanced random subset and split it into train and test.

    Every class is subsampled (without replacement, using ``np.random``) to
    the size of the rarest class. Within each class the first
    ``floor((1 - split) * n)`` drawn samples go to the training set and the
    rest to the test set.

    Parameters
    ----------
    data : numpy array
        Samples along the first axis.
    labels : numpy array
        One label per sample.
    split : float, optional
        Fraction of each class assigned to the test set (default 0.0).

    Returns
    -------
    data_train, labels_train, data_test, labels_test
        Data arrays are cast to float. Samples are grouped by class, in the
        order returned by ``np.unique``.
    """
    classes, counts = np.unique(labels, return_counts=True)
    n_per_class = counts.min()

    idx_train = []
    idx_test = []
    for cl in classes:
        members = np.where(labels == cl)[0]
        picked = np.random.choice(members, n_per_class, replace=False)
        # Round before truncating: (1.0 - 0.9) * 10 evaluates to
        # 0.9999999999999998, which a bare int() would turn into 0.
        n_train = int(round((1.0 - split) * len(picked), 9))
        idx_train.extend(picked[:n_train])
        idx_test.extend(picked[n_train:])

    # Explicit integer index arrays stay valid even when one side is empty.
    idx_train = np.asarray(idx_train, dtype=np.intp)
    idx_test = np.asarray(idx_test, dtype=np.intp)

    data_train = data[idx_train, ...].astype(float)
    labels_train = labels[idx_train]
    data_test = data[idx_test, ...].astype(float)
    labels_test = labels[idx_test]
    return data_train,labels_train,data_test,labels_test
# LOAD DATA
# Two tile databases are merged; each archive provides 'images' and 'counts'.
datafile1 = '/vols/Data/sj/Haber_Digitisation/Images/celldb_001.npz'
celldb1 = np.load(datafile1)
datafile2 = '/vols/Data/sj/Haber_Digitisation/Images/celldb_002.npz'
celldb2 = np.load(datafile2)
images = np.concatenate((celldb1['images'],celldb2['images']))
counts = np.concatenate((celldb1['counts'],celldb2['counts']))

# Equal number of exemplars per class
# Split train/test (labels are "count > 0"; 10% of each class held out)
img_train,lab_train,img_val,lab_val = get_balanced_data(images,
                                                        counts>0,.1)

# NORMALISE
# Statistics come from the training set only and are reused for validation.
img_avg = img_train.mean(axis=0)
img_std = img_train.std(axis=0)
# An entry that is constant over the training set has zero std; divide by 1
# there instead of producing NaN/inf.
img_std = np.where(img_std == 0, 1.0, img_std)
img_train = (img_train - img_avg) / img_std
img_val = (img_val - img_avg) / img_std

# One-hot labels
from keras.utils import np_utils
n_classes = len(np.unique(lab_train))
lab_train = np_utils.to_categorical(lab_train, n_classes)
lab_val = np_utils.to_categorical(lab_val, n_classes)
# ------------------------------ MODEL ------------------------------ #
import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization
from keras import backend as K
from keras import optimizers
# The below swaps image dimensions / why is this needed?
#if K.backend()=='tensorflow':
#    K.set_image_dim_ordering('th')

# matconvnet model
# Layers are created in network order and then appended one by one.
_layers = (
    Convolution2D(20, (5, 5), strides=(1, 1), padding='valid', input_shape=img_train.shape[1:]),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),
    BatchNormalization(),
    Activation('relu'),

    Convolution2D(50, (5, 5), strides=(1, 1), padding='valid'),
    MaxPooling2D(pool_size=(2, 2), strides=4, padding='valid'),
    BatchNormalization(),
    Activation('relu'),

    Convolution2D(500, (5, 5), strides=(2, 2), padding='valid'),
    BatchNormalization(),
    Activation('relu'),

    Convolution2D(2, (2, 2), strides=1, padding='valid'),
    BatchNormalization(),
    Activation('relu'),

    # Two-channel output, flattened and normalised before the softmax.
    Convolution2D(2, (1, 1), strides=(1, 1), padding='valid'),
    Flatten(),
    BatchNormalization(),
    Activation('softmax'),
)
model = Sequential()
for _layer in _layers:
    model.add(_layer)
model.summary()

# compile model
adam = optimizers.Adam(
    lr=0.0001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    decay=0.0,
)
model.compile(
    optimizer=adam,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# log settings: per-epoch CSV log plus a checkpoint of the best weights
from keras.callbacks import CSVLogger, ModelCheckpoint
OutputDir = '/vols/Data/sj/Haber_Digitisation/Images/results'
csv_logger = CSVLogger(OutputDir + '/haber1_loss.log')
checkpointer = ModelCheckpoint(
    filepath=OutputDir + '/haber1_weights.hdf5',
    verbose=1,
    save_best_only=True,
)

# start the wall-clock timer reported after training
import time
start = time.time()

# choose the device
import os
gpu = True
if not gpu:
    # An empty device list hides the GPUs.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    print('* Running forward pass on CPU')
else:
    K.tensorflow_backend._get_available_gpus()
    print('* Running forward pass on GPU (CUDA_VISIBLE_DEVICES)')
def accuracy(test_x, test_y, model):
    """Return the classification accuracy of ``model`` in percent.

    Parameters
    ----------
    test_x : array
        Inputs passed to ``model.predict``.
    test_y : array
        One-hot (or score) matrix of true classes, one row per sample.
    model : object
        Anything with a ``predict(test_x)`` method returning one row of
        class scores per sample.

    Returns
    -------
    float
        Percentage of samples whose highest-scoring predicted class equals
        the true class.
    """
    result = model.predict(test_x)
    # The prediction must come from the model output; taking it from test_y
    # compares the labels with themselves and always yields 100%.
    predicted_class = np.argmax(result, axis=1)
    true_class = np.argmax(test_y, axis=1)
    num_correct = np.sum(predicted_class == true_class)
    accuracy = float(num_correct)/result.shape[0]
    return (accuracy * 100)
# data augmentation switch
DataAugment = True

# Arguments common to both training paths.
_fit_common = dict(
    verbose=1,
    shuffle=True,
    validation_data=(img_val, lab_val),
    callbacks=[csv_logger, checkpointer],
)

if not DataAugment:
    # Plain training on the fixed training set.
    model_info = model.fit(img_train, lab_train, batch_size=32,
                           epochs=100, **_fit_common)
else:
    from keras.preprocessing.image import ImageDataGenerator
    # Random rotations, shifts and flips applied to the training images.
    datagen = ImageDataGenerator(
        rotation_range=90,
        width_shift_range=.25,
        height_shift_range=.25,
        horizontal_flip=True,
        vertical_flip=True,
    )
    datagen.fit(img_train)
    _batches = datagen.flow(img_train, lab_train, batch_size=32)
    model_info = model.fit_generator(_batches, epochs=200, **_fit_common)

end = time.time()
print("Model took %0.2f seconds to train" % (end - start))
print("Accuracy on test data is: %0.2f" % accuracy(img_val, lab_val, model))

# SAVE MODEL AND FITTING HISTORY
import pandas as pd
outfile = OutputDir + '/model_db1+2_augment.h5'
model.save(outfile)

outhist = OutputDir + '/model_db1+2_augment_hist.csv'
df = pd.DataFrame(model_info.history)
df.to_csv(outhist)

# done
print("Done")
# import matplotlib.pyplot as plt
# df = pd.read_csv(OutputDir + '/tmp/tmp_model_hist.csv')
# plt.plot(df['loss'])
# plt.plot(df['val_loss'])
# plt.show()
......@@ -123,3 +123,23 @@ class CellDB(object):
print(' ')
print('-----------------------------------------------')
return
# Check prediction against true classes
def check_prediction_yn(self, labels, verbose=False):
    """Score yes/no cell predictions against the stored cell counts.

    A sample is truly positive when ``self.cell_counts > 0`` and predicted
    positive when ``labels > 0``.

    Parameters
    ----------
    labels : array
        Predicted labels, aligned element-wise with ``self.cell_counts``.
    verbose : bool, optional
        When True, print the three scores as percentages.

    Returns
    -------
    prec, recall, acc
        Precision TP/(TP+FP), recall TP/(TP+FN) and accuracy
        (TP+TN)/total. A score whose denominator is zero is NaN.
    """
    truth_pos = self.cell_counts > 0
    truth_neg = self.cell_counts == 0
    pred_pos = labels > 0
    pred_neg = labels == 0

    TP = (truth_pos & pred_pos).sum()
    FP = (truth_neg & pred_pos).sum()
    TN = (truth_neg & pred_neg).sum()
    FN = (truth_pos & pred_neg).sum()

    def _ratio(num, den):
        # Undefined score (nothing to divide by) is reported as NaN.
        return num / den if den else float('nan')

    recall = _ratio(TP, TP + FN)
    # Precision is the share of predicted positives that are correct;
    # TN/(TN+FP), used previously, is the specificity.
    prec = _ratio(TP, TP + FP)
    acc = _ratio(TP + TN, TP + FP + TN + FN)

    if verbose:
        print('Accuracy  = {:.2f}'.format(acc*100))
        print('Precision = {:.2f}'.format(prec*100))
        print('Recall    = {:.2f}'.format(recall*100))

    return prec, recall, acc
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment