**On the AM0.1-FLUKE dataset**
import os
#hide
if IN_COLAB:
    print('Setting up Colab directories for the cellsegment library')
    os.system('mkdir -p /root/.torch/models')
    os.system('mkdir -p /root/.fastai/data')
    os.system('ln -s /root/.torch/models /content')
    os.system('ln -s /root/.fastai/data /content')
    os.system('rm -rf /content/sample_data/')
Setup¶
Imports¶
from cellsegment.core import *
from cellsegment.dataprep_utils import *
from cellsegment.inference_utils import *
from cellsegment.set_directories import *
import pandas as pd
from fastai import *
from fastai.vision import *
Define directories¶
dirs = Dirs('data') if IN_COLAB else Dirs('../testdata/')
print(dirs)
#! pip install fastai==1.0.57
! pip freeze | grep 'fastai\|torch'
# the following libraries are known to work
# fastai==1.0.57
# torch==1.1.0
# torchvision==0.3.0
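An optional sanity check (a minimal sketch): warn if the installed versions drift from the combination listed above.
import fastai, torch, torchvision
# compare running versions against the tested combination noted above
expected = {'fastai': '1.0.57', 'torch': '1.1.0', 'torchvision': '0.3.0'}
for mod in (fastai, torch, torchvision):
    if mod.__version__ != expected[mod.__name__]:
        print(f"Warning: {mod.__name__} {mod.__version__} differs from the tested {expected[mod.__name__]}")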
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    data_dir = "/content/drive/My Drive/Colab Notebooks/Techion/data"
Dataprep - optional, if not already done¶
Load the Training Images¶
%%bash
[[ ! -e /tools/google-cloud-sdk ]] && exit
export fileid=1SEW0Kf1CI4e4-up4TGsDqwDwVk_QZEUf
export filename=Fluke-Train-2019-12-01.zip
## CURL ##
curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id='$fileid \
| sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt
curl -L -b cookies.txt -o $filename \
'https://docs.google.com/uc?export=download&id='$fileid'&confirm='$(<confirm.txt)
rm -f confirm.txt cookies.txt
unzip -u -q $filename -d data
file_csv = dirs.basepath+'/file_data.csv'
fnames = sorted(get_image_files(dirs.train))
fnames = [fn.name for fn in fnames]
df = pd.DataFrame(fnames)
df.columns = ['Name']
df.to_csv(file_csv, index=False)
! rm -r /content/data/Crop-200
Shuffle and split file list into train, valid and test categories¶
shuffle_csv(file_csv,random_state=23)
split_filenames(file_csv, num_train=0.7, num_val=0.15)
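A quick proportion check (sketch only; it assumes split_filenames records the split in an 'Op' column of file_data.csv, as the crop_df usage below suggests):
check_df = pd.read_csv(file_csv)
# assumes an 'Op' column written by split_filenames; expect roughly 0.70 / 0.15 / 0.15
print(check_df['Op'].value_counts(normalize=True))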
Crop the image files based on json file centers¶
misslist, croplist= crop_img_dir(dirs.basepath+'/file_data.csv',
dirs.train, dirs.train, dirs.crop,
number_files='all', DEBUG=False)
print(f'Num Missed: {len(misslist)}, Num Cropped: {len(croplist)}')
Make crop file list and valid file list, save as csv and txt files¶
crop_df = pd.DataFrame(croplist)
crop_df = crop_df[['Name','Label', 'Op']]
# relabel with text
crop_df.loc[crop_df.Label == '40','Label'] = 'Fluke-Rumen'
crop_df.loc[crop_df.Label == '11','Label'] = 'Fluke-Liver'
crop_df.to_csv(dirs.crop+'/crop_df.csv', index=False)
crop_df.tail(10)
valid_df = crop_df[crop_df.Op=='Valid'].loc[:,'Name']
valid_df.to_csv(dirs.crop+'/valid.txt', index=False, header=True)
valid_df.head(10)
Crop the label files based on json file centers¶
labmisslist, labcroplist = crop_img_dir(file_csv, dirs.train, dirs.label, dirs.crop, number_files='all')
print(f'Num Missed: {len(labmisslist)}, Num Cropped: {len(labcroplist)}')
test_df = crop_df[crop_df.Op == 'Test']
test_df.to_csv(dirs.crop+'/test_df.csv', index=False)
test_df.tail(10)
Remove palette info from label images as it seems to mess up the dataloader¶
fnames = get_image_files(Path(dirs.crop)/'Label')
# fnames = fnames[:3]
for fn in fnames:
    # re-save each mask as a plain array so any palette information is dropped
    img = np.asarray(PIL.Image.open(fn))
    PIL.Image.fromarray(img.astype(np.uint8)).save(fn, quality=90)
print(f'Label files: {len(fnames)} resaved')
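A minimal spot-check (sketch): after re-saving, each mask should contain only the class indices that appear in `codes` below (0-3).
sample = np.asarray(PIL.Image.open(fnames[0]))
print('unique label values:', np.unique(sample))  # expect a subset of 0..3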
Training Section¶
Create DataBunch¶
path_img = dirs.cropTrain
path_lbl = dirs.cropLabel
codes = np.array(['background', '1', '2', '3']);codes
get_label_fn = lambda x: f'{path_lbl}/{x.stem}.png'
src = (SegmentationItemList.from_folder(path_img)
.split_by_fname_file('../valid.txt')
.label_from_func(get_label_fn, classes=codes, convert_mode='RGB'))
tfms = get_transforms(flip_vert=True, max_rotate=10, max_zoom=1.1, max_warp=0.2)
if IN_COLAB:
    bs = 60
else:
    bs = 20
data = (src.transform(tfms, tfm_y=True)
.databunch(bs=bs)
.normalize(imagenet_stats))
print("bs = ", bs)
print(data)
Show example images¶
data.show_batch(4, ds_type=DatasetType.Train, figsize=(10,10))
Learner¶
# import pdb
# pdb.set_trace()
def acc_metric1(input, target):
    # overall pixel accuracy across all classes
    target = target.squeeze(1)
    return (input.argmax(dim=1)==target).float().mean()

def acc_metric2(input, target):
    # pixel accuracy over foreground pixels only (background, class 0, is ignored)
    target = target.squeeze(1)
    return (input.argmax(dim=1)[target>0]==target[target>0]).float().mean()
metrics=[acc_metric1, acc_metric2]
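A toy illustration (sketch only) of the difference: acc_metric1 scores every pixel, while acc_metric2 ignores pixels whose target is background (class 0).
# logits for 2 classes on a 2x2 image: argmax is [[0,1],[1,1]]
logits = torch.tensor([[[[0.9, 0.1], [0.2, 0.1]],
                        [[0.1, 0.9], [0.8, 0.9]]]])
# target mask (batch, 1, H, W); the lower-left pixel is background but predicted as class 1
truth = torch.tensor([[[[0, 1], [0, 1]]]])
print(acc_metric1(logits, truth), acc_metric2(logits, truth))  # tensor(0.7500) tensor(1.)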
wd=1e-2
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd).to_fp16()
Find best learning rate¶
if torch.cuda.is_available():
    learn.lr_find()
    learn.recorder.plot()
Train body for 20 epochs with Adam¶
lr = 3e-3
learn.fit_one_cycle(20, slice(lr))
learn.show_results(rows=3, imgsize=5)
learn.save('stage1')
os.rename(learn.data.path/'models/stage1.pth', 'stage1.pth')
! cp 'stage1.pth' 'drive/My Drive/Colab Notebooks/Techion/data/models'
learn.load('stage1')
Unfreeze and train body for another 10 epochs¶
learn.unfreeze()
learn.fit_one_cycle(10, slice(lr/10))
defaults.device = torch.device('cpu')
learn.export('export.pkl')
os.rename(learn.data.path/'export.pkl', 'export-fluke-2019-12-01.pkl')
! cp 'export-fluke-2019-12-01.pkl' 'drive/My Drive/Colab Notebooks/Techion/data/Fluke'
learn.save('stage2')
os.rename(learn.data.path/'models/stage2.pth', 'stage2.pth')
! cp 'stage2.pth' 'drive/My Drive/Colab Notebooks/Techion/data/Fluke'
Work in Progress - Inference Section¶
Load exported Model¶
%%bash
switch=true
if $switch; then
export fileid=11cZWhg23QDag_3b7jcd02U8Pzq3W6U5Y
export filename=export-fluke.pkl
## CURL ##
curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id='$fileid \
| sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt
curl -L -b cookies.txt -o $filename \
'https://docs.google.com/uc?export=download&id='$fileid'&confirm='$(<confirm.txt)
rm -f confirm.txt cookies.txt
fi
# defaults.device = torch.device('cpu')
defaults.device = torch.device('cuda')
def acc_metric1(input, target):
    target = target.squeeze(1)
    return (input.argmax(dim=1)==target).float().mean()

def acc_metric2(input, target):
    target = target.squeeze(1)
    return (input.argmax(dim=1)[target>0]==target[target>0]).float().mean()
learn = load_learner('', 'export-fluke.pkl')
learn.model.float()
learn.show_results(rows=5, figsize=(10,20), ds_type=DatasetType.Valid)
import matplotlib.patches as patches
import matplotlib.patheffects as patheffects

def bb_hw(a): return np.array([a[1],a[0],a[3]-a[1],a[2]-a[0]])

def draw_outline(o, lw):
    o.set_path_effects([patheffects.Stroke(
        linewidth=lw, foreground='black'), patheffects.Normal()])

def draw_rect(ax, b):
    patch = ax.add_patch(patches.Rectangle(b[:2], *b[-2:], fill=False, edgecolor='white', lw=2))
    draw_outline(patch, 4)

def draw_text(ax, xy, txt, sz=14, color='white'):
    text = ax.text(*xy, txt,
        verticalalignment='top', color=color, fontsize=sz, weight='bold')
    draw_outline(text, 1)

def show_img(im, figsize=None, ax=None, alpha=None, label=None, title=None):
    if not ax: fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(im, alpha=alpha)
    ax.set_axis_off()
    if label: ax.legend()
    if title: ax.set_title(title)
    return ax
Run model inference on all of the 78 Test tiles¶
path = Path('data/Crop-200')
path_img = path/'Train'
path_lbl = path/'Label'
path_tst = path/'Test'
fnames = get_image_files(path_tst)
print(f'Number of test tiles: {len(fnames)}')
# Plot test images with auto markup labels
def run_inference(fnames, offset=0):
    fig, axes = plt.subplots(4, 5, figsize=(16, 12))
    for i, ax in enumerate(axes.flat):
        if i+offset >= len(fnames): break
        img = open_image(fnames[i+offset])
        pc,pi,o = learn.predict(img)
        show_image(img, ax=ax)
        show_image(pc, ax=ax, cmap='tab20', alpha=0.5)
        draw_text(ax, (0, 0), fnames[i+offset].stem, color='red')
    plt.tight_layout()
fnames = sorted(get_image_files(path/'Test'))
run_inference(fnames, offset=0)
run_inference(fnames, offset=20)
run_inference(fnames, offset=40)
run_inference(fnames, offset=60)
fnames = sorted(get_image_files(path/'Test'))
# preds= iu.run_inferences(learn, fnames, start=60)
preds= run_inferences(learn, fnames, number_files='all')
len(preds)
print(f'Populate dataframe in path {path_tst}')
df = pd.read_csv(path/"test_df.csv")
lst = add_cols_to_probs_df(df)
df.sort_values(by=['Name'], inplace=True)
df.to_csv(path/"test_df.csv", index=False)
df.tail(10)
print(f'Adding inference probs to the dataframe')
probs_to_df(preds, df)
df.to_csv(dirs.crop+'/results_df.csv', index=False)
df.sort_values(by=['Name'], inplace=True)
print(f'Find Maximums')
df = pd.read_csv(path/"results_df.csv")
df["max_prob"] = df[["Background", "Fluke_Liver", "Fluke_Rumen"]].max(axis=1)
df.tail(10)
df.loc[df.Fluke_Liver == df.max_prob, "max_prob"] = 'Fluke-Liver'
df.loc[df.Fluke_Rumen == df.max_prob, "max_prob"] = 'Fluke-Rumen'
df.tail(10)
df.to_csv(dirs.crop+'/results_df.csv', index=False)
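An equivalent way to get the winning class name is pandas idxmax; this sketch keeps the numeric maximum and writes the label to a separate, illustrative pred_label column instead of overwriting max_prob:
prob_cols = ["Background", "Fluke_Liver", "Fluke_Rumen"]
df["max_prob"] = df[prob_cols].max(axis=1)
# 'pred_label' is an illustrative column name, not one used elsewhere in this notebook
df["pred_label"] = df[prob_cols].idxmax(axis=1).str.replace("_", "-")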
# Plot images and labels
fnames = sorted(get_image_files(path/'Label'))
fig, axes = plt.subplots(4, 4, figsize=(12, 12))
for i, ax in enumerate(axes.flat):
    img = open_image(fnames[i])
    # pc,pi,o = learn.predict(img)
    ax = show_image(img, ax=ax)
    draw_text(ax, (0, 0), fnames[i].stem)
plt.tight_layout()
# for fn in fnames:
fnames = sorted(get_image_files(path/'Test'))
fn = fnames[65]
img = open_image(fn)
pc,pi,o = learn.predict(img)
# PIL.Image.fromarray(img.astype(np.uint8)).save(fn, quality=90)
# print(f'Label files: {len(fnames)} resaved')
pc
o.shape
o
# 236569 - 20-0
fnames = sorted(get_image_files(path/'Test'))
for i, fn in enumerate(fnames):
    print(i, fn)
fnames[65]
ax = show_image(img)
ax = show_image(pc,ax=ax, cmap='tab20', alpha=0.4)
# im = image2np(pc.cpu())
pc.show()
o
Load Half-Res Exported Model¶
%%bash
switch=true
if $switch; then
export fileid=11cZWhg23QDag_3b7jcd02U8Pzq3W6U5Y
export filename=export-fluke.pkl
## CURL ##
curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id='$fileid \
| sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt
curl -L -b cookies.txt -o $filename \
'https://docs.google.com/uc?export=download&id='$fileid'&confirm='$(<confirm.txt)
rm -f confirm.txt cookies.txt
fi
# defaults.device = torch.device('cpu')
defaults.device = torch.device('cuda')
def acc_metric1(input, target):
    target = target.squeeze(1)
    return (input.argmax(dim=1)==target).float().mean()

def acc_metric2(input, target):
    target = target.squeeze(1)
    return (input.argmax(dim=1)[target>0]==target[target>0]).float().mean()
learn = load_learner('', 'export-fluke.pkl')
learn.model.float()
Infer Classes¶
from fastai import *
from fastai.vision import *
# import pdb; pdb.set_trace()
def padImage_t(img, pad=100):
    # zero-pad the image tensor by `pad` pixels on every side
    if pad and pad > 0:
        return F.pad(input=img.px, pad=(pad, pad, pad, pad), mode='constant', value=0)
    else:
        return img

def cut_tiles_t(img, TM=4, TN=4, pad=100):
    # cut a padded tensor into TM x TN overlapping tiles, each carrying a `pad` border
    M, N = (img.shape[1]-pad*2)//TM, (img.shape[2]-pad*2)//TN
    OM, ON = pad + M//2, pad + N//2
    return [Image(img[:, x-OM:x+OM, y-ON:y+ON])
            for x in range(pad+M//2, img.shape[1], M)
            for y in range(pad+N//2, img.shape[2], N)]

def lay_tiles_t(tiles, TM=4, TN=4, pad=100):
    # strip the borders off each tile tensor and stitch the grid back together
    (_, M, N) = tiles[0].size()
    OM, ON = pad + M//2, pad + N//2
    for n, tile in enumerate(tiles):
        tiles[n] = tile[:, pad:-(pad+1), pad:-(pad+1)]
    hstack = [torch.cat(tiles[y:y+TN], dim=2) for y in range(0, TN*TM, TN)]
    return torch.cat(hstack, dim=1)
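A quick shape check (sketch only) on a dummy 3×400×600 image, using the TM=TN=2, pad=100 settings applied below: the pad / cut / lay round trip should give back roughly the original size, one pixel short per tile because of the pad:-(pad+1) crop.
dummy = Image(torch.rand(3, 400, 600))                 # fastai Image wrapper around a random tensor
padded = padImage_t(dummy, pad=100)                    # tensor, 3 x 600 x 800
tiles = cut_tiles_t(padded, TM=2, TN=2, pad=100)       # 4 overlapping tiles, each 3 x 400 x 500
stitched = lay_tiles_t([t.px for t in tiles], TM=2, TN=2, pad=100)
print(stitched.shape)                                  # torch.Size([3, 398, 598])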
# learn.model.to_fp32()
learn.data.remove_tfm(batch_to_half)
learn.model.float()
# img = open_image('data/images/220972 - 1.jpg').resize(1232)
img = open_image('data/images-half/220968 - 2.jpg')
# img = open_image('data/images/220972 - 1.jpg')
img = padImage_t(img)
tiles = cut_tiles_t(img, TM=2, TN=2, pad=100)
n = len(tiles)
pred_class, pred_idx, outputs = [None]*n, [None]*n, [None]*n
for i, im in enumerate(tiles):
    pc,pi,o = learn.predict(im)
    pred_class[i] = pc.px
vstack = lay_tiles_t(pred_class, TM=2, TN=2, pad=100)
Image(vstack).show(figsize = (10,10))
Infer Probabilities¶
Generate prediction png file¶
! rm -r data/images-half-markup
from fastai import *
from fastai.vision import *
def unet_predict_eggs(fn):
    img = open_image(fn)
    PAD = 100
    TM, TN = 2, 2
    img = padImage_t(img, pad=PAD)
    tiles = cut_tiles_t(img, TM=TM, TN=TN, pad=PAD)
    outputs = []
    for i, im in enumerate(tiles):
        pc,pi,o = learn.predict(im)
        outputs.append(o)
    vstack = lay_tiles_t(outputs, TM=TM, TN=TN, pad=PAD)
    # shift class probabilities down a channel so classes 1 and 2 land in the
    # red and green planes of the saved png; the blue plane is zeroed
    vstack[0,:,:] = vstack[1,:,:]
    vstack[1,:,:] = vstack[2,:,:]
    vstack[2,:,:] = 0
    # vstack[vstack<0.3] = 0
    img = to_np(vstack).transpose(1,2,0)
    return img
def predict_in_image(fn, CONF=0.3):
    pred_img = unet_predict_eggs(fn)
    # predictions = mark_predictions(pred_img, CONF=CONF)
    # # print(predictions)
    # jdata= annotate_json(f'{img_path}/{fn.stem}.json', predictions)
    # mrk_img = np.asarray(PIL.Image.open(fn))
    # mrk_img = draw_labels_cv(mrk_img, jdata, radius=50)
    # return pred_img, mrk_img, jdata
    return pred_img
# img_path = Path('data/images/')
# mrk_path = Path('data/markup/')
img_path = Path('data/images-half/')
mrk_path = Path('data/images-half-markup/')
mrk_path.mkdir(parents=True, exist_ok=True)
fnames = sorted(get_image_files(img_path))
# fnames = fnames[600:]
fnames = fnames[-100:]
print(f"Number images to process {len(fnames)}")
for fn in fnames:
    print(fn)
    # pred_img, mrk_img, jdata = predict_in_image(fn)
    pred_img = predict_in_image(fn)
    # with open(f'{mrk_path}/{fn.stem}.json', 'w') as outfile:
    #     json.dump(jdata, outfile, ensure_ascii=False, indent=4)
    PIL.Image.fromarray((pred_img*255).astype(np.uint8)).save(f'{mrk_path}/{fn.stem}.png')
    # PIL.Image.fromarray(mrk_img.astype(np.uint8)).save(f'{mrk_path}/{fn.stem}.jpg', quality=90)
# if len(fnames) < 2:
#     show_img(mrk_img, figsize = (10,10))
%%bash
cd data
# zip -r 'test_images_markup.zip' testimages-markup
zip -r 'images-half-markup.zip' images-half-markup
# ! mv data/'test_images_markup.zip' '/content/drive/My Drive/Colab Notebooks/Techion/data/SecondDataSet'
!ls data
! mv data/images-half-markup.zip '/content/drive/My Drive/Colab Notebooks/Techion/data/SecondDataSet'
Analyse Class Predictions¶
# import pdb; pdb.set_trace()
import cv2
from skimage.measure import label, regionprops
from skimage import filters
from skimage.morphology import erosion, dilation, opening, closing, disk
from scipy.spatial import distance
import pandas as pd
from pandas import DataFrame
#
# class AnalysePredictions(object):
# """Methods for preparing and the inference of Well images"""
# def __init__(self):
# """What to do here"""
# self.set_paths();
#
# def set_paths(self, base_path=None, img_path=None, mrk_path=None):
# if base_path is None:
# base_path = Path().absolute()
# if img_path is not None:
# self.img_path = base_path/Path(img_path)
# self.mrk_path = base_path/Path(mrk_path)
# else:
# self.img_path = Path('data/images/')
# self.mrk_path = Path('data/markup/')
#
# # NP coder for Tile into MxN sections to reduce memory footprint
# def padImage_np(self, img, padding=100):
# if padding and padding > 0:
# return np.stack([np.pad(img[:,:,c], padding, mode='constant', constant_values=0) for c in range(3)], axis=2)
# else:
# return img
#
# def cut_tiles_np(img, TM=4, TN=4, pad=100):
# M, N = (img.shape[0]-pad*2)//TM, (img.shape[1]-pad*2)//TN
# OM, ON = pad + M//2, pad + N//2
# return [img[x-OM:x+OM,y-ON:y+ON,:] for x in range(pad+M//2,img.shape[0],M) for y in range(pad+N//2,img.shape[1],N)]
#
# def lay_tiles_np(self, tiles, TM=4, TN=4, pad=100):
# OM, ON = pad + M//2, pad + N//2
# for n, tile in enumerate(tiles):
# tiles[n] = tile[pad:-pad,pad:-pad,:]
#
# hstack = [np.concatenate(tiles[y:y+TN],axis=1) for y in range(0,TN*TM,TN)]
# return np.concatenate(hstack,axis=0)
#
# def test_tile_np(self):
# img = to_np(open_image('data/subset/220972 - 1.jpg').resize(800).px).transpose(1,2,0)
# img = padImage_np(img)
# tiles = cut_tiles_np(img)
# vstack = lay_tiles_np(tiles, TM=4, TN=4, pad=100)
# show_img(vstack, figsize = (10,10))
#
# ## Tensor coder for Tile into MxN sections to reduce memory footprint
#
# def padImage_t(self, img, pad=100):
# if pad and pad > 0:
# return F.pad(input=img.px, pad=(pad, pad, pad, pad), mode='constant', value=0)
# else:
# return img
#
# def cut_tiles_t(self, img, TM=4, TN=4, pad=100):
# M, N = (img.shape[1]-pad*2)//TM, (img.shape[2]-pad*2)//TN
# OM, ON = pad + M//2, pad + N//2
# return [Image(img[:,x-OM:x+OM,y-ON:y+ON]) for x in range(pad+M//2,img.shape[1],M) for y in range(pad+N//2,img.shape[2],N)]
#
# def lay_tiles_t(self, tiles, TM=4, TN=4, pad=100):
# (_,M,N) = tiles[0].size()
# OM, ON = pad + M//2, pad + N//2
# for n, tile in enumerate(tiles):
# tiles[n] = tile[:,pad:-(pad+1),pad:-(pad+1)]
#
# hstack = [torch.cat(tiles[y:y+TN],dim=2) for y in range(0,TN*TM,TN)]
# return torch.cat(hstack,dim=1)
#
# def test_tile_t(self):
# img = open_image('data/subset/220972 - 1.jpg')
# # img = to_np(open_image('data/subset/220972 - 1.jpg').resize(800).px).transpose(1,2,0)
# img = padImage_t(img)
# tiles = cut_tiles_t(img)
#
# for n, tile in enumerate(tiles):
# tiles[n] = tile.px
# print(tile.px.shape)
#
# vstack = lay_tiles_t(tiles)
# Image(vstack).show(figsize = (10,10))
#
# ## Drawing annotation labels on an image
# def draw_labels_cv(self, img, json, radius=40):
# font = cv2.FONT_HERSHEY_SIMPLEX
# for s, sh in enumerate(json['shapes']):
# if sh["label"][:3] == "Str":
# fill = (255,0,0)
# elif sh["label"][:3] == "Nem":
# fill = (0,255,0)
# else:
# print('[Error]: unknown label')
#
# draw = 'None'
# if sh['shape_type'] == 'circle':
# draw = 'circle'
# probability = str(sh['probability']) if 'probability' in sh else ''
# elif sh['shape_type'] == 'rectangle':
# draw = 'rectangle'
# else:
# print("Unknown shape_type", sh['shape_type'])
#
#
# xy = np.asarray(sh["points"])
# ave = np.mean(xy,axis=0)
#
# cx = int(ave[0])
# cy = int(ave[1])
#
# if draw == 'circle':
# cv2.circle(img, (cx, cy), radius, fill, 2)
# cv2.circle(img, (cx, cy), radius, fill, 2)
# cv2.putText(img,probability,(int(cx-radius), cy-radius), font, 1, fill, 2, cv2.LINE_AA)
#
#
# elif draw == 'rectangle':
# cv2.rectangle(img, (cx - radius, cy - radius), (cx + radius, cy + radius), fill, 2)
#
# return img
# def test_draw_labels_cv(self):
# img_path = Path('data/subset/')
# tst_path = Path('data/test/')
# tst_path.mkdir(parents=True, exist_ok=True)
# fn = Path('data/markup/220966 - 1.png')
# img = np.asarray(PIL.Image.open(fn))
# _json = json.load(open('data/markup/220966 - 1.json'))
# mrk_img = draw_labels_cv(img, _json, radius=40)
# show_img(mrk_img[:500,1000:1500,:], figsize = (10,10))
# PIL.Image.fromarray(mrk_img.astype(np.uint8)).save(f'{tst_path}/{fn.stem}.jpg', quality=90)
#
# ##
# def find_prediction_blobs(self, img, CONF=0.5, radius=40, plot=False):
# # region props seems to have region.max_intensity errors if no data not np.int
# SCALE = 100
# CONF *= SCALE
# selem = disk(6)
# # img = filters.gaussian(img, sigma= 1 / 40, multichannel=True)
# img = (img * (SCALE/img.max())).astype(np.int)
# img[img[:,:,0]<CONF,0] = 0
# img[img[:,:,1]<CONF,1] = 0
# # img[:,:,0] = img[:,:,0] > CONF
# # img[:,:,1] = img[:,:,1] > CONF
# predictions = []
# # imgL = img[:,:,0].astype(np.int)
# # img[:,:,0] = closing(img[:,:,0], selem)
# # img[:,:,1] = closing(img[:,:,1], selem)
# # img[:,:,0] = opening(img[:,:,0], selem)
# # img[:,:,1] = opening(img[:,:,1], selem)
#
# fill = (255,0,0)
# label_image0 = label(img[:,:,0] > CONF)
# label_image1 = label(img[:,:,1] > CONF)
#
# # img = img.copy() # helped with a cv error?
# img = np.array(img) # helped with a cv error?
# for region in regionprops(label_image0, img[:,:,0], cache=True):
# if region.area > 100:
# cx = int(region.centroid[1])
# cy = int(region.centroid[0])
# # import pdb; pdb.set_trace()
# # cv2.rectangle(img, (cx - radius, cy - radius), (cx + radius, cy + radius), fill, 5)
# predictions.append({"label": 'Strongyle', "point": [cx,cy], "probability": region.max_intensity.round(2)})
# # print(' 0:', [cx,cy], 'area:', region.area,
# # 'max', region.max_intensity.round(2),
# # 'mean', region.mean_intensity.round(2))
# fill = (0,255,0)
# img = np.array(img) # helped with a cv error?
# for region in regionprops(label_image1, img[:,:,1]):
# if region.area > 100:
# cx = int(region.centroid[1])
# cy = int(region.centroid[0])
# # cv2.rectangle(img, (cx - radius, cy - radius), (cx + radius, cy + radius), fill, 5)
# predictions.append({"label": 'Nematodirus',"point": [cx,cy], "probability": region.max_intensity.round(2)})
# # print(f' 1: area {region.area}, max intensity {region.max_intensity.round(5)}')
# # # print(region.area)
# if plot:
# # show_img(imglab, figsize = (15,15))
# plt.figure(figsize=(15, 15))
# plt.subplot(121)
# plt.imshow(label_image0, cmap='nipy_spectral')
# # plt.imshow(img[:,:,0] > CONF, cmap='nipy_spectral')
# plt.axis('off')
# plt.subplot(122)
# plt.imshow(label_image1, cmap='nipy_spectral')
# plt.axis('off')
#
# plt.tight_layout()
# plt.show()
#
# return predictions, img
# def test_find_prediction_blobs(self ):
# # fn = 'data/markup/220966 - 1.png'
# tst_path = Path('data/test/')
# fn = Path('data/markup/221221 - 1.png')
# print(f"Testing: def test_find_prediction_blobs('{fn}'):")
# img = np.asarray(PIL.Image.open(fn))
#
#
# anno_list, proc_img = find_prediction_blobs(img, plot=True)
# print("Max value", proc_img.max())
# PIL.Image.fromarray(proc_img.astype(np.uint8)).save(f'{tst_path}/{fn.stem}.png')
# # print(anno_list)
#
# ## Annotate the json file with predictions
# def annotate_json(self, fn, annotations=None):
# def add_anno(data, item):
# r = 40
# cx,cy = item['point']
# pnt_list = [[cx-r,cy-r], [cx+r,cy+r]]
#
# probability = str(item['probability']) if 'probability' in item else str(0)
#
# if item['label'][:3]=='Str':
# line_color = [255,0,0,127]
# data['shapes'].append({
# "label": item['label'],"line_color": line_color, "fill_color": None,
# "points": pnt_list, "shape_type": "circle", 'probability': probability
# })
#
# elif item['label'][:3]=='Nem':
# line_color = [0,255,0,127]
# data['shapes'].append({
# "label": item['label'],"line_color": line_color, "fill_color": None,
# "points": pnt_list, "shape_type": "circle", 'probability': probability
# })
# else:
# print('Unknown label')
#
# def add_annotations(data, annotations):
# for item in annotations:
# add_anno(data, item)
#
# def del_circle_annotations(data):
# to_del = [s for s,sh in enumerate(data['shapes']) if sh['shape_type']=='circle']
# if len(to_del) > 0:
# print(f'Deleting {len(to_del)} circle annotations')
# for i in sorted(to_del, reverse=True):
# del data['shapes'][i]
#
# data = json.load(open(fn))
# del_circle_annotations(data)
# add_annotations(data, annotations)
# return data
#
# def test_annotate_json(self):
# fn = 'data/markup/220966 - 1.png'
# img = np.asarray(PIL.Image.open(fn))
#
# predictions, _ = self.find_prediction_blobs(img, plot=False)
# print(predictions)
# data= self.annotate_json('data/subset/220966 - 1.json', predictions)
# print(data)
# with open('data/subset/220966 - 1.json', 'w') as outfile:
# json.dump(data, outfile, ensure_ascii=False, indent=4)
#
# ## Unet Predict classes from a well image
# def unet_predict_classes(self):
# fn = 'data/subset/220967 - 1.json'
# data = json.load(open(fn))
#
# ## Infer Classes
#
# img = open_image('data/subset/220972 - 1.jpg')
#
# img = padImage_t(img)
# tiles = cut_tiles_t(img)
#
# n = len(tiles)
# pred_class, pred_idx, outputs = [None]*n, [None]*n, [None]*n
# for i, im in enumerate(tiles):
# pc,pi,o = learn.predict(im)
# pred_class[i] = pc.px
#
# vstack = lay_tiles_t(pred_class, TM=4, TN=4, pad=100)
# Image(vstack).show(figsize = (10,10))
#
# ## Infer Probabilities
# def unet_predict_eggs(self, fn):
# img = open_image(fn)
#
# PAD = 100
# TM, TN =4, 4
# img = padImage_t(img, pad=PAD)
# tiles = cut_tiles_t(img, TM=TM, TN=TN, pad=PAD)
#
# outputs = []
# for i, im in enumerate(tiles):
# pc,pi,o = learn.predict(im)
# outputs.append(o)
#
# vstack = lay_tiles_t(outputs, TM=TM, TN=TN, pad=PAD)
# vstack[0,:,:] = vstack[1,:,:]
# vstack[1,:,:] = vstack[2,:,:]
# vstack[2,:,:] = 0
# # vstack[vstack<0.3] = 0
# img = to_np(vstack).transpose(1,2,0)
# return img
#
#
# ### Generate prediction png file
# def predict_in_image(self, fn, CONF=0.3):
# pred_img = unet_predict_eggs(fn)
# predictions = mark_predictions(pred_img, CONF=CONF)
# # print(predictions)
# jdata= annotate_json(f'{img_path}/{fn.stem}.json', predictions)
#
# mrk_img = np.asarray(PIL.Image.open(fn))
# mrk_img = draw_labels_cv(mrk_img, jdata, radius=50)
# return pred_img, mrk_img, jdata
#
# def markup_image(self, img, CONF=0.5):
# predictions, _ = self.find_prediction_blobs(img, CONF=CONF)
# # print(predictions)
# jdata= self.annotate_json(f'{self.img_path}/{fn.stem}.json', predictions)
#
# mrk_img = np.asarray(PIL.Image.open(f'{img_path}/{fn.stem}.jpg'))
# mrk_img = self.draw_labels_cv(mrk_img, jdata, radius=50)
# return mrk_img, jdata
#
# def markup_all_images_dir(self, count='all', CONF=0.5):
# fnames = sorted(get_files(self.mrk_path, '.png'))
# if count.isdigit():
# fnames = fnames[:count]
# print(f'Marking up {len(fnames)} images')
#
# for n, fn in enumerate(fnames):
# # print(fn, end=' ')
# src_img = np.asarray(PIL.Image.open(fn))
# # find blobs in png mrk_img, jdata = self.markup_image(img)
# predictions, _ = self.find_prediction_blobs(src_img, CONF=CONF)
# print(len(predictions), end=', ')
# if n % 20 == 0:
# print(';')
#
# # annotate json
# jdata= self.annotate_json(f'{self.img_path}/{fn.stem}.json', predictions)
# with open(f'{self.mrk_path}/{fn.stem}.json', 'w') as outfile:
# json.dump(jdata, outfile, ensure_ascii=False, indent=4)
# # mark up jpg
# mrk_img = np.asarray(PIL.Image.open(f'{self.img_path}/{fn.stem}.jpg'))
# mrk_img = self.draw_labels_cv(mrk_img, jdata, radius=50)
# PIL.Image.fromarray(mrk_img.astype(np.uint8)).save(f'{self.mrk_path}/{fn.stem}.jpg', quality=90)
#
# return mrk_img
#
#
#
# def calc_stats_row(self, jdata, radius=30):
# human = []
# machine = []
# for s,sh in enumerate(jdata['shapes']):
# ave = np.mean(np.asarray(sh["points"]),axis=0).tolist()
# if sh['shape_type'] == 'rectangle':
# human.append(ave)
# elif sh['shape_type'] == 'circle':
# machine.append(ave)
# else:
# print("unknown label", data["imagePath"])
#
# if (len(human) > 0) and (len(machine) > 0):
# dist = distance.cdist(human, machine, 'euclidean')
# n_human, n_AI, n_match = len(human), len(machine), (np.min(dist, axis=1) < radius).sum()
# else:
# n_human, n_AI, n_match = len(human), len(machine), 0
#
# row = {
# 'File': jdata["imagePath"],
# 'Num Human': n_human,
# 'Num AI': n_AI,
# 'Matched': n_match,
# 'AI: Un-matched': n_AI - n_match,
# 'AI: Missed Eggs': n_human - n_match
# }
#
# return row
#
# def calc_stats_table(self):
# # img_path = Path('data/markup')
# # mrk_path = Path('data/testimages-markup/')
#
# fnames = sorted(get_files(self.mrk_path, '.json'))
#
# df = DataFrame (columns = ['File', 'Num Human','Num AI', 'Matched', 'AI: Un-matched', 'AI: Missed Eggs'])
# fnames = fnames
# for fn in fnames:
# data = json.load(open(fn))
# row = self.calc_stats_row(data, radius=30)
# df = df.append(row, ignore_index=True)
#
# for col in range(1, len(df.columns)):
# df.iloc[:,col] = pd.to_numeric(df.iloc[:,col])
#
# _sum = df.sum(axis = 0, skipna=True, numeric_only=True).rename('Total')
# _mean = df.mean(axis = 0, skipna=True, numeric_only=True).rename('Mean')
#
# # df= df.append(df.sum(axis = 0, skipna=True, numeric_only=True).rename('Total'))
#
# # df=df.append(df.mean(axis = 0, skipna=True, numeric_only=True).rename('Mean'))
# df = df.append(_mean).append(_sum)
# return df
# def plot_piechart(self, df):
# fig, axes = plt.subplots(1, 2, figsize=(12, 6))
# colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
# explode = (0.1, 0) # explode 1st slice
# labels = 'Matched Eggs', 'AI: Missed Eggs'
# sizes = [df.at['Total','Matched'], df.at['Total','AI: Missed Eggs']]
# axes[0].pie(sizes, explode=explode, labels=labels, colors=colors,
# autopct='%1.1f%%', shadow=True, startangle=140)
#
# labels = 'Matched Eggs', 'AI: Predictions not Matched'
# sizes = [df.at['Total','Matched'], df.at['Total','AI: Un-matched']]
# axes[1].pie(sizes, explode=explode, labels=labels, colors=colors,
# autopct='%1.1f%%', shadow=True, startangle=140)
# plt.tight_layout(pad=0.0)
# # pi = AnalysePredictions()
# # print("Start tests:")
# pi.test_annotate_json()
# ii.test_draw_labels_cv()
# ii.test_find_prediction_blobs()
Run Analysis¶
# def show_img(im, figsize=None, ax=None, alpha=None):
# if not ax: fig,ax = plt.subplots(figsize=figsize)
# ax.imshow(im, alpha=alpha)
# ax.set_axis_off()
# return ax
#
#
# ip = AnalysePredictions()
# ip.set_paths(base_path='',
# img_path='data/images-half/',
# mrk_path = 'data/images-half-markup/')
# mrk_img = ip.markup_all_images_dir(count = 'all')
# show_img(mrk_img, figsize = (10,10))
# df = ip.calc_stats_table()
# df.tail(10)
# ip.plot_piechart(df)