Setup environment¶

Imports¶

from cellsegment.core import *
from cellsegment.inference_utils import *
from cellsegment.set_directories import *
import pandas
from fastai import *
from fastai.vision import *

Define directories¶

# Root folder of the local dataset (only used outside Colab).
local_datapath = '../testdata_2/'
# local_datapath = '../../data/FEC/03-M100-Fluke-2019-11/'

if IN_COLAB:
    # Colab: build the directory set from 'data' and keep its model folder.
    dirs = Dirs('data')
else:
    # Local run: data from local_datapath, exported models from a fixed folder.
    dirs = Dirs(local_datapath)
    dirs.model = '/home/john/github/data/FEC/03-M100-Fluke-2019-11/models'

print(dirs)

None
  basepath        :  ../testdata_2/       
  crop            :  ../testdata_2//Crop-200 
  cropLabel       :  ../testdata_2//Crop-200/Label 
  cropTest        :  ../testdata_2//Crop-200/Test 
  cropTrain       :  ../testdata_2//Crop-200/Train 
  cropValidTxtFile:  ../testdata_2//Crop-200/valid.txt 
  label           :  ../testdata_2//Fullsize/Label 
  model           :  /home/john/github/data/FEC/03-M100-Fluke-2019-11/models 
  originImages    :  ../testdata_2//Original 
  sizeCsvFile     :  ../testdata_2//file_size.csv 
  test            :  ../testdata_2//Fullsize/Test 
  train           :  ../testdata_2//Fullsize/Train 
  validTxtFile    :  ../testdata_2//Fullsize/valid.txt

Load the Training Images¶

(if in colab)

%%bash
# Download the zipped training images from Google Drive and unpack them into ./data.
[[ ! -e /tools/google-cloud-sdk ]] &&  exit # stop here unless /tools/google-cloud-sdk exists (used as the Colab marker)

    # Google Drive file id and the local name to save the download under.
    export fileid=1SEW0Kf1CI4e4-up4TGsDqwDwVk_QZEUf
    export filename=Fluke-Train-2019-12-01.zip

    ## CURL ##
    # First request: store the cookies and pull the `confirm=` token out of the
    # response into confirm.txt.
    curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id='$fileid \
         | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt
    # Second request: send the cookies and the token back and save the file.
    curl -L -b cookies.txt -o $filename \
         'https://docs.google.com/uc?export=download&id='$fileid'&confirm='$(<confirm.txt)
    # Remove the temporary helper files.
    rm -f confirm.txt cookies.txt
    
    # Quietly extract into ./data, only updating files that are new or changed.
    unzip -u -q $filename -d data

Load exported Model¶

%%bash
# Download the exported learner (.pkl) from Google Drive into the current folder.
[[ ! -e /tools/google-cloud-sdk ]] &&  exit # stop here unless /tools/google-cloud-sdk exists (used as the Colab marker)
# Manual toggle: set to false to skip the download.
switch=true
if $switch; then  
    # Google Drive file id and the local name to save the download under.
    export fileid=11cZWhg23QDag_3b7jcd02U8Pzq3W6U5Y
    export filename=export-fluke-2019-12-01.pkl

    ## CURL ##
    # First request: store the cookies and pull the `confirm=` token out of the
    # response into confirm.txt.
    curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id='$fileid \
         | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt
    # Second request: send the cookies and the token back and save the file.
    curl -L -b cookies.txt -o $filename \
         'https://docs.google.com/uc?export=download&id='$fileid'&confirm='$(<confirm.txt)
    # Remove the temporary helper files.
    rm -f confirm.txt cookies.txt
fi

Run model inference on all of the test tiles¶

# The cropped tiles live under dirs.crop; the test tiles are in its 'Test' folder.
path = Path(dirs.crop)
test_dir = path / 'Test'
fnames = get_image_files(test_dir)

n_tiles = len(fnames)
print(f'Number of test tiles {n_tiles}')

Number of test tiles 13

Create Learner from exported model¶

# Load the exported learner, but only when a CUDA device is available.
# defaults.device = 'cpu'
# NOTE(review): the device is set to 'cuda' before the availability check below;
# without CUDA the else-branch runs and no learner is created.
defaults.device = 'cuda'
if torch.cuda.is_available():

    # NOTE(review): presumably these metrics have to exist under these names so
    # the exported learner can be unpickled — confirm against the training notebook.
    def acc_metric1(input, target):
        """Return the fraction of positions where argmax(input, dim=1) equals target.

        `target` has its dimension 1 squeezed before the comparison.
        """
        target = target.squeeze(1)
        return (input.argmax(dim=1)==target).float().mean()

    def acc_metric2(input, target):
        """Like `acc_metric1`, but only over positions where target > 0."""
        target = target.squeeze(1)
        return (input.argmax(dim=1)[target>0]==target[target>0]).float().mean()

    # The full path of the exported file is given as the file argument, so the
    # path argument of load_learner is left empty.
    fn_model = f'{dirs.model}/export-fluke-2019-12-01.pkl'
    learn = load_learner('', fn_model)

    # Cast the model to float (presumably because it was exported in half
    # precision — confirm).
    learn.model.float()
    # summary(learn.model, (3, 32, 32))
    print("Learner loaded")
else:
    print("Learner not loaded as torch.cuda.is_available() = ", torch.cuda.is_available())

Learner loaded

Inferences¶

Running an inference produces 3 tensors with the same xy dimensions. Each xy position contains the predicted class, the label and the probabilities for the pixel at that location. We concentrate here on the raw predictions, as these carry more information than the first two, which are binary thresholded results. Plotting each of the layers of raw_preds shows the probability of the pixel at that location being in one of the n classes. Here we have 4 classes: Background, Fluke-Liver, Fluke-Rumen & other.

def run_and_plot_raw_pred(fn):
    """Plot a tile next to the raw prediction layer of each class.

    The figure has five panels in one row: the tile itself (titled with the
    file name) followed by one heat map per class layer of the raw
    prediction returned by the global ``learn``, drawn on a fixed 0-1 scale.

    Args:
        fn: path of the image tile to open and run through the learner.
    """
    class_names = ['Background','Fluke-Liver','Fluke-Rumen','other']
    fig, axes = plt.subplots(1, 5, figsize=(15, 3))
    panels = list(axes.flat)

    img = open_image(fn)
    # Only the third item of the prediction (the raw per-class output) is plotted.
    pc, pi, raw_pred = learn.predict(open_image(fn))

    # Panel 0: the tile itself, converted from channels-first to channels-last.
    tile_ax = panels[0]
    tile_ax.imshow(img.data.permute(1,2,0).numpy())
    tile_ax.set_axis_off()
    tile_ax.set_title(fn.name)

    # Remaining panels: one heat map per class layer.
    for idx, class_ax in enumerate(panels[1:]):
        layer = raw_pred[idx].numpy()
        class_ax.imshow(layer, cmap='inferno', vmin=0, vmax=1)
        class_ax.set_axis_off()
        class_ax.set_title(class_names[idx])

# Load the test-tile table and keep only the Name, Label and Op columns.
df = pd.read_csv(path/"test_df.csv")[['Name', 'Label', 'Op']]
df

Raw_preds for a Liver Fluke¶

# Row 0 of the test table is used as the liver-fluke example.
liver_row = df.iloc[0]
run_and_plot_raw_pred(path/'Test'/liver_row.Name)
print('Label is ', liver_row.Label)

Label is  Fluke-Liver

Raw_preds for a Rumen Fluke¶

# Row 2 of the test table is used as the rumen-fluke example.
rumen_row = df.iloc[2]
run_and_plot_raw_pred(path/'Test'/rumen_row.Name)
print('Label is ', rumen_row.Label)

Label is  Fluke-Rumen

Probability Calculation¶

The background layer is used to generate a binary mask. For each of the n class layers (here n = 4) the average of all the pixels within the mask region is calculated and returned. These should sum to 1.

Run inferences on all the test tiles¶

# Sub-folders of the cropped dataset.
path_img, path_lbl, path_tst = (path/sub for sub in ('Train', 'Label', 'Test'))

# Sorted so the tiles are always processed in the same order.
fnames = sorted(get_image_files(path_tst))

print(f'Number of test tiles to run {len(fnames)}')
preds = run_inferences(learn, fnames, number_files='all')
print(f'Number of predictions generated {len(preds)}')

Number of test tiles to run 13
Number of predictions generated 13

Dataframe of inference probabilities¶

# Build the results table: one row per test tile with the per-class
# probabilities, the winning class and its probability, saved as results_df.csv.
print(f"Populate dataframe in path {path/'Test'}")
df = pd.read_csv(path/"test_df.csv")
df = df[['Name', 'Label', 'Op']]

col_heads = ['Background', 'Fluke_Liver', 'Fluke_Rumen', 'Other']

print('Adding inference probs to the dataframe')
preds_to_df(preds, col_heads, df)
df.sort_values(by=['Name'], inplace=True)

print('Finding maximums')
prob_cols = ["Background", "Fluke_Liver", "Fluke_Rumen"]
# Only the two fluke classes get a label; a Background winner stays NaN.
col_to_label = {"Fluke_Liver": 'Fluke-Liver', "Fluke_Rumen": 'Fluke-Rumen'}

# Pick the winning column from the unrounded values. Comparing the class
# columns against an already rounded maximum almost never matches, which
# left max_label as NaN for every row.
class_probs = df[prob_cols]
winning_col = class_probs.idxmax(axis=1)
df["max_prob"] = class_probs.max(axis=1).round(2)
df["max_label"] = winning_col.map(col_to_label)

# Written once, after all columns exist (the earlier intermediate write was
# overwritten by this one anyway).
df.to_csv(dirs.crop+'/results_df.csv', index=False)

#print(df[['Name', 'Label', 'Pstr', 'max_prob']].tail(10))
summary_df = df[['Name', 'Label', 'max_label', 'max_prob']]
summary_df

Populate dataframe in path ../testdata_2/Crop-200/Test
Adding inference probs to the dataframe
Finding maximums

Explanation of results¶

Plot of all the test tiles¶

# Show the test tiles on a 4 x 4 grid, starting from the first one and using
# the 'max_label' column of df.
# TODO: also add the actual label.
plot_inferences(
    preds,
    df,
    src_path=path_tst,
    label='max_label',
    start=0,
    cols=4,
    rows=4,
)

print("make image columns number even")
fnames = sorted(get_image_files(dirs.train))
img = open_image(fnames[1])

# When the last axis (image columns) has odd length, drop its first column
# so the length becomes even.
columns_are_odd = img.shape[2] % 2 != 0
if columns_are_odd:
    img.px = img.px[:, :, 1:]

pc, pi, raw_pred = learn.predict(img)

make even dimensions
torch.Size([3, 800, 817])

# NOTE(review): this call fails (see the traceback that follows): `img` is the
# fastai Image opened above, while the library function calls `img.max()` and
# `.astype(...)` on its argument, so it presumably expects a NumPy array —
# confirm what should be passed here (e.g. the raw prediction converted to a
# channels-last array).
find_prediction_blobs(img, CONF=0.5, min_area=500,offset=0)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-29-9fc8ac4b8d58> in <module>
----> 1 find_prediction_blobs(img, CONF=0.5, min_area=500,offset=0)

~/github/cellsegment/cellsegment/inference_utils.py in find_prediction_blobs(img, CONF, min_area, offset)
    154     CONF *= SCALE
    155 
--> 156     img = (img * (SCALE / img.max())).astype(np.int)
    157     img[img[:, :, 0] < CONF, 0] = 0
    158     img[img[:, :, 1] < CONF, 1] = 0

AttributeError: 'Image' object has no attribute 'max'

import torch
import torch.nn.functional as F

source = torch.ones((3, 5, 5))
# `pad` is given as (left, right, top, bottom) for the last two dimensions, so
# one row/column of zeros is added on every side: (3, 5, 5) -> (3, 7, 7).
result = F.pad(input=source, pad=(1, 1, 1, 1), mode='constant', value=0)
print(source.shape)
print(result.shape)

  File "<ipython-input-24-5712344f1e96>", line 7
    print(source.shape)
        ^
SyntaxError: invalid syntax

# Display the padded tensor.
result

# Display the shape of the padded tensor.
result.shape

old - todo¶

from skimage.measure import label, regionprops

def _get_props(tens, layer, min_conf, min_area=100):
    """Describe the connected regions of one layer of a prediction tensor.

    Layer 0 is thresholded on low values (``< 1 - min_conf``); every other
    layer is thresholded on high values (``> min_conf``). The thresholded
    layer is split into connected regions, and regions whose area is not
    larger than ``min_area`` are dropped.

    Args:
        tens: tensor indexed as ``[layer, row, col]`` that supports ``.numpy()``.
        layer: index of the layer to analyse.
        min_conf: confidence threshold.
        min_area: regions must be strictly larger than this many pixels.

    Returns:
        A list with one dict per kept region, holding ``class_layer``,
        ``centroid``, ``mean_intensity``, ``area`` and ``coords``.
    """
    # Use the tensor that was passed in (previously a global `o` was read and
    # the `tens` argument was ignored).
    arr = tens[layer,:,:].numpy()
    if layer == 0:
        label_arr = label(arr < 1-min_conf)
    else:
        label_arr = label(arr > min_conf)

    return [
        {
            "class_layer": layer,
            "centroid": region.centroid,
            "mean_intensity": region.mean_intensity,
            "area": region.area,
            "coords": region.coords,
        }
        for region in regionprops(label_arr, arr, cache=True)
        if region.area > min_area
    ]

def _get_bool_mask(o, min_conf=0.5):
    """Return a boolean mask of the first region found in layer 0 of ``o``.

    Args:
        o: tensor indexed as ``[layer, row, col]`` that supports ``.numpy()``.
        min_conf: confidence threshold handed to ``_get_props``.

    Returns:
        Boolean array with the row/column shape of a layer, True at the
        pixels of the first region returned for layer 0.

    Raises:
        IndexError: if no region is found in layer 0.
    """
    arr = o[0,:,:].numpy()
    mask = np.zeros(arr.shape, dtype=bool)
    bgnd = _get_props(o, 0, min_conf)
    region_coords = bgnd[0]["coords"]
    # Index with a (rows, cols) tuple: a plain list of lists is treated by
    # current NumPy as a single array index and would mark whole rows.
    mask[tuple(region_coords.T)] = True
    return mask

def _calc_probs(o):
    """Return, for every layer of ``o``, the mean of its values inside the mask.

    The mask comes from ``_get_bool_mask(o)``; the result is a list with one
    mean per layer, in layer order.
    """
    mask = _get_bool_mask(o)
    n_layers = o.shape[0]
    return [o[k].numpy()[mask].mean() for k in range(n_layers)]

old_ Calculate probability of marked up region¶

# # get boolean mask of a region
# mask = get_bool_mask(o)
# show_img(mask)

# NOTE(review): neither `calc_probs` nor `o` is defined in the cells visible in
# this notebook (the local helper is named `_calc_probs`) — presumably they
# come from the star imports or an earlier session; confirm before running.
p = calc_probs(o)
# Print the first three per-layer values with two decimals.
print(f'{p[0]:3.2f} {p[1]:3.2f} {p[2]:3.2f}')

p
# print(f'{p:3.2f}')p
# print(f'{p0.mean():3.2f},{p1.mean():3.2f},{p2.mean():3.2f},{p3.mean():3.2f}')
# print(f'{(p0+p1+p2).mean():3.2f}')

# Inspect the regions found in layers 0, 1 and 2 at a lower confidence threshold.
# The helper defined in this notebook is `_get_props`; `__get_props` does not exist.
min_conf = 0.2
bgnd = _get_props(o, 0, min_conf)
lyr1 = _get_props(o, 1, min_conf)
lyr1
lyr2 = _get_props(o, 2, min_conf)
lyr2
print(bgnd[0]["area"] )
# For each class layer: area, mean intensity, and the mean intensity weighted
# by the region's area relative to the area of the layer-0 region.
print(lyr1[0]["area"], lyr1[0]["mean_intensity"], lyr1[0]["mean_intensity"] * lyr1[0]["area"] / bgnd[0]["area"])
print(lyr2[0]["area"], lyr2[0]["mean_intensity"], lyr2[0]["mean_intensity"] * lyr2[0]["area"] / bgnd[0]["area"])
# region.mean_intensity * region.area / 314
bgnd[0]["centroid"] 

# Values of the four layers at position [90, 90].
print(*(o[k,90,90] for k in range(4)))

# Pixel coordinates of the first region found in layer 0.
AAcoords = bgnd[0]["coords"]
AAcoords

# Boolean mask of every pixel that belongs to a labelled region of layer 0 < 0.5.
# Named `fg_mask` so the builtin `bin` is no longer shadowed.
arr = o[0,:,:].numpy()
fg_mask = label(arr < 0.5) > 0
fg_mask.max()

# Mean of each of the first three layers inside the mask.
AA0 = o[0,:,:].numpy()[fg_mask]
AA0.mean()

AA1 = o[1,:,:].numpy()[fg_mask]
AA1.mean()

AA2 = o[2,:,:].numpy()[fg_mask]
AA2.mean()

# Mean of the summed layers inside the mask.
(AA0+AA1+AA2).mean()

def _regions_to_predictions(label_image, channel, name, min_area, resize):
    """Turn the labelled regions larger than ``min_area`` into prediction dicts."""
    found = []
    for region in regionprops(label_image, channel, cache=True):
        if region.area > min_area:
            # Centroid is (row, col); report it as [x, y] at original resolution.
            cx = int(region.centroid[1]*resize)
            cy = int(region.centroid[0]*resize)
            found.append({
                "label": name,
                "point": [cx,cy],
                "probability": region.max_intensity.round(2),
                "area": region.area
            })
    return found


def find_prediction_blobs(img, min_conf=0.1, min_area=100, plot=False):
    """Find blobs of confident predictions in channels 0 and 1 of ``img``.

    The image is downsampled by a factor of two, rescaled so that its maximum
    becomes 100 and cast to integers. Channels 0 and 1 are then thresholded
    at ``min_conf`` and split into connected regions; regions larger than
    the (resize-adjusted) ``min_area`` are reported.

    Args:
        img: channels-last NumPy array with at least two channels.
        min_conf: threshold as a fraction of the image maximum.
        min_area: minimum region area, divided by the resize factor before
            it is compared with areas measured on the downsampled image.
        plot: when true, show the labelled regions of both channels.

    Returns:
        ``(predictions, img)``: ``predictions`` is a list of dicts with
        ``label`` ('Strongyle' for channel 0, 'Nematodirus' for channel 1),
        ``point`` ([x, y] scaled back to the input resolution),
        ``probability`` (region maximum on the 0-100 scale) and ``area``
        (in downsampled pixels); ``img`` is the downsampled integer image.
    """
    RESIZE = 2.0
    SCALE = 100
    img = cv2.resize(img,None,fx=1.0/RESIZE, fy=1.0/RESIZE, interpolation = cv2.INTER_AREA)
    # NOTE(review): areas shrink by RESIZE**2 when both axes are halved, but the
    # threshold is only divided by RESIZE — kept as is, confirm the intent.
    min_area = int(min_area/RESIZE)
    min_conf *= SCALE

    # regionprops seems to have region.max_intensity errors on non-integer
    # data, hence the integer cast. The builtin `int` replaces the removed
    # `np.int` alias, and an all-zero image is scaled by 0 instead of
    # dividing by zero.
    peak = img.max()
    factor = SCALE/peak if peak > 0 else 0.0
    img = (img * factor).astype(int)
    img[img[:,:,0]<min_conf,0] = 0
    img[img[:,:,1]<min_conf,1] = 0

    label_image0 = label(img[:,:,0] > min_conf)
    label_image1 = label(img[:,:,1] > min_conf)

    predictions = (
        _regions_to_predictions(label_image0, img[:,:,0], 'Strongyle', min_area, RESIZE)
        + _regions_to_predictions(label_image1, img[:,:,1], 'Nematodirus', min_area, RESIZE)
    )

    if plot:
        #   show_img(imglab, figsize = (15,15))
        plt.figure(figsize=(15, 15))
        plt.subplot(121)
        plt.imshow(label_image0, cmap='nipy_spectral')
        #   plt.imshow(img[:,:,0] > min_conf, cmap='nipy_spectral')
        plt.axis('off')
        plt.subplot(122)
        plt.imshow(label_image1, cmap='nipy_spectral')
        plt.axis('off')

        plt.tight_layout()
        plt.show()

    return predictions, img

	Name	Label	max_label	max_prob
9	236568 - 15-0.jpg	Fluke-Liver	NaN	0.79
10	236568 - 15-1.jpg	Fluke-Liver	NaN	0.85
11	236568 - 15-2.jpg	Fluke-Liver	NaN	0.85
7	236568 - 3-0.jpg	Fluke-Liver	NaN	0.84
8	236568 - 3-1.jpg	Fluke-Liver	NaN	0.85
5	236568 - 4-0.jpg	Fluke-Liver	NaN	0.84
6	236568 - 4-1.jpg	Fluke-Liver	NaN	0.85
0	236568 - 7-0.jpg	Fluke-Liver	NaN	0.83
1	236568 - 8-0.jpg	Fluke-Liver	NaN	0.83
12	236569 - 13-0.jpg	Fluke-Rumen	NaN	0.79
3	236569 - 21-0.jpg	Fluke-Rumen	NaN	0.79
4	236569 - 21-1.jpg	Fluke-Rumen	NaN	0.80
2	236569 - 7-0.jpg	Fluke-Rumen	NaN	0.80

	Name	Label	Op
0	236568 - 7-0.jpg	Fluke-Liver	Test
1	236568 - 8-0.jpg	Fluke-Liver	Test
2	236569 - 7-0.jpg	Fluke-Rumen	Test
3	236569 - 21-0.jpg	Fluke-Rumen	Test
4	236569 - 21-1.jpg	Fluke-Rumen	Test
5	236568 - 4-0.jpg	Fluke-Liver	Test
6	236568 - 4-1.jpg	Fluke-Liver	Test
7	236568 - 3-0.jpg	Fluke-Liver	Test
8	236568 - 3-1.jpg	Fluke-Liver	Test
9	236568 - 15-0.jpg	Fluke-Liver	Test
10	236568 - 15-1.jpg	Fluke-Liver	Test
11	236568 - 15-2.jpg	Fluke-Liver	Test
12	236569 - 13-0.jpg	Fluke-Rumen	Test

Example - Inference evaluation

Setup environment¶

Imports¶

Define directories¶

Load the Training Images¶

Load exported Model¶

Run model inference on all of the test tiles¶

Create Learner from exported model¶

Inferences¶

Raw_preds for a Liver Fluke¶

Raw_preds for a Rumen Fluke¶

Probability Calculation¶

Run inferences on all the test tiles¶

Dataframe of inference probabilities¶

Explanation of results¶

Plot of all the test tiles¶

old - todo¶

old_ Calculate probability of marked up region¶