**Model trained On the AM0.1-FLUKE dataset**

Setup environment

Imports

from cellsegment.core import *
from cellsegment.inference_utils import *
from cellsegment.set_directories import *
import pandas
from fastai import *
from fastai.vision import *

Define directories

# Root of the dataset for local runs; not used when running in Colab.
local_datapath = '../testdata_2/'
# local_datapath = '../../data/FEC/03-M100-Fluke-2019-11/'

if IN_COLAB:
    # Colab: data is under ./data and the model directory derived by Dirs is kept.
    dirs = Dirs('data')
else:
    # Local run: data comes from local_datapath, the model from a fixed directory.
    dirs = Dirs(local_datapath)
    dirs.model = '/home/john/github/data/FEC/03-M100-Fluke-2019-11/models'

print(dirs)
None
  basepath        :  ../testdata_2/       
  crop            :  ../testdata_2//Crop-200 
  cropLabel       :  ../testdata_2//Crop-200/Label 
  cropTest        :  ../testdata_2//Crop-200/Test 
  cropTrain       :  ../testdata_2//Crop-200/Train 
  cropValidTxtFile:  ../testdata_2//Crop-200/valid.txt 
  label           :  ../testdata_2//Fullsize/Label 
  model           :  /home/john/github/data/FEC/03-M100-Fluke-2019-11/models 
  originImages    :  ../testdata_2//Original 
  sizeCsvFile     :  ../testdata_2//file_size.csv 
  test            :  ../testdata_2//Fullsize/Test 
  train           :  ../testdata_2//Fullsize/Train 
  validTxtFile    :  ../testdata_2//Fullsize/valid.txt 

Load the Training Images

(if in colab)

%%bash
# Download and unpack the training-image archive from Google Drive.
# The guard exits early when /tools/google-cloud-sdk is missing, so the rest
# only runs in Colab (presumably that directory exists only there).
[[ ! -e /tools/google-cloud-sdk ]] &&  exit # skip the download unless running in Colab

    # Google Drive file id and the local file name to save the archive under.
    export fileid=1SEW0Kf1CI4e4-up4TGsDqwDwVk_QZEUf
    export filename=Fluke-Train-2019-12-01.zip

    ## CURL ##
    # Request 1: store the session cookies and extract the `confirm=` token
    # from the response into confirm.txt.
    curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id='$fileid \
         | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt
    # Request 2: replay the cookies plus the token and write the download to $filename.
    curl -L -b cookies.txt -o $filename \
         'https://docs.google.com/uc?export=download&id='$fileid'&confirm='$(<confirm.txt)
    # Remove the temporary handshake files.
    rm -f confirm.txt cookies.txt
    
    # Unpack into ./data quietly (-q), only updating files that are newer or missing (-u).
    unzip -u -q $filename -d data

Load exported Model

%%bash
# Download the exported learner (.pkl) from Google Drive into the working directory.
# The guard exits early when /tools/google-cloud-sdk is missing, so the rest
# only runs in Colab (presumably that directory exists only there).
[[ ! -e /tools/google-cloud-sdk ]] &&  exit # skip the download unless running in Colab
# Manual toggle: set to false to skip the download below.
switch=true
if $switch; then  
    # Google Drive file id and the local file name to save the model under.
    export fileid=11cZWhg23QDag_3b7jcd02U8Pzq3W6U5Y
    export filename=export-fluke-2019-12-01.pkl

    ## CURL ##
    # Request 1: store the session cookies and extract the `confirm=` token
    # from the response into confirm.txt.
    curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id='$fileid \
         | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt
    # Request 2: replay the cookies plus the token and write the download to $filename.
    curl -L -b cookies.txt -o $filename \
         'https://docs.google.com/uc?export=download&id='$fileid'&confirm='$(<confirm.txt)
    # Remove the temporary handshake files.
    rm -f confirm.txt cookies.txt
fi

Run model inference on all of the test tiles

# Cropped tiles live under dirs.crop; the tiles to run are in its Test sub-folder.
path = Path(dirs.crop)
fnames = get_image_files(path / 'Test')

print(f'Number of test tiles {len(fnames)}')
Number of test tiles 13

Create Learner from exported model

# defaults.device = 'cpu'
defaults.device = 'cuda'

if not torch.cuda.is_available():
    print("Learner not loaded as torch.cuda.is_available() = ", torch.cuda.is_available())
else:
    # NOTE(review): the two metrics are presumably defined here because the
    # exported learner references them by name when it is loaded - confirm.

    def acc_metric1(input, target):
        """Return the fraction of pixels whose arg-max class equals the target."""
        labels = target.squeeze(1)
        predicted = input.argmax(dim=1)
        return (predicted == labels).float().mean()

    def acc_metric2(input, target):
        """Return the same accuracy restricted to pixels whose target is > 0."""
        labels = target.squeeze(1)
        labelled = labels > 0
        return (input.argmax(dim=1)[labelled] == labels[labelled]).float().mean()

    fn_model = f'{dirs.model}/export-fluke-2019-12-01.pkl'
    learn = load_learner('', fn_model)

    learn.model.float()
    # summary(learn.model, (3, 32, 32))
    print("Learner loaded")
    
Learner loaded

Inferences

Running an inference produces 3 tensors with the same xy dimensions. Each xy position contains the predicted class, label and probabilities for the pixel at that location. We concentrate here on the raw predictions, as these give us more information: the first two are binary thresholded results. Plotting each of the layers of raw_preds shows the probability of the pixel at that location being in one of the n classes. Here we have 4 classes: Background, Fluke-Liver, Fluke-Rumen and other.

def run_and_plot_raw_pred(fn):
    """Plot a tile next to the raw prediction layer of every class.

    The tile is opened once, passed to the global ``learn`` for prediction and
    drawn in the first subplot; each following subplot shows one layer of the
    raw prediction on a fixed 0..1 colour scale.

    Args:
        fn: path of the image file; ``fn.name`` is used as the first title.
    """
    titles = ['Background', 'Fluke-Liver', 'Fluke-Rumen', 'other']
    # One axis for the tile itself plus one per class layer.
    fig, axes = plt.subplots(1, len(titles) + 1, figsize=(15, 3))

    img = open_image(fn)
    # Reuse the decoded image instead of reading the file from disk a second time.
    pc, pi, raw_pred = learn.predict(img)

    tile_ax = axes.flat[0]
    # The image tensor is channel-first; imshow needs channel-last.
    tile_ax.imshow(img.data.permute(1, 2, 0).numpy())
    tile_ax.set_axis_off()
    tile_ax.set_title(fn.name)

    for i, (ax, title) in enumerate(zip(axes.flat[1:], titles)):
        ax.imshow(raw_pred[i, :, :].numpy(), cmap='inferno', vmin=0, vmax=1)
        ax.set_axis_off()
        ax.set_title(title)
# Test-tile metadata, reduced to the Name, Label and Op columns.
df = pd.read_csv(path / "test_df.csv")[['Name', 'Label', 'Op']]
df
Name Label Op
0 236568 - 7-0.jpg Fluke-Liver Test
1 236568 - 8-0.jpg Fluke-Liver Test
2 236569 - 7-0.jpg Fluke-Rumen Test
3 236569 - 21-0.jpg Fluke-Rumen Test
4 236569 - 21-1.jpg Fluke-Rumen Test
5 236568 - 4-0.jpg Fluke-Liver Test
6 236568 - 4-1.jpg Fluke-Liver Test
7 236568 - 3-0.jpg Fluke-Liver Test
8 236568 - 3-1.jpg Fluke-Liver Test
9 236568 - 15-0.jpg Fluke-Liver Test
10 236568 - 15-1.jpg Fluke-Liver Test
11 236568 - 15-2.jpg Fluke-Liver Test
12 236569 - 13-0.jpg Fluke-Rumen Test

Raw_preds for a Liver Fluke

# Raw prediction layers for the first row of the test dataframe.
run_and_plot_raw_pred(path / 'Test' / df.iloc[0]['Name'])
print('Label is ', df.iloc[0]['Label'])
Label is  Fluke-Liver

Raw_preds for a Rumen Fluke

# Raw prediction layers for the third row of the test dataframe.
run_and_plot_raw_pred(path / 'Test' / df.iloc[2]['Name'])
print('Label is ', df.iloc[2]['Label'])
Label is  Fluke-Rumen

Probability Calculation

The background layer is used to generate a binary mask. For each of the n class layers (here n = 4), the average of all the pixels within the mask region is calculated and returned. These should sum to 1.

Run inferences on all the test tiles

# Sub-folders of the cropped dataset.
path_img, path_lbl, path_tst = path / 'Train', path / 'Label', path / 'Test'

# Sorted so that the predictions come back in a stable, name-based order.
fnames = sorted(get_image_files(path_tst))

print(f'Number of test tiles to run {len(fnames)}')
preds = run_inferences(learn, fnames, number_files='all')
print(f'Number of predictions generated {len(preds)}')
Number of test tiles to run 13
Number of predictions generated 13

Dataframe of inference probabilities

print(f"Populate dataframe in path {path/'Test'}")
df = pd.read_csv(path/"test_df.csv")
df = df[['Name', 'Label', 'Op']]

col_heads = ['Background', 'Fluke_Liver', 'Fluke_Rumen', 'Other']

print('Adding inference probs to the dataframe')
preds_to_df(preds, col_heads, df)
df.to_csv(dirs.crop+'/results_df.csv', index=False)
df.sort_values(by=['Name'], inplace=True)

print('Finding maximums')
# Keep the unrounded row maximum for the label comparison below. Comparing the
# class columns against the value rounded to 2 decimals almost never matches,
# which left max_label as NaN for every row.
raw_max = df[["Background", "Fluke_Liver", "Fluke_Rumen"]].max(axis=1)
df["max_prob"] = raw_max.round(2)

# Rows whose maximum is the Background column keep an empty max_label.
df.loc[df.Fluke_Liver == raw_max, "max_label"] = 'Fluke-Liver'
df.loc[df.Fluke_Rumen == raw_max, "max_label"] = 'Fluke-Rumen'
df.to_csv(dirs.crop+'/results_df.csv', index=False)

#print(df[['Name', 'Label', 'Pstr', 'max_prob']].tail(10))
summary_df = df[['Name', 'Label', 'max_label', 'max_prob']]
summary_df
Populate dataframe in path ../testdata_2/Crop-200/Test
Adding inference probs to the dataframe
Finding maximums
Name Label max_label max_prob
9 236568 - 15-0.jpg Fluke-Liver NaN 0.79
10 236568 - 15-1.jpg Fluke-Liver NaN 0.85
11 236568 - 15-2.jpg Fluke-Liver NaN 0.85
7 236568 - 3-0.jpg Fluke-Liver NaN 0.84
8 236568 - 3-1.jpg Fluke-Liver NaN 0.85
5 236568 - 4-0.jpg Fluke-Liver NaN 0.84
6 236568 - 4-1.jpg Fluke-Liver NaN 0.85
0 236568 - 7-0.jpg Fluke-Liver NaN 0.83
1 236568 - 8-0.jpg Fluke-Liver NaN 0.83
12 236569 - 13-0.jpg Fluke-Rumen NaN 0.79
3 236569 - 21-0.jpg Fluke-Rumen NaN 0.79
4 236569 - 21-1.jpg Fluke-Rumen NaN 0.80
2 236569 - 7-0.jpg Fluke-Rumen NaN 0.80

Explanation of results

Plot of all the test tiles

# Grid of test tiles, titled with the predicted label column.
plot_inferences(preds, df, src_path=path_tst, label='max_label', start=0, cols=4, rows=4)
# TODO: add the actual label
print("make image columns number even")
fnames = sorted(get_image_files(dirs.train))
img = open_image(fnames[1])
# Drop the first pixel column when the last dimension has odd length.
if img.shape[2] % 2:
    img.px = img.px[:, :, 1:]
pc, pi, raw_pred = learn.predict(img)
make even dimensions
torch.Size([3, 800, 817])
# find_prediction_blobs calls .max()/.astype() on its input and indexes it as
# img[:, :, c], so it needs a channel-last NumPy array; passing the fastai
# Image raised AttributeError.
# NOTE(review): assumes the blobs should be searched in the raw per-class
# predictions rather than in the image pixels - confirm.
find_prediction_blobs(raw_pred.permute(1, 2, 0).numpy(), CONF=0.5, min_area=500, offset=0)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-29-9fc8ac4b8d58> in <module>
----> 1 find_prediction_blobs(img, CONF=0.5, min_area=500,offset=0)

~/github/cellsegment/cellsegment/inference_utils.py in find_prediction_blobs(img, CONF, min_area, offset)
    154     CONF *= SCALE
    155 
--> 156     img = (img * (SCALE / img.max())).astype(np.int)
    157     img[img[:, :, 0] < CONF, 0] = 0
    158     img[img[:, :, 1] < CONF, 1] = 0

AttributeError: 'Image' object has no attribute 'max'
import torch.nn.functional as F

source = torch.ones((3, 5, 5))
# Pad the last two dimensions with one row/column of zeros on every side, so
# each (5, 5) plane grows to (7, 7); the leading dimension is left untouched.
# `pad` is given as (left, right, top, bottom).
result = F.pad(input=source, pad=(1, 1, 1, 1), mode='constant', value=0)
print(source.shape)
print(result.shape)
  File "<ipython-input-24-5712344f1e96>", line 7
    print(source.shape)
        ^
SyntaxError: invalid syntax
result
result.shape

old - todo

from skimage.measure import label, regionprops

def _get_props(tens, layer, min_conf, min_area=100):
    """Return properties of the connected regions found in one layer of ``tens``.

    Layer 0 is thresholded as ``value < 1 - min_conf``; every other layer as
    ``value > min_conf``. The thresholded layer is labelled into connected
    regions and those with an area above ``min_area`` are described.

    Args:
        tens: tensor indexed as ``tens[layer, :, :]`` that supports ``.numpy()``.
        layer: index of the layer to analyse.
        min_conf: confidence threshold applied as described above.
        min_area: regions with an area of at most this value are dropped.

    Returns:
        A list of dicts with the keys ``class_layer``, ``centroid``,
        ``mean_intensity``, ``area`` and ``coords``.
    """
    # Use the tensor that was passed in; the previous code read a global `o`.
    arr = tens[layer, :, :].numpy()
    if layer == 0:
        label_arr = label(arr < 1 - min_conf)
    else:
        label_arr = label(arr > min_conf)

    return [
        {
            "class_layer": layer,
            "centroid": region.centroid,
            "mean_intensity": region.mean_intensity,
            "area": region.area,
            "coords": region.coords,
        }
        for region in regionprops(label_arr, arr, cache=True)
        if region.area > min_area
    ]
def _get_bool_mask(o):
    """Return a boolean mask covering the first region found in layer 0 of ``o``.

    The regions come from ``_get_props(o, 0, 0.5)``; only the first one
    returned is used.

    Args:
        o: tensor indexed as ``o[layer, :, :]`` that supports ``.numpy()``.

    Returns:
        A boolean array with the shape of one layer, True inside the region.

    Raises:
        IndexError: if no region is found in layer 0.
    """
    min_conf = 0.5
    arr = o[0, :, :].numpy()
    mask = np.zeros(arr.shape, dtype=bool)
    bgnd = _get_props(o, 0, min_conf)
    coords = bgnd[0]["coords"]
    # coords is used as one (row, col) pair per row. Index with two separate
    # arrays: a list of lists is no longer treated as a multidimensional index
    # by NumPy.
    mask[coords[:, 0], coords[:, 1]] = True
    return mask
def _calc_probs(o):
    """Return, for every layer of ``o``, the mean of its values inside the mask.

    The mask is the boolean array produced by ``_get_bool_mask(o)``; the result
    is a list with one mean per layer, in layer order.
    """
    mask = _get_bool_mask(o)
    return [o[idx, :, :].numpy()[mask].mean() for idx in range(o.shape[0])]

old_ Calculate probability of marked up region

# NOTE(review): `o` is not defined in the visible cells; presumably it is a raw
# prediction tensor such as `raw_pred` - confirm before running.
# # get boolean mask of a region
# mask = _get_bool_mask(o)
# show_img(mask)
# The helpers are defined as _calc_probs / _get_props; the previous names
# (calc_probs, __get_props) do not exist in this file.
p = _calc_probs(o)
print(f'{p[0]:3.2f} {p[1]:3.2f} {p[2]:3.2f}')

p
# print(f'{p:3.2f}')p
# print(f'{p0.mean():3.2f},{p1.mean():3.2f},{p2.mean():3.2f},{p3.mean():3.2f}')
# print(f'{(p0+p1+p2).mean():3.2f}')
min_conf = 0.2
bgnd = _get_props(o, 0, min_conf)
lyr1 = _get_props(o, 1, min_conf)
lyr1
lyr2 = _get_props(o, 2, min_conf)
lyr2
print(bgnd[0]["area"])
print(lyr1[0]["area"], lyr1[0]["mean_intensity"], lyr1[0]["mean_intensity"] * lyr1[0]["area"] / bgnd[0]["area"])
print(lyr2[0]["area"], lyr2[0]["mean_intensity"], lyr2[0]["mean_intensity"] * lyr2[0]["area"] / bgnd[0]["area"])
# region.mean_intensity * region.area / 314
bgnd[0]["centroid"]

print(o[0,90,90], o[1,90,90], o[2,90,90], o[3,90,90])
AAcoords = bgnd[0]["coords"]
AAcoords
arr = o[0,:,:].numpy()
# Boolean mask of every labelled region where layer 0 is below 0.5
# (named region_mask so the builtin `bin` is not shadowed).
region_mask = label(arr < 0.5) > 0
region_mask.max()
AA0 = o[0,:,:].numpy()[region_mask]
AA0.mean()
AA1 = o[1,:,:].numpy()[region_mask]
AA1.mean()
AA2 = o[2,:,:].numpy()[region_mask]
AA2.mean()
(AA0+AA1+AA2).mean()
def _regions_to_predictions(label_image, intensity, name, min_area, resize):
    """Describe every labelled region larger than ``min_area`` as a prediction dict."""
    return [
        {
            "label": name,
            # centroid is (row, col); report it as [x, y] in original-size pixels.
            "point": [int(region.centroid[1] * resize), int(region.centroid[0] * resize)],
            "probability": region.max_intensity.round(2),
            "area": region.area,
        }
        for region in regionprops(label_image, intensity, cache=True)
        if region.area > min_area
    ]


def find_prediction_blobs(img, min_conf=0.1, min_area=100, plot=False):
    """Find connected blobs in the first two channels of a prediction array.

    The array is shrunk by a factor of 2, rescaled so its maximum becomes 100
    and converted to integers. Values below the threshold are zeroed in
    channels 0 and 1, and each of those channels is labelled into connected
    regions.

    Args:
        img: channel-last array, indexed as ``img[:, :, c]``. Regions of
            channel 0 are reported as 'Strongyle', regions of channel 1 as
            'Nematodirus'.
        min_conf: threshold as a fraction of the array maximum.
        min_area: minimum region area; it is divided by the resize factor
            before being compared with areas measured on the shrunken array.
        plot: when True, show the two label images side by side.

    Returns:
        ``(predictions, img)`` where ``predictions`` is a list of dicts with
        the keys ``label``, ``point`` ([x, y] in original-size pixels),
        ``probability`` (on the 0..100 scale) and ``area``, and ``img`` is the
        shrunken, rescaled integer array.
    """
    RESIZE = 2.0
    # regionprops seems to have region.max_intensity errors if the data is not integer
    SCALE = 100

    img = cv2.resize(img, None, fx=1.0 / RESIZE, fy=1.0 / RESIZE, interpolation=cv2.INTER_AREA)
    # NOTE(review): areas shrink by RESIZE**2 while min_area is divided by
    # RESIZE only - kept as in the original, confirm this is intended.
    min_area = int(min_area / RESIZE)
    min_conf *= SCALE

    peak = img.max()
    if peak == 0:
        # An all-zero array cannot be rescaled (division by zero); it has no blobs.
        img = np.zeros(img.shape, dtype=int)
    else:
        # `np.int` was only an alias of the builtin int and has been removed from NumPy.
        img = (img * (SCALE / peak)).astype(int)
    img[img[:, :, 0] < min_conf, 0] = 0
    img[img[:, :, 1] < min_conf, 1] = 0

    label_image0 = label(img[:, :, 0] > min_conf)
    label_image1 = label(img[:, :, 1] > min_conf)

    predictions = (
        _regions_to_predictions(label_image0, img[:, :, 0], 'Strongyle', min_area, RESIZE)
        + _regions_to_predictions(label_image1, img[:, :, 1], 'Nematodirus', min_area, RESIZE)
    )

    if plot:
        plt.figure(figsize=(15, 15))
        plt.subplot(121)
        plt.imshow(label_image0, cmap='nipy_spectral')
        plt.axis('off')
        plt.subplot(122)
        plt.imshow(label_image1, cmap='nipy_spectral')
        plt.axis('off')

        plt.tight_layout()
        plt.show()

    return predictions, img