Example of preparing an image set for training
Imports¶
from cellsegment.core import *
from cellsegment.dataprep_utils import *
from cellsegment.json_utils import *
from cellsegment.set_directories import *
import pandas
import numpy as np
from fastai.vision import *
Define directories¶
# Create the project's directory descriptor for the sample dataset and print
# it for inspection before any files are read or written.
base_dir = '../testdata_2'
dirs = Dirs(base_dir)
print(dirs)
Get stats on images¶
# Report statistics for the directory that holds the original images.
# NOTE: do not follow this with print(show_directory_stats) -- that prints the
# function object's repr, not the statistics. Presumably the helper emits its
# own report; TODO confirm against cellsegment.
show_directory_stats(dirs.originImages)
TODO: Change this. The dataset consists of legacy images and micro-i images. Structure:
Split into 70% Train, 15% Val, 15% Test : taken randomly for the 354 images. - Original (jpg+csv+json) [354 jpg; 354 csv; 354 json; Total = 1062]- Fullsize (random split into 70% Train, 15% Val, 15% Test) - Train - (354 jpg + 354 json) - Label - (354 png) - Crop-200 - Error - nil - Train - (443 jpg) - Labels - (526 png) - Test - (83 jpg) - valid.txt - 73 file names
# Collect the pixel dimensions of every original image into a table and save
# it to dirs.sizeCsvFile unless that file already exists. The resize cells
# pass the same CSV path to resize_dir / resize_json_dir.
print("Show original image sizes")
height = 800  # target height in pixels; kept because later cells may read it

fnames = sorted(get_image_files(dirs.originImages))
list_filedata = []
for fn in fnames:
    # The context manager closes each file as soon as its size is known;
    # otherwise one handle per image stays open until garbage collection.
    with PIL.Image.open(fn) as img:
        img_w, img_h = img.size
    # The per-image scale (height / img_h) is not recorded: the saved table
    # only ever contained Name, Width and Height.
    list_filedata.append({'Name': fn.name, 'Width': img_w, 'Height': img_h})

# Naming the columns up front fixes their order and also yields a valid
# (empty) table when the directory contains no images.
df = pandas.DataFrame(list_filedata, columns=['Name', 'Width', 'Height'])

savefn = Path(dirs.sizeCsvFile)
if savefn.exists():
    # Never overwrite an existing size table.
    print(f'Filename {savefn} already exists')
else:
    print(f'Saving {savefn}')
    df.to_csv(savefn)

print(df.head())
print(df.tail())
Optional - Convert directory of CSV files to JSON files¶
# Optional step: convert the CSV files in the original-image directory to
# JSON. Source and destination are the same directory, and 'all' asks for
# every file to be processed.
csv_to_json_dir(
    dirs.originImages,
    dirs.originImages,
    number_files='all',
)
Resize jpg & json files¶
# Resize from dirs.originImages into dirs.train: first with resize_dir, then
# with resize_json_dir (presumably images and their JSON annotations
# respectively -- TODO confirm). Both calls take identical arguments.
for resize_step in (resize_dir, resize_json_dir):
    resize_step(
        dirs.sizeCsvFile,
        dirs.originImages,
        dirs.train,
        number_files='all',
        height=800,
    )
Create label png images¶
Label centers are taken from the JSON files; the PNG images are stored in the destination directory.
# Create label images for everything in dirs.train and write them to
# dirs.label; 'all' asks for every file to be processed.
label_source = dirs.train
label_dest = dirs.label
create_labels_dir(label_source, label_dest, number_files='all')
Split into Train, Val and Test¶
# --- List the training images and shuffle that list -------------------------
file_csv = dirs.basepath+'/file_data.csv'
fnames = [fn.name for fn in sorted(get_image_files(dirs.train))]
df = pandas.DataFrame(fnames)
df.columns = ['Name']
df.to_csv(file_csv, index=True)
# Fixed seed so the shuffle (and therefore the split) is reproducible.
shuffle_csv(file_csv, random_state=23)

# --- Assign each file to train / valid / test -------------------------------
print('\n Split into train valid and test directories')
split_filenames(file_csv, num_train=0.5, num_val=0.25)

# --- Crop pass 1: third argument is dirs.train ------------------------------
misslist, croplist = crop_img_dir(
    file_csv, dirs.train, dirs.train, dirs.crop,
    number_files='all', DEBUG=False,
)
print(f'Num Missed: {len(misslist)}, Num Cropped: {len(croplist)}')

# Keep only the columns needed downstream and persist the crop table.
crop_csv = dirs.crop+'/crop_df.csv'
crop_df = pandas.DataFrame(croplist)[['Name', 'Label', 'Op']]
crop_df.to_csv(crop_csv)
crop_df.tail()

# Names of the crops marked 'Valid' go to valid.txt (written with a header row).
valid_df = crop_df.loc[crop_df.Op == 'Valid', 'Name']
valid_df.to_csv(dirs.crop+'/valid.txt', index=False, header=True)
valid_df.head(10)

# --- Crop pass 2: same call with dirs.label as the third argument -----------
labmisslist, labcroplist = crop_img_dir(
    file_csv, dirs.train, dirs.label, dirs.crop, number_files='all',
)
print(f'Num Missed: {len(labmisslist)}, Num Cropped: {len(labcroplist)}')
Save test_df.csv¶
# Reload the crop table, replace the numeric label codes with readable names,
# save it back, and write the rows marked 'Test' to their own CSV.
crop_csv = dirs.crop+'/crop_df.csv'
crop_df = pandas.read_csv(crop_csv, index_col=0)

# Cast to object before mixing in strings: assigning a string into a numeric
# column through .loc relies on implicit upcasting, which newer pandas
# deprecates. The stored values are unchanged by the cast.
crop_df['Label'] = crop_df['Label'].astype(object)
for code, label_name in ((40, 'Fluke-Rumen'), (11, 'Fluke-Liver')):
    crop_df.loc[crop_df.Label == code, 'Label'] = label_name

crop_df.to_csv(crop_csv)
crop_df.tail()

test_df = crop_df[crop_df.Op == 'Test']
test_df.to_csv(dirs.crop+'/test_df.csv', index=False)
test_df.tail()
Save label files without color palette info¶
Otherwise databunch segmentation does not work.
# Rewrite every label image in place from its raw pixel array, so the saved
# file is rebuilt from plain uint8 values only.
fnames = get_image_files(Path(dirs.crop)/'Label')
# fnames = fnames[:3]
for fn in fnames:
    # Read the pixels and close the file before overwriting it; previously
    # the source handle was still open while the same path was being saved.
    with PIL.Image.open(fn) as src_img:
        img = np.asarray(src_img)
    # NOTE(review): quality=90 is kept from the original call; it looks like
    # a JPEG option and may be ignored for PNG output -- confirm.
    PIL.Image.fromarray(img.astype(np.uint8)).save(fn, quality=90)
Test Create DataBunch¶
# create data set
from fastai.vision import *
from fastai.utils.mem import *

path_img = dirs.cropTrain
path_lbl = dirs.cropLabel

# Class names handed to the segmentation label list.
codes = np.array(['background', '1', '2', '3'])
codes


def get_label_fn(x):
    """Return the label PNG path for image path *x* (same stem, in path_lbl).

    Defined with ``def`` rather than as a lambda bound to a name, so the
    function has a real name and can be pickled by reference.
    """
    return f'{path_lbl}/{x.stem}.png'


# NOTE(review): '../valid.txt' is presumably resolved relative to path_img,
# i.e. it points at the valid.txt written into dirs.crop -- confirm.
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt')
       .label_from_func(get_label_fn, classes=codes, convert_mode='RGB'))

# Flips in both directions; rotation and warp are disabled and max_zoom is 1.
tfms = get_transforms(flip_vert=True, max_rotate=None, max_zoom=1., max_warp=None)
bs = 8
# tfm_y=True applies the same transforms to the label masks.
data = (src.transform(tfms, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))
Show example images¶
# Preview four rows of the training set.
data.show_batch(
    rows=4,
    ds_type=DatasetType.Train,
    figsize=(10, 10),
)
def acc_metric1(input, target):
    """Return the fraction of all pixels whose predicted class equals the target.

    Assumes ``input`` holds per-class scores along dim 1 and ``target`` has a
    size-1 dim 1, as implied by ``argmax(dim=1)`` and ``squeeze(1)``.
    """
    predicted = input.argmax(dim=1)
    expected = target.squeeze(1)
    return predicted.eq(expected).float().mean()
def acc_metric2(input, target):
    """Return pixel accuracy restricted to foreground pixels (target > 0).

    Assumes ``input`` holds per-class scores along dim 1 and ``target`` has a
    size-1 dim 1, as implied by ``argmax(dim=1)`` and ``squeeze(1)``.

    When a batch has no foreground pixel the result is 0.0. Taking the mean
    of the empty selection would give NaN, and a single NaN would poison any
    average taken over batches.
    """
    target = target.squeeze(1)
    foreground = target > 0  # computed once and reused for both selections
    hits = (input.argmax(dim=1)[foreground] == target[foreground]).float()
    if hits.numel() == 0:
        # Sum of an empty tensor is a 0-dim 0.0 with the same dtype and device.
        return hits.sum()
    return hits.mean()
# Build a U-Net learner on a ResNet-34 backbone, tracking both accuracy
# metrics, with weight decay 1e-2, then switch it to half precision.
metrics = [acc_metric1, acc_metric2]
wd = 1e-2
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)
learn = learn.to_fp16()
run fastai learn¶
# Only touch the learner when a CUDA device is present. The learning-rate
# finder calls are left disabled, so this cell currently does nothing.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    # learn.lr_find()
    # learn.recorder.plot()
    pass