Computer Vision
Computer Vision in Python 3
PyTorch
This is an example of a working computer vision model created by Abhishek Thakur with a bit of editing by me. It is split into four categories: dataset, engine, model and train:
Dataset
# dataset.py
import torch
import numpy as np
from PIL import Image
from PIL import ImageFile
# sometimes, you will have images without an ending bit
# this takes care of those kind of (corrupt) images
ImageFile.LOAD_TRUNCATED_IMAGES = True
class ClassificationDataset:
Approaching (Almost) Any Machine Learning Problem
196
"""
A general classification dataset class that you can use for all
kinds of image classification problems. For example,
binary classification, multi-class, multi-label classification
"""
def __init__(
self,
image_paths,
targets,
resize=None,
augmentations=None
):
"""
:param image_paths: list of path to images
:param targets: numpy array
:param resize: tuple, e.g. (256, 256), resizes image if not None
:param augmentations: albumentation augmentations
"""
self.image_paths = image_paths
self.targets = targets
self.resize = resize
self.augmentations = augmentations
def __len__(self):
"""
Return the total number of samples in the dataset
"""
return len(self.image_paths)
def __getitem__(self, item):
"""
For a given "item" index, return everything we need
to train a given model
"""
# use PIL to open the image
image = Image.open(self.image_paths[item])
# convert image to RGB, we have single channel images
image = image.convert("RGB")
# grab correct targets
targets = self.targets[item]
# resize if needed
if self.resize is not None:
image = image.resize(
(self.resize[1], self.resize[0]),
resample=Image.BILINEAR
)
Approaching (Almost) Any Machine Learning Problem
197
# convert image to numpy array
image = np.array(image)
# if we have albumentation augmentations
# add them to the image
if self.augmentations is not None:
augmented = self.augmentations(image=image)
image = augmented["image"]
# pytorch expects CHW instead of HWC
image = np.transpose(image, (2, 0, 1)).astype(np.float32)
# return tensors of image and targets
# take a look at the types!
# for regression tasks,
# dtype of targets will change to torch.float
return {
"image": torch.tensor(image, dtype=torch.float),
"targets": torch.tensor(targets, dtype=torch.long),
}
# engine.py
import torch
import torch.nn as nn
from tqdm import tqdm
def train(data_loader, model, optimizer, device):
"""
This function does training for one epoch
:param data_loader: this is the pytorch dataloader
:param model: pytorch model
:param optimizer: optimizer, for e.g. adam, sgd, etc
:param device: cuda/cpu
"""
# put the model in train mode
model.train()
# go over every batch of data in data loader
for data in data_loader:
Approaching (Almost) Any Machine Learning Problem
198
# remember, we have image and targets
# in our dataset class
inputs = data["image"]
targets = data["targets"]
# move inputs/targets to cuda/cpu device
inputs = inputs.to(device, dtype=torch.float)
targets = targets.to(device, dtype=torch.float)
# zero grad the optimizer
optimizer.zero_grad()
# do the forward step of model
outputs = model(inputs)
# calculate loss
loss = nn.BCEWithLogitsLoss()(outputs, targets.view(-1, 1))
# backward step the loss
loss.backward()
# step optimizer
optimizer.step()
# if you have a scheduler, you either need to
# step it here or you have to step it after
# the epoch. here, we are not using any learning
# rate scheduler
def evaluate(data_loader, model, device):
"""
This function does evaluation for one epoch
:param data_loader: this is the pytorch dataloader
:param model: pytorch model
:param device: cuda/cpu
"""
# put model in evaluation mode
model.eval()
# init lists to store targets and outputs
final_targets = []
final_outputs = []
# we use no_grad context
with torch.no_grad():
for data in data_loader:
inputs = data["image"]
targets = data["targets"]
inputs = inputs.to(device, dtype=torch.float)
targets = targets.to(device, dtype=torch.float)
Approaching (Almost) Any Machine Learning Problem
199
# do the forward step to generate prediction
output = model(inputs)
# convert targets and outputs to lists
targets = targets.detach().cpu().numpy().tolist()
output = output.detach().cpu().numpy().tolist()
# extend the original list
final_targets.extend(targets)
final_outputs.extend(output)
# return final output and final targets
return final_outputs, final_targets
# model.py
import torch.nn as nn
import pretrainedmodels
def get_model(pretrained):
if pretrained:
model = pretrainedmodels.__dict__["alexnet"](
pretrained='imagenet'
)
else:
model = pretrainedmodels.__dict__["alexnet"](
pretrained=None
)
# print the model here to know whats going on.
model.last_linear = nn.Sequential(
nn.BatchNorm1d(4096),
nn.Dropout(p=0.25),
nn.Linear(in_features=4096, out_features=2048),
Approaching (Almost) Any Machine Learning Problem
200
nn.ReLU(),
nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1),
nn.Dropout(p=0.5),
nn.Linear(in_features=2048, out_features=1),
)
return model
Reference
https://www.amazon.com.au/Approaching-Almost-Machine-Learning-Problem-ebook/dp/B089P13QHT