Computer Vision

Computer Vision in Python 3

PyTorch

This is an example of a working computer vision model created by Abhishek Thakur, with a few edits of my own. The code is split into four modules: dataset, engine, model and train.

Dataset

# dataset.py
import torch
import numpy as np
from PIL import Image
from PIL import ImageFile
# Sometimes image files are truncated, i.e. they are missing their
# final bytes. Setting this flag tells PIL to load those kinds of
# (corrupt) images anyway.
ImageFile.LOAD_TRUNCATED_IMAGES = True
class ClassificationDataset:
    """
    A general classification dataset class that you can use for all
    kinds of image classification problems. For example,
    binary classification, multi-class, multi-label classification
    """

    def __init__(
        self,
        image_paths,
        targets,
        resize=None,
        augmentations=None
    ):
        """
        :param image_paths: list of path to images
        :param targets: numpy array
        :param resize: tuple, e.g. (256, 256), resizes image if not None
        :param augmentations: albumentation augmentations
        """
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations

    def __len__(self):
        """
        Return the total number of samples in the dataset
        """
        return len(self.image_paths)

    def __getitem__(self, item):
        """
        For a given "item" index, return everything we need
        to train a given model

        :param item: index of the sample to load
        :return: dict with an "image" float tensor in CHW layout
            and a "targets" long tensor
        """
        # open the file inside a context manager so its handle is
        # released right away; convert() returns an independent,
        # fully loaded RGB copy (we may have single channel images)
        with Image.open(self.image_paths[item]) as raw_image:
            image = raw_image.convert("RGB")

        # grab correct targets
        targets = self.targets[item]

        # resize if needed; PIL expects (width, height), so the
        # resize tuple is read as (height, width)
        if self.resize is not None:
            image = image.resize(
                (self.resize[1], self.resize[0]),
                resample=Image.BILINEAR
            )

        # convert image to numpy array
        image = np.array(image)

        # if we have albumentation augmentations
        # add them to the image
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # pytorch expects CHW instead of HWC
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        # return tensors of image and targets
        # take a look at the types!
        # for regression tasks,
        # dtype of targets will change to torch.float
        return {
            "image": torch.tensor(image, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.long),
        }




# engine.py
import torch
import torch.nn as nn
from tqdm import tqdm
def train(data_loader, model, optimizer, device):
    """
    This function does training for one epoch

    :param data_loader: this is the pytorch dataloader; every batch is
        a dict with "image" and "targets" entries
    :param model: pytorch model
    :param optimizer: optimizer, for e.g. adam, sgd, etc
    :param device: cuda/cpu
    :return: None, the model is updated in place
    """
    # put the model in train mode
    model.train()

    # build the loss once; it does not change between batches
    criterion = nn.BCEWithLogitsLoss()

    # go over every batch of data in data loader
    for data in data_loader:
        # remember, we have image and targets in our dataset class;
        # move both to the cuda/cpu device as floats
        # (BCEWithLogitsLoss needs float targets)
        inputs = data["image"].to(device, dtype=torch.float)
        targets = data["targets"].to(device, dtype=torch.float)

        # zero grad the optimizer
        optimizer.zero_grad()

        # do the forward step of model
        outputs = model(inputs)

        # calculate loss; targets are reshaped into a single column
        # so that they line up with one logit per sample
        loss = criterion(outputs, targets.view(-1, 1))

        # backward step the loss
        loss.backward()

        # step optimizer
        optimizer.step()

        # if you have a scheduler, you either need to
        # step it here or you have to step it after
        # the epoch. here, we are not using any learning
        # rate scheduler
def evaluate(data_loader, model, device):
    """
    This function does evaluation for one epoch

    :param data_loader: this is the pytorch dataloader; every batch is
        a dict with "image" and "targets" entries
    :param model: pytorch model
    :param device: cuda/cpu
    :return: tuple (final_outputs, final_targets) of python lists,
        outputs first; outputs are the raw model outputs
    """
    # put model in evaluation mode
    model.eval()

    # init lists to store targets and outputs
    final_targets = []
    final_outputs = []

    # we use no_grad context
    with torch.no_grad():
        for data in data_loader:
            inputs = data["image"].to(device, dtype=torch.float)
            targets = data["targets"].to(device, dtype=torch.float)

            # do the forward step to generate prediction
            output = model(inputs)

            # convert targets and outputs to lists
            # and extend the accumulated lists with them
            final_targets.extend(targets.detach().cpu().numpy().tolist())
            final_outputs.extend(output.detach().cpu().numpy().tolist())

    # return final output and final targets
    return final_outputs, final_targets



# model.py
import torch.nn as nn
import pretrainedmodels
def get_model(pretrained):
    """
    Build an AlexNet from ``pretrainedmodels`` and replace its
    ``last_linear`` layer with a new head that has a single output

    :param pretrained: if truthy, imagenet weights are requested;
        otherwise no pretrained weights are requested
    :return: the model with the replaced ``last_linear`` head
    """
    # both cases differ only in the value passed as "pretrained"
    weights = "imagenet" if pretrained else None
    model = pretrainedmodels.__dict__["alexnet"](pretrained=weights)

    # print the model here to know whats going on.
    # the new head takes 4096 input features and ends in one output,
    # which is what a binary classification loss on logits needs
    model.last_linear = nn.Sequential(
        nn.BatchNorm1d(4096),
        nn.Dropout(p=0.25),
        nn.Linear(in_features=4096, out_features=2048),
        nn.ReLU(),
        nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=2048, out_features=1),
    )
    return model

Reference

https://www.amazon.com.au/Approaching-Almost-Machine-Learning-Problem-ebook/dp/B089P13QHT