Source code for brainspy.utils.performance.data

"""
Module describing the dataset used to train a perceptron that is applied to the output signal of a Processor class or one of its children.
"""
import torch
from torch.utils.data import Dataset


def get_data(results, batch_size):
    """
    Initialise the perceptron dataset and wrap it in a PyTorch DataLoader.

    A PerceptronDataset is built from ``results["norm_inputs"]`` and
    ``results["targets"]`` and handed to ``torch.utils.data.DataLoader``,
    which loads the data into memory according to the batch size. Refer to
    https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
    for DataLoaders in PyTorch.

    The loader is created with ``shuffle=True``, so the data is reshuffled
    after each epoch. This is by design, to accelerate and improve the model
    training process. Because of this, the learning algorithm is stochastic
    and may achieve different results each time it is run. Refer to
    https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
    to see how epochs are used when training a classifier.

    Parameters
    ----------
    results : dict
        Contains the input and target values of the perceptron algorithm.
        It must have the following keys:

        inputs : torch.Tensor
            The inputs to the perceptron algorithm, which are the outputs of
            the DNPU or DNPU architectures that you want to evaluate the
            accuracy against. It is only type-checked here; the dataset
            itself is built from ``norm_inputs``.
        norm_inputs : torch.Tensor
            Standard normal distribution of the input data. To calculate
            this, the zscore_norm function in
            brainspy.utils.performance.accuracy can be used.
        targets : torch.Tensor
            Binary targets against which the output of the perceptron
            algorithm is compared.
    batch_size : int
        Number of samples per batch; it must be a positive integer.

    Returns
    -------
    torch.utils.data.DataLoader
        Shuffling dataloader of the perceptron dataset.

    Raises
    ------
    AssertionError
        If ``results`` is not a dict, ``batch_size`` is not a positive int,
        or one of the three entries is not a torch.Tensor.
    KeyError
        If one of the required keys is missing from ``results``.
    """
    # AssertionError is raised explicitly rather than through ``assert`` so
    # the validation still runs under ``python -O`` while callers that catch
    # AssertionError keep working.
    if not isinstance(results, dict):
        raise AssertionError("Results field should be of type - dict")
    # bool is a subclass of int; it has never been accepted as a batch size.
    if not isinstance(batch_size, int) or isinstance(batch_size, bool):
        raise AssertionError("Batch size should be of type - int")
    if batch_size <= 0:
        raise AssertionError("batch_size should be a positive integer value")

    # isinstance (instead of an exact type comparison) also admits Tensor
    # subclasses such as torch.nn.Parameter.
    tensor_checks = (
        ("inputs", "Input data should be of type - torch.Tensor"),
        ("norm_inputs",
         "Normalized Input data should be of type - torch.Tensor"),
        ("targets", "Target data should be of type - torch.Tensor"),
    )
    for key, message in tensor_checks:
        if not isinstance(results[key], torch.Tensor):
            raise AssertionError(message)

    dataset = PerceptronDataset(results["norm_inputs"], results["targets"])
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
    )
class PerceptronDataset(Dataset):
    """
    Dataset of perceptron inputs and targets, subclassing the PyTorch Dataset.

    The dataset stores the samples and their corresponding labels, and a
    DataLoader wraps an iterable around the Dataset to enable easy access to
    the samples. Refer to
    https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
    to see how PyTorch datasets are created and used.
    """

    def __init__(self, inputs, targets, device=None):
        """
        Initialise the dataset of the perceptron.

        Both tensors are converted to the current default torch dtype and,
        when a device is given, moved to that device.

        Parameters
        ----------
        inputs : torch.Tensor
            The inputs to the perceptron algorithm, which are the outputs of
            the DNPU or DNPU architectures that you want to evaluate the
            accuracy against. At least 10 points are required and no NaN
            values are allowed.
        targets : torch.Tensor
            Binary targets against which the output of the perceptron
            algorithm is compared.
        device : torch.device, optional
            Device (CUDA or CPU) on which the data is stored. By default
            None, which leaves each tensor on its current device.

        Raises
        ------
        AssertionError
            If fewer than 10 input points are given or the inputs contain
            NaN values.
        """
        # AssertionError is raised explicitly rather than through ``assert``
        # so the checks still run under ``python -O`` while callers that
        # catch AssertionError keep working.
        # The bound is inclusive so that it agrees with the error message:
        # exactly 10 points are enough.
        if len(inputs) < 10:
            raise AssertionError(
                "Not enough data, at least 10 points are required.")
        if torch.isnan(inputs).any():
            raise AssertionError("NaN values detected.")

        # Only pass ``device`` along when one was requested, so tensors stay
        # where they are otherwise.
        conversion = {"dtype": torch.get_default_dtype()}
        if device is not None:
            conversion["device"] = device
        self.inputs = inputs.to(**conversion)
        self.targets = targets.to(**conversion)

    def __getitem__(self, index):
        """
        Get the input and target at a given index of this dataset.

        Both stored tensors are indexed with ``[index, :]``, so they need at
        least two dimensions.

        Parameters
        ----------
        index : int
            Position/index of the required input and target.

        Returns
        -------
        tuple of (torch.Tensor, torch.Tensor)
            The input and the target at the given index.
        """
        inputs = self.inputs[index, :]
        targets = self.targets[index, :]
        return (inputs, targets)

    def __len__(self):
        """
        Get the number of input samples.

        Returns
        -------
        int
            Length of the input dataset.
        """
        return len(self.inputs)