Source code for brainspy.utils.performance.data

"""
Module describing the dataset used for training a perceptron on the output signal of a Processor class or one of its children.
"""
import torch
from torch.utils.data import Dataset



[docs]
def get_data(results, batch_size):
    """
    Create a shuffling PyTorch DataLoader over the perceptron dataset.

    A PerceptronDataset is built from ``results["norm_inputs"]`` and ``results["targets"]`` and
    wrapped in a ``torch.utils.data.DataLoader`` that serves the data in batches of
    ``batch_size`` samples. Refer to
    https://pytorch.org/tutorials/beginner/basics/data_tutorial.html for DataLoaders in PyTorch.

    Shuffling is always enabled (``shuffle=True``), so the DataLoader reshuffles the samples at
    every epoch. Because of this, a learning algorithm consuming the DataLoader is stochastic
    and may achieve different results each time it is run.
    Refer to https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html to see how epochs
    are used when training a classifier.

    Parameters
    ----------
    results : dict
        Contains the input and target values of the perceptron algorithm, as well as the
        normalised input data from which the PyTorch DataLoader is created.

        It has the following keys:

        inputs : torch.Tensor
            The inputs to the perceptron algorithm, which are the outputs of the DNPU or DNPU
            architectures that you want to evaluate the accuracy against. This entry is only
            type-checked here; the dataset itself is built from ``norm_inputs``.
        norm_inputs : torch.Tensor
            Standard normal distribution of the input data. To calculate this, the
            zscore_norm function can be used in brainspy.utils.performance.accuracy
        targets : torch.Tensor
            Binary targets against which the output of the perceptron algorithm is compared.
    batch_size : int
        Number of samples per batch served by the DataLoader. Must be a positive integer
        (booleans are not accepted).

    Returns
    -------
    torch.utils.data.DataLoader
        DataLoader of the perceptron algorithm.

    Raises
    ------
    AssertionError
        If ``results`` is not a dict, ``batch_size`` is not a positive int, or one of the
        required entries of ``results`` is not a torch.Tensor.
    KeyError
        If ``results`` lacks one of the keys ``inputs``, ``norm_inputs`` or ``targets``.
    """
    # AssertionError is raised explicitly (instead of using `assert` statements) so that the
    # validation is not stripped when Python runs with -O, while callers that already catch
    # AssertionError keep working.
    if not isinstance(results, dict):
        raise AssertionError("Results field should be of type - dict")
    # bool is a subclass of int, but True/False is not a meaningful batch size.
    if isinstance(batch_size, bool) or not isinstance(batch_size, int):
        raise AssertionError("Batch size should be of type - int")
    if batch_size <= 0:
        raise AssertionError("batch_size should be a positive integer value")

    # isinstance also accepts torch.Tensor subclasses such as torch.nn.Parameter.
    tensor_fields = (
        ("inputs", "Input data should be of type - torch.Tensor"),
        ("norm_inputs",
         "Normalized Input data should be of type - torch.Tensor"),
        ("targets", "Target data should be of type - torch.Tensor"),
    )
    for key, message in tensor_fields:
        if not isinstance(results[key], torch.Tensor):
            raise AssertionError(message)

    dataset = PerceptronDataset(results["norm_inputs"], results["targets"])
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
    )




[docs]
class PerceptronDataset(Dataset):
    """
    PyTorch Dataset holding the inputs and targets used to train the perceptron.

    The dataset stores the samples and their corresponding labels, and a DataLoader wraps an
    iterable around the Dataset to enable easy access to the samples.

    Refer to https://pytorch.org/tutorials/beginner/basics/data_tutorial.html to see how Pytorch
    datasets are created and used.

    """
    def __init__(self, inputs, targets, device=None):
        """
        Initialize the dataset of the Perceptron

        Both tensors are converted to the current default torch dtype and, when a device is
        given, moved to that device. The inputs are stored as they are received; no
        normalisation is applied here.

        Parameters
        ----------
        inputs : torch.Tensor
            the inputs to the perceptron algorithm, which are the outputs of the DNPU or DNPU
            architectures that you want to evaluate the accuracy against. At least 10 points
            (length of the first dimension) are required and no NaN values are allowed.
        targets : torch.Tensor
            binary targets against which the output of the perceptron algorithm is compared
        device : torch.device, optional
            device on which the data is stored (e.g. CUDA or CPU). By default None, in which
            case the tensors stay on the device they are already on.

        Raises
        ------
        AssertionError
            If fewer than 10 input points are given or the inputs contain NaN values.
        """
        # Raised explicitly (rather than with `assert`) so the checks survive `python -O`.
        # The bound is inclusive, matching the "at least 10 points" requirement of the message.
        if len(inputs) < 10:
            raise AssertionError(
                "Not enough data, at least 10 points are required.")
        if torch.isnan(inputs).any():
            raise AssertionError("NaN values detected.")

        # Only pass `device` along when one was requested, so that the tensors otherwise
        # remain on their current device.
        conversion = {"dtype": torch.get_default_dtype()}
        if device is not None:
            conversion["device"] = device
        self.inputs = inputs.to(**conversion)
        self.targets = targets.to(**conversion)

    def __getitem__(self, index):
        """
        Gets the input and target at a given index in this Perceptron dataset

        The ``[index, :]`` indexing used here requires both stored tensors to have at least
        two dimensions.

        Parameters
        ----------
        index : int
            position/index of the required input and target

        Returns
        -------
        (torch.Tensor, torch.Tensor)
            tuple of input and target at a given index
        """
        inputs = self.inputs[index, :]
        targets = self.targets[index, :]
        return (inputs, targets)

    def __len__(self):
        """
        Get the length of the input values

        Returns
        -------
        int
            length of the input dataset (size of the first dimension of the inputs)
        """
        return len(self.inputs)