# Source code for brainspy.utils.signal

"""
Set of fitness functions for genetic algorithm and loss functions for gradient
descent.
"""
import warnings

import torch

from brainspy.utils.pytorch import TorchUtils
from brainspy.utils.performance.accuracy import get_accuracy



[docs]
def accuracy_fit(output: torch.Tensor,
                 target: torch.Tensor,
                 default_value=False) -> torch.Tensor:
    """
    Accuracy-based fitness function for the genetic algorithm.

    Every column of ``output`` is paired with the same column of ``target``
    and handed to ``get_accuracy``; the ``"accuracy_value"`` entry of its
    result becomes the fitness of that signal. As described for this
    function, the accuracy comes from a single perceptron that is taught to
    map the output onto the target and is expressed as a percentage, which
    needs at least 10 datapoints per signal.

    When ``default_value`` is True no accuracy is computed and a tensor of
    zeros is returned instead.

    Example
    -------
    >>> accuracy_fit(torch.rand((100, 3)), torch.rand(100, 3))
    torch.Tensor([48., 21.2, 3.5])
    >>> accuracy_fit(torch.rand((100, 3)), torch.rand(100, 3), True)
    torch.Tensor([0.0, 0.0, 0.0])

    Parameters
    ----------
    output : torch.Tensor
        The output data, shape [n, m] with m signals of n datapoints.
    target : torch.Tensor
        The target data, shape [n, m] with m signals of n datapoints.
    default_value : bool, optional
        If True, skip the computation and return the default fitness (0)
        for every signal. By default False.

    Returns
    -------
    torch.Tensor
        One fitness value per signal (length m), on the device and with the
        dtype of ``output``.

    Raises
    ------
    AssertionError
        If an argument is not exactly of the expected type, or if output
        and target do not have the same shape.
    """
    types_ok = (type(output) is torch.Tensor
                and type(target) is torch.Tensor
                and type(default_value) is bool)
    if not types_ok:
        raise AssertionError("Invalid type for arguments provided")
    assert output.shape == target.shape, "Dimensions of data are different."

    # The zero tensor doubles as the default answer and as the buffer that
    # the per-signal accuracies are written into.
    fitness = torch.zeros(output.shape[1],
                          device=output.device,
                          dtype=output.dtype)
    if default_value:
        return fitness

    for column in range(output.shape[1]):
        # get_accuracy receives column vectors of shape [n, 1].
        signal = output[:, column].unsqueeze(1)
        labels = target[:, column].unsqueeze(1)
        fitness[column] = get_accuracy(signal, labels)["accuracy_value"]
    return fitness




[docs]
def corr_fit(output: torch.Tensor,
             target: torch.Tensor,
             default_value=False) -> torch.Tensor:
    """
    Pearson-correlation fitness function for the genetic algorithm.

    Delegates to ``pearsons_correlation`` (see that function for details)
    unless ``default_value`` is True, in which case the default fitness of
    -1 is returned for every signal.

    Example
    -------
    >>> corr_fit(torch.rand((100, 3)), torch.rand(100, 3))
    torch.Tensor([0.5, 0.4, -0.34])
    >>> corr_fit(torch.rand((100, 3)), torch.rand(100, 3), True)
    torch.Tensor([-1.0, -1.0, -1.0])

    Parameters
    ----------
    output : torch.Tensor
        The output data, shape [n, m] with m signals of n datapoints.
    target : torch.Tensor
        The target data, shape [n, m] with m signals of n datapoints.
    default_value : bool, optional
        If True, return the default fitness (-1) instead of computing the
        correlation. By default False.

    Returns
    -------
    torch.Tensor
        One fitness value per signal: the correlation, or -1 when the
        default was requested.

    Raises
    ------
    AssertionError
        If an argument is not exactly of the expected type, or if output
        and target do not have the same shape.
    """
    types_ok = (type(output) is torch.Tensor
                and type(target) is torch.Tensor
                and type(default_value) is bool)
    if not types_ok:
        raise AssertionError("Invalid type for arguments provided")
    assert output.shape == target.shape, "Dimensions of data are different."

    if not default_value:
        return pearsons_correlation(output, target)

    # Worst possible correlation for every signal.
    worst = -torch.ones(
        output.shape[1], device=output.device, dtype=output.dtype)
    return worst




[docs]
def corrsig_fit(output: torch.Tensor,
                target: torch.Tensor,
                default_value=False,
                sigmoid_center=0,
                sigmoid_scale=1) -> torch.Tensor:
    """
    Fitness function for genetic algorithm using correlation and a sigmoid
    function.

    For every signal the Pearson correlation between output and target is
    multiplied by ``sigmoid(sigmoid_scale * (sep - sigmoid_center))``, where
    ``sep`` is the mean of the outputs whose target equals 1 minus the mean
    of the outputs whose target equals 0.

    Will return default value (-1) if indicated.

    For values of parameters see this paper:
    https://www.nature.com/articles/s41565-020-00779-y

    Note: target data must be binary for this to work.

    Example
    -------
    >>> corrsig_fit(torch.rand((100, 3)), torch.round(torch.rand(100, 3)))
    torch.Tensor([0.5, 0.4, -0.34])
    >>> corrsig_fit(torch.rand((100, 3)), torch.round(torch.rand(100, 3)),
                    True)
    torch.Tensor([-1.0, -1.0, -1.0])

    Parameters
    ----------
    output : torch.Tensor
        The output data, shape [n, m] with m signals of n datapoints.
    target : torch.Tensor
        The target data, shape [n, m] with m signals of n datapoints;
        should be binary.
    default_value : bool, optional
        Return the default value or not, by default False.
    sigmoid_center : float
        Shift of the sigmoid, by default 0.
    sigmoid_scale : float
        Scale of the sigmoid (multiplies its argument), by default 1.

    Returns
    -------
    torch.Tensor
        Default value or calculated fitness for each pair of signals.
        Will be NaN if target data is not binary.

    Raises
    ------
    AssertionError
        If an argument is not exactly of the expected type, or if the
        dimensions of output and target are not the same. The shape check
        is performed regardless of ``default_value``.
    """
    if (type(output) is not torch.Tensor or type(target) is not torch.Tensor
            or type(default_value) is not bool):
        raise AssertionError("Invalid type for arguments provided")
    # Validate shapes before branching so the documented contract also holds
    # when the default value is requested (as in the other *_fit functions).
    # An explicit raise is used so the check is not stripped by ``python -O``.
    if output.shape != target.shape:
        raise AssertionError("Dimensions of data are different.")

    if default_value:
        return -torch.ones(
            output.shape[1], device=output.device, dtype=output.dtype)

    corr = pearsons_correlation(output, target)
    sig = torch.zeros(output.shape[1],
                      device=output.device,
                      dtype=output.dtype)
    for i in range(output.shape[1]):
        signal = output[:, i]
        labels = target[:, i]
        # Separation between the mean "high" and mean "low" output. If one
        # of the classes is absent its mean is NaN, hence the NaN result
        # for non-binary targets.
        sep = signal[labels == 1].mean() - signal[labels == 0].mean()
        sig[i] = torch.sigmoid(sigmoid_scale * (sep - sigmoid_center))
    return corr * sig




[docs]
def pearsons_correlation(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """
    Compute the Pearson correlation coefficient of each column pair.

    The coefficient measures how strongly two datasets are linearly
    related: 1 is a perfect positive relation, -1 a perfect negative one
    and 0 means no linear relation.

    An explanation and the formula for correlation:
    https://www.socscistatistics.com/tests/pearson/

    Example
    -------
    >>> pearsons_correlation(torch.rand((100, 1)), torch.rand((100, 1)))
    torch.Tensor(0.5)

    Parameters
    ----------
    x : torch.Tensor
        Dataset, shape [n, m] with m signals of n datapoints.
    y : torch.Tensor
        Dataset, shape [n, m] with m signals of n datapoints.

    Returns
    -------
    torch.Tensor
        Correlation between x and y for each pair of signals. The entry is
        nan when one of the two signals is constant.

    Raises
    ------
    AssertionError
        If x or y is not exactly a torch.Tensor, or if their shapes differ.

    Warns
    -----
    UserWarning
        If a signal has zero variance (is constant), which makes the
        correlation nan.
    """
    both_tensors = type(x) is torch.Tensor and type(y) is torch.Tensor
    if not both_tensors:
        raise AssertionError("Invalid type for arguments provided")
    assert x.shape == y.shape, "Dimensions of data are different."

    # Deviations from the per-signal mean.
    x_centered = x - x.mean(dim=0)
    y_centered = y - y.mean(dim=0)

    # Sums of squares and of cross products along the datapoint axis.
    x_squares = torch.sum(x_centered**2, dim=0)
    y_squares = torch.sum(y_centered**2, dim=0)
    cross = torch.sum(x_centered * y_centered, dim=0)

    # A zero sum of squares means a constant signal: the division below
    # then yields nan, so tell the caller why.
    if (x_squares == 0.0).any().item() or (y_squares == 0.0).any().item():
        warnings.warn("Variance of dataset is 0, correlation is nan.")

    denominator = torch.sqrt(x_squares) * torch.sqrt(y_squares)
    return cross / denominator




[docs]
def corrsig(output: torch.Tensor,
            target: torch.Tensor,
            sigmoid_center: float = 0,
            sigmoid_scale: float = 1,
            corr_shift: float = 1.1) -> torch.Tensor:
    """
    Correlation/sigmoid loss function for gradient descent.

    Per signal the loss is
    ``(corr_shift - corr) / sigmoid((delta - sigmoid_center) / sigmoid_scale)``
    where ``corr`` is the Pearson correlation of output and target and
    ``delta`` is the smallest output whose target is 1 minus the largest
    output whose target is 0.

    For values of parameters see this paper:
    https://www.nature.com/articles/s41565-020-00779-y

    Example
    -------
    >>> corrsig(torch.rand((100, 1)), torch.round(torch.rand((100, 1))))
    torch.Tensor(2.5)

    Parameters
    ----------
    output : torch.Tensor
        The output data, shape [n, m] with m signals of n datapoints.
    target : torch.Tensor
        The target data, shape [n, m] with m signals of n datapoints;
        should be binary.
    sigmoid_center : float
        Center of the sigmoid, by default 0.
    sigmoid_scale : float
        Scale of the sigmoid (divides its argument), by default 1.
    corr_shift : float
        Value the correlation is subtracted from, by default 1.1.

    Returns
    -------
    torch.Tensor
        Value of the loss function for each pair of signals.

    Raises
    ------
    AssertionError
        If output or target is not exactly a torch.Tensor, or if their
        shapes differ.
    """
    both_tensors = (type(output) is torch.Tensor
                    and type(target) is torch.Tensor)
    if not both_tensors:
        raise AssertionError("Invalid type for arguments provided")
    assert output.shape == target.shape, "Dimensions of data are different."

    correlation = pearsons_correlation(output, target)

    # Gap between the two classes: lowest "high" output minus highest
    # "low" output, one value per signal.
    gap = torch.zeros(output.shape[1],
                      device=output.device,
                      dtype=output.dtype)
    columns = zip(output.unbind(dim=1), target.unbind(dim=1))
    for idx, (signal, labels) in enumerate(columns):
        lowest_high = torch.min(signal[labels == 1])
        highest_low = torch.max(signal[labels == 0])
        gap[idx] = lowest_high - highest_low

    squashed_gap = torch.sigmoid((gap - sigmoid_center) / sigmoid_scale)
    return (corr_shift - correlation) / squashed_gap




[docs]
def fisher_fit(output: torch.Tensor,
               target: torch.Tensor,
               default_value=False) -> torch.Tensor:
    """
    Fisher-discriminant fitness function for the genetic algorithm.

    Returns the result of ``fisher`` (the negative of the Fisher linear
    discriminant; see that function for details) unless ``default_value``
    is True, in which case the default fitness of 0 is returned for every
    signal.

    Example
    -------
    >>> fisher_fit(torch.rand((100, 3)), torch.rand((100, 3)),
                   False)
    torch.Tensor([2.5, 1.2, 0.5])
    >>> fisher_fit(torch.rand((100, 3)), torch.rand((100, 3)),
                   True)
    torch.Tensor([0.0, 0.0, 0.0])

    Parameters
    ----------
    output : torch.Tensor
        The output data, shape [n, m] with m signals of n datapoints.
    target : torch.Tensor
        The target data, shape [n, m] with m signals of n datapoints;
        should be binary.
    default_value : bool, optional
        If True, return the default fitness (0) instead of computing the
        discriminant. By default False.

    Returns
    -------
    torch.Tensor
        Default value or calculated fitness for each pair of signals.

    Raises
    ------
    AssertionError
        If an argument is not exactly of the expected type, or if output
        and target do not have the same shape.
    """
    types_ok = (type(output) is torch.Tensor
                and type(target) is torch.Tensor
                and type(default_value) is bool)
    if not types_ok:
        raise AssertionError("Invalid type for arguments provided")
    assert output.shape == target.shape, "Dimensions of data are different."

    if not default_value:
        return fisher(output, target)

    # Default fitness: zero for every signal.
    n_signals = output.shape[1]
    return torch.zeros(n_signals, device=output.device, dtype=output.dtype)




[docs]
def fisher(output: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """
    Calculate the negative of the Fisher linear discriminant between
    two datasets. Used as a loss function for gradient descent.

    For every signal the outputs are split by target value (1 = "high",
    0 = "low") and the discriminant is
    ``(mean_high - mean_low)**2 / (var_high + var_low)``, with the variances
    computed by ``torch.var`` with its default settings.

    More information here:
    https://sthalles.github.io/fisher-linear-discriminant/

    Example
    -------
    >>> fisher(torch.rand((100, 3)), torch.round(torch.rand((100, 3))))
    torch.Tensor([-2.5, -1.2, -0.5])

    Parameters
    ----------
    output : torch.Tensor
        Dataset, shape [n, m] with m signals of n datapoints.
    target : torch.Tensor
        Dataset, shape [n, m] with m signals of n datapoints;
        should be binary.

    Returns
    -------
    torch.Tensor
        Negative of the Fisher linear discriminant for each pair of
        signals.

    Raises
    ------
    AssertionError
        If output or target is not exactly a torch.Tensor, or if their
        dimensions are not the same.

    Warns
    -----
    UserWarning
        If one of the two classes of a signal has variance 0 (is uniform).
        When both variances are 0 the result for that signal is inf or nan.
    """
    if type(output) is not torch.Tensor or type(target) is not torch.Tensor:
        raise AssertionError("Invalid type for arguments provided")
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if output.shape != target.shape:
        raise AssertionError("Dimensions of data are different.")

    result = torch.zeros(output.shape[1],
                         device=output.device,
                         dtype=output.dtype)
    for i in range(output.shape[1]):
        signal = output[:, i]
        labels = target[:, i]
        x_high = signal[labels == 1]
        x_low = signal[labels == 0]
        m0, m1 = torch.mean(x_low), torch.mean(x_high)
        s0, s1 = torch.var(x_low), torch.var(x_high)
        if bool(s0 == 0.0) or bool(s1 == 0.0):
            # The message used to talk about "correlation", a leftover from
            # pearsons_correlation; it now names the right quantity.
            warnings.warn(
                "Variance of dataset is 0, Fisher value may be inf or nan.")
        mean_separation = (m1 - m0)**2
        result[i] = mean_separation / (s0 + s1)
    return -result




[docs]
def sigmoid_nn_distance(output: torch.Tensor,
                        target: torch.Tensor = None,
                        sigmoid_center: float = 0.5,
                        sigmoid_scale: float = 2.0) -> torch.Tensor:
    """
    Sigmoid of nearest neighbour distance: a squashed version of the
    internal distances between points.
    Used as a loss function for gradient descent.

    The nearest-neighbour distances are obtained from
    ``get_clamped_intervals(output, mode="single_nn")``; the loss per signal
    is ``-mean(sigmoid(dist / sigmoid_scale) - sigmoid_center)``.

    For values of parameters see this paper:
    https://www.nature.com/articles/s41565-020-00779-y

    Example
    -------
    >>> sigmoid_nn_distance(torch.rand((100, 3)))
    torch.Tensor([-0.0012, -0.0011, -0.0013])

    Parameters
    ----------
    output : torch.Tensor
        The output data, shape [n, m] with m signals of n datapoints.
    target : torch.Tensor, optional
        The target data, will not be used. May be omitted (None).
    sigmoid_center : float
        Value subtracted from the sigmoid, by default 0.5.
    sigmoid_scale : float
        Scale of the sigmoid (divides the distances), by default 2.0.

    Returns
    -------
    torch.Tensor
        Negative mean over the datapoints of the shifted sigmoid of the
        nearest neighbour distances, one value per signal.

    Raises
    ------
    AssertionError
        If output is not exactly a torch.Tensor, or if target is given and
        is not exactly a torch.Tensor.

    Warns
    -----
    UserWarning
        If target data is provided, to warn that it will not be used.
    """
    if type(output) is not torch.Tensor:
        raise AssertionError("Invalid type for arguments provided")
    if target is not None:
        # The target is optional: only validate it when it is supplied.
        # Previously the default ``None`` failed the type check, so the
        # function could never be called without a target.
        if type(target) is not torch.Tensor:
            raise AssertionError("Invalid type for arguments provided")
        warnings.warn(
            "This loss function does not use target values. Target ignored.")
    dist_nn = get_clamped_intervals(output, mode="single_nn")
    return -1 * torch.mean(
        torch.sigmoid(dist_nn / sigmoid_scale) - sigmoid_center, dim=0)




[docs]
def get_clamped_intervals(output: torch.Tensor,
                          mode: str,
                          boundaries=None) -> torch.Tensor:
    """
    Sort and clamp the data, and find the distances between the datapoints.

    There are three modes:
    "single_nn" - for each point the smaller distance to a neighbor
    "double_nn" - simply the distances between the points
    "intervals" - for each point the summed distance to the point in front
    and behind it

    The lower and upper boundary act as an extra neighbor below the
    smallest and above the largest datapoint.

    Example
    -------
    >>> output = torch.tensor([3.0, 1.0, 8.0, 9.0, 5.0]).unsqueeze(dim=1)
    >>> clamp = [1, 9]
    >>> get_clamped_intervals(output, "single_nn", clamp)
    torch.tensor([0.0, 2.0, 2.0, 1.0, 0.0])
    >>> get_clamped_intervals(output, "double_nn", clamp)
    torch.tensor([0.0, 2.0, 2.0, 3.0, 1.0, 0.0])
    >>> get_clamped_intervals(output, "intervals", clamp)
    torch.tensor([2.0, 4.0, 5.0, 4.0, 1.0])

    Here we have a dataset which ordered is 1, 3, 5, 8, 9.
    The distances between the points are 0, 2, 2, 3, 1, 0 (double).
    The smaller distance for each is 0, 2, 2, 1, 0 (single).
    The sum from both sides is 2, 4, 5, 4, 1 (intervals).

    Parameters
    ----------
    output : torch.Tensor
        Dataset, shape [n, m] with m signals of n datapoints.
    mode : str
        Mode for nearest neighbor. Can be
        "single_nn", "double_nn" or "intervals"
    boundaries : list[float], optional
        Boundary values for clamping [min, max]. Defaults to [0.0, 1.0]
        when omitted or None.

    Returns
    -------
    torch.Tensor
        Distances between the datapoints: n + 1 rows for "double_nn" (and
        for an unrecognized mode), n rows for "single_nn" and "intervals".

    Raises
    ------
    AssertionError
        If output is not exactly a torch.Tensor or mode is not exactly a
        str.

    Warns
    -----
    UserWarning
        If mode is not recognized; the "double_nn" result is returned.
    """
    if type(output) is not torch.Tensor or type(mode) is not str:
        raise AssertionError("Invalid type for arguments provided")
    if boundaries is None:
        # None sentinel instead of a mutable list as default argument.
        boundaries = [0.0, 1.0]

    # First we sort the output, and clip the output to a fixed interval.
    output_sorted = output.sort(dim=0)[0]
    output_clamped = output_sorted.clamp(boundaries[0], boundaries[1])

    # Then we prepare two tensors which we subtract from each other to
    # calculate nearest neighbour distances.
    # NOTE(review): TorchUtils.format is presumably returning the boundaries
    # as a tensor on the requested device/dtype - confirm against
    # brainspy.utils.pytorch.
    boundaries = TorchUtils.format(boundaries,
                                   device=output.device,
                                   data_type=output.dtype)
    boundary_low = boundaries[0] * torch.ones(
        [1, output.shape[1]], device=output.device, dtype=output.dtype)
    boundary_high = boundaries[1] * torch.ones(
        [1, output.shape[1]], device=output.device, dtype=output.dtype)
    output_highside = torch.cat((output_clamped, boundary_high), dim=0)
    output_lowside = torch.cat((boundary_low, output_clamped), dim=0)

    # Calculate the actual distance between points. Row k is the distance
    # between sorted point k and its predecessor (the boundaries count as
    # the outermost neighbours). The former all-ones "multiplier" had no
    # effect on this product and has been dropped.
    dist = output_highside - output_lowside

    if mode == "single_nn":
        # Only give nearest neighbour (single!) distance
        return torch.minimum(dist[1:], dist[:-1])
    elif mode == "double_nn":
        return dist
    elif mode == "intervals":
        # Determine the intervals between the points, up and down together.
        return dist[1:] + dist[:-1]
    else:
        warnings.warn("Nearest neightbour distance mode not recongized; "
                      "assuming double_nn.")
        return dist