Source code for vectorose.sphere_base

"""Basics for spherical histogram construction.

This module contains basic tools for different representations of
optionally-nested spherical histograms.
"""

import abc
from functools import partial
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
import pandas.core.generic
import pyvista as pv

from . import util



[docs]
class SphereBase(abc.ABC):
    """Base class for a spherical histogram.

    Vectors are binned along two axes: a magnitude ``shell`` and an
    implementation-specific orientation bin. This class provides the
    magnitude binning and the histogram assembly; concrete subclasses
    supply the orientation side by implementing the abstract members
    :attr:`orientation_cols`, :meth:`_compute_orientation_binning`,
    :meth:`_construct_orientation_index`, :meth:`create_mesh`,
    :meth:`convert_vectors_to_cartesian_array` and
    :meth:`get_cell_indices`.
    """

    # Attributes
    number_of_shells: int
    """Number of shells to consider for bivariate vector histograms."""

    magnitude_range: Optional[Tuple[float, float]]
    """Range for the magnitude values.

    Maximum and minimum values to consider for the magnitude. If ``None``,
    then the maximum and minimum values are computed from the provided
    vectors.
    """

    magnitude_precision: Optional[int] = 8
    """Precision with which to round the magnitudes when binning.

    To avoid floating point errors, the vector magnitudes may be rounded
    before binning. This option allows the precision of the rounding to be
    set. If ``None``, then no rounding is performed.
    """

    @property
    def hist_group_cols(self) -> List[str]:
        """Names of the histogram columns to use for sorting."""
        return self.magnitude_shell_cols + self.orientation_cols

    @property
    def magnitude_shell_cols(self) -> List[str]:
        """Name of the histogram columns to use for magnitude."""
        return ["shell"]

    @property
    @abc.abstractmethod
    def orientation_cols(self) -> List[str]:
        """Name of the histogram columns to use for orientation."""
        raise NotImplementedError(
            "This abstract property must be implemented in subclasses."
        )

    def __init__(
        self,
        number_of_shells: int = 1,
        magnitude_range: Optional[Tuple[float, float]] = None,
    ):
        self.number_of_shells = number_of_shells
        self.magnitude_range = magnitude_range


[docs]
    def assign_histogram_bins(
        self, vectors: np.ndarray
    ) -> Tuple[pd.DataFrame, np.ndarray]:
        """Label every vector with its magnitude shell and orientation bin.

        Parameters
        ----------
        vectors
            Array of shape ``(n, 3)`` containing the Cartesian components
            of the vectors from which to construct the histogram.

        Returns
        -------
        pandas.DataFrame
            The prepared vectors with the shell column and the
            implementation-specific orientation bin column(s) appended.
        numpy.ndarray
            Histogram bin edges for the magnitude shells.

        Warnings
        --------
        All zero-vectors must be removed from the dataset before
        processing. These vectors have no orientation and thus cannot be
        properly assigned to an orientation bin.
        """

        # Tabulate the raw array, then let the implementation reshape it.
        vector_table = self._initial_vector_data_preparation(
            util.convert_vectors_to_data_frame(vectors)
        )

        # Magnitude first: the shell labels become an extra column.
        shell_labels, shell_edges = self._compute_magnitude_bins(vector_table)
        vector_table = pd.concat([vector_table, shell_labels], axis=1)

        # The orientation binning receives the table including the shells.
        orientation_labels = self._compute_orientation_binning(vector_table)
        labelled_table = pd.concat([vector_table, orientation_labels], axis=1)

        return labelled_table, shell_edges



[docs]
    def _initial_vector_data_preparation(self, vectors: pd.DataFrame) -> pd.DataFrame:
        """Prepare the vectors for histogram construction.

        Convert the vectors into a representation specific for the
        histogram spherical implementation. If spatial coordinates are
        provided for the vectors, these are preserved.

        Parameters
        ----------
        vectors
            DataFrame with ``n`` rows and either 3 or 6 columns. The
            required vector component columns are ``vx, vy, vz``. Optional
            spatial coordinate columns are ``x, y, z``. It is preferrable
            (but not required) for the spatial columns to be the first 3
            columns.

        Returns
        -------
        pandas.DataFrame
            DataFrame containing ``n`` rows and a subclass-specific
            number of columns. The columns represent an alternative
            representation of the vectors to assist in orientation binning.
            If spatial coordinate columns were present in the original data
            they will be preserved in the output.

        Warnings
        --------
        This method should typically **not** be overridden. The
        implementation-specific functionality should be written in the
        method :meth:`_initial_vector_component_preparation`, which is
        called by this function.
        """

        processed_vector_data = self._initial_vector_component_preparation(vectors)

        # Add back the locations, if necessary
        number_of_columns = len(vectors.columns)

        if number_of_columns > 3:
            location_data = vectors.loc[:, ["x", "y", "z"]]
            processed_vector_data = location_data.join(processed_vector_data)

        return processed_vector_data



[docs]
    def _initial_vector_component_preparation(
        self, vectors: pd.DataFrame
    ) -> pd.DataFrame:
        """Prepare the vector components for histogram construction.

        Override this method to include specific operations that should be
        performed on the vectors in order to construct the histogram in the
        specific implementation.

        Warnings
        --------
        This function should **not** perform any tasks related to the
        vector spatial locations, if those are included in the data. Those
        are handled separately by :meth:`._initial_vector_data_preparation`
        which calls this function.
        """

        vector_components = vectors.loc[:, ["vx", "vy", "vz"]]

        return vector_components



[docs]
    def _compute_magnitude_bins(
        self, vectors: pd.DataFrame
    ) -> Tuple[pd.Series, np.ndarray]:
        """Perform binning based on magnitude.

        Construct the magnitude histogram for the provided vectors.

        Parameters
        ----------
        vectors
            The vectors from which the magnitude histogram is to be
            constructed.

        Returns
        -------
        pandas.Series
            The magnitude shell number for each vector, in a
            :class:`pandas.Series` called ``shell``.
        numpy.ndarray
            Array containing the histogram bin boundaries used to construct
            the histogram. The length of this array corresponds is one more
            than :attr:`SphereBase.number_of_shells`.
        """
        magnitudes = vectors.loc[:, "magnitude"]

        # Define the magnitude bin edges
        if self.number_of_shells > 1:
            if self.magnitude_range is None:
                offset = 10 ** -(self.magnitude_precision or 8)
                max_magnitude = magnitudes.max() + offset
                min_magnitude = magnitudes.min() - offset
                magnitude_range = (min_magnitude, max_magnitude)
            else:
                magnitude_range = self.magnitude_range
            magnitude_bin_edges = np.histogram_bin_edges(
                magnitudes, bins=self.number_of_shells, range=magnitude_range
            )

            # Don't consider the initial bin edge.
            internal_bin_edges = magnitude_bin_edges[1:]

            # Round the magnitudes, if requested
            if self.magnitude_precision is not None:
                magnitudes = np.round(magnitudes, self.magnitude_precision)

            # Assign the vectors the correct bins
            magnitude_histogram_bins = np.digitize(
                magnitudes, internal_bin_edges, right=True
            )
        else:
            magnitude_histogram_bins = np.zeros(len(magnitudes), dtype=int)
            # Set the minimum bin to be the smallest dataset value
            lower_bin = magnitudes.min()

            # Set the upper bin to be just above the maximum value
            upper_bin = magnitudes.max() + (10 ** -(self.magnitude_precision - 1))
            magnitude_bin_edges = np.array([lower_bin, upper_bin])

        magnitude_histogram_bins = pd.Series(magnitude_histogram_bins, name="shell")

        return magnitude_histogram_bins, magnitude_bin_edges



[docs]
    @abc.abstractmethod
    def _compute_orientation_binning(
        self, vectors: pd.DataFrame
    ) -> pd.core.generic.NDFrame:
        """Bin the provided vectors based on orientation.

        Parameters
        ----------
        vectors
            The vectors to place in orientation bins.

        Returns
        -------
        pandas.Series or pandas.DataFrame
            The orientation bin(s) corresponding to each vector. The number
            of columns will depend on the specific sphere representation
            used.
        """

        raise NotImplementedError("Subclasses must implement this abstract method!")



[docs]
    def _construct_histogram_index(self) -> pd.MultiIndex:
        """Construct the index for the histogram."""

        magnitude_index = self._construct_magnitude_index()
        orientation_index = self._construct_orientation_index()

        magnitude_index_arr = magnitude_index.to_frame(index=False).to_numpy()
        orientation_index_arr = orientation_index.to_frame(index=False).to_numpy()

        number_of_shells = len(magnitude_index_arr)
        number_of_orientations = len(orientation_index_arr)

        # Repeat each index
        magnitude_index_complete = np.repeat(
            magnitude_index_arr, number_of_orientations, axis=0
        )
        orientation_index_complete = np.tile(
            orientation_index_arr, (number_of_shells, 1)
        )

        raw_index_arrays = [magnitude_index_complete, orientation_index_complete]
        headers_arrays = [self.magnitude_shell_cols, self.orientation_cols]

        # And now combine everything!
        index_array = np.concatenate(raw_index_arrays, axis=-1)
        headers = np.concatenate(headers_arrays)

        # And build a data frame from this
        index_data_frame = pd.DataFrame(index_array, columns=headers)

        multi_index = pd.MultiIndex.from_frame(index_data_frame)

        return multi_index



[docs]
    def _construct_magnitude_index(self) -> pd.Index:
        """Construct the index for the magnitude bins."""

        index = pd.RangeIndex(
            start=0, stop=self.number_of_shells, name=self.magnitude_shell_cols[0]
        )

        return index



[docs]
    @abc.abstractmethod
    def _construct_orientation_index(self) -> pd.Index:
        """Construct the index for the orientation bins."""

        raise NotImplementedError("Subclasses must implement this abstract method!")



[docs]
    def construct_histogram(
        self,
        binned_data: pd.DataFrame,
        return_fraction: bool = True,
    ) -> pd.Series:
        """Construct a histogram based on the labelled data.

        Using the binned data, construct a histogram with either the counts
        or the proportion of points in each face.

        Parameters
        ----------
        binned_data
            All vectors, with their respective bins, depending on the
            current sphere design.
        return_fraction
            Indicate whether the values returned should be the raw counts
            or the proportions.

        Returns
        -------
        pandas.Series
            The counts or proportions of vectors in each case, ordered by
            the columns specified in
            :attr:`SphereBase.hist_group_cols`.
        """

        grouping_columns = self.hist_group_cols

        # Use groupby to perform the grouping
        original_histogram = binned_data.groupby(grouping_columns).apply(
            len, include_groups=False
        )

        # Modify the index to account for any missing bins.
        multi_index = self._construct_histogram_index()

        filled_histogram = original_histogram.reindex(index=multi_index, fill_value=0)
        filled_histogram.name = "frequency"

        if return_fraction:
            number_of_vectors = len(binned_data)

            filled_histogram /= number_of_vectors

        return filled_histogram



[docs]
    def construct_marginal_magnitude_histogram(
        self, binned_data: pd.DataFrame, return_fraction: bool = True
    ) -> pd.Series:
        """Construct the marginal magnitude histogram.

        Compute the marginal histogram of the magnitude data, disregarding
        the orientation differences. The resulting histogram has the same
        number of bins as the number of shells.

        Parameters
        ----------
        binned_data
            Data frame containing the labelled vectors.
        return_fraction
            Indicate whether the values returned should be the raw counts
            or the proportions.

        Returns
        -------
        pandas.Series
            The counts or proportions of vectors in each magnitude shell.

        See Also
        --------
        SphereBase.assign_histogram_bins:
            Label a set of vectors into different bins.
        SphereBase.construct_histogram:
            Construct a bivariate magnitude and orientation histogram.
        SphereBase.construct_marginal_orientation_histogram:
            Construct a marginal orientation histogram.
        """

        # Group based only on the magnitude bin
        counts_by_shell = binned_data.groupby(self.magnitude_shell_cols).apply(
            len, include_groups=False
        )

        # Construct the index (in case some bins are zero).
        magnitude_index = self._construct_magnitude_index()

        magnitude_histogram = counts_by_shell.reindex(
            index=magnitude_index, fill_value=0
        )

        if return_fraction:
            number_of_vectors = len(binned_data)

            magnitude_histogram /= number_of_vectors

        return magnitude_histogram



[docs]
    def construct_marginal_orientation_histogram(
        self, binned_data: pd.DataFrame, return_fraction: bool = True
    ) -> pd.Series:
        """Construct the marginal orientation histogram.

        Compute the marginal histogram of the orientation data,
        disregarding the magnitude differences. The resulting histogram has
        the same configuration of bins as a single shell.

        Parameters
        ----------
        binned_data
            Data frame containing the labelled vectors.
        return_fraction
            Indicate whether the values returned should be the raw counts
            or the proportions.

        Returns
        -------
        pandas.Series
            The counts or proportions of vectors in each orientation bin.

        See Also
        --------
        SphereBase.assign_histogram_bins:
            Label a set of vectors into different bins.
        SphereBase.construct_histogram:
            Construct a bivariate magnitude and orientation histogram.
        SphereBase.construct_marginal_magnitude_histogram:
            Construct a marginal magnitude histogram.
        """

        # Group based on only the orientation data
        counts_by_orientation = binned_data.groupby(self.orientation_cols).apply(
            len, include_groups=False
        )

        # Construct the index (in case some orientations are zero).
        orientation_index = self._construct_orientation_index()
        orientation_index.names = self.orientation_cols

        orientation_histogram = counts_by_orientation.reindex(
            orientation_index, fill_value=0
        )

        if return_fraction:
            number_of_vectors = len(binned_data)

            orientation_histogram /= number_of_vectors

        return orientation_histogram



[docs]
    def construct_conditional_orientation_histogram(
        self, binned_data: pd.DataFrame
    ) -> pd.Series:
        """Construct the conditional orientation histogram.

        Construct the histogram of orientations conditioned on the
        magnitude. Within each shell, the returned fractions sum to 1.

        Parameters
        ----------
        binned_data
            Data frame containing the labelled vectors.

        Returns
        -------
        pandas.Series
            The proportion of vectors in each orientation relative to all
            vectors within that shell. The index used is the same as that
            obtained in the bivariate case.

        Warnings
        --------
        Unlike the bivariate and marginal histograms, this method does not
        allow returning raw counts. The returned values are proportions
        relative to each shell.
        """

        # Get the bivariate histogram with the counts
        bivariate_histogram = self.construct_histogram(
            binned_data, return_fraction=False
        )

        # And now, get the marginal magnitude histogram
        marginal_magnitude_histogram = self.construct_marginal_magnitude_histogram(
            binned_data, return_fraction=False
        )

        # Divide the bivariate distribution by the marginal to get the
        # conditional distribution.
        orientation_given_magnitude = bivariate_histogram / marginal_magnitude_histogram

        return orientation_given_magnitude



[docs]
    def construct_conditional_magnitude_histogram(
        self, binned_data: pd.DataFrame
    ) -> pd.Series:
        """Construct the conditional magnitude histogram.

        Construct the histogram of magnitudes conditioned on the
        orientation. Within each orientation bin, the returned fractions
        sum to 1.

        Parameters
        ----------
        binned_data
            Data frame containing the labelled vectors.

        Returns
        -------
        pandas.Series
            The proportion of vectors in each magnitude shell relative to
            all vectors having that orientation. The index used is the
            same as that obtained in the bivariate case, having the
            magnitude first, followed by the orientation parameters.

        Warnings
        --------
        Unlike the bivariate and marginal histograms, this method does not
        allow returning raw counts. The returned values are proportions
        relative to each shell.
        """

        # Get the bivariate histogram with the counts
        bivariate_histogram = self.construct_histogram(
            binned_data,
            return_fraction=False,
        )

        # And now, get the marginal magnitude histogram
        marginal_orientation_histogram = self.construct_marginal_orientation_histogram(
            binned_data, return_fraction=False
        )

        # Divide the bivariate distribution by the marginal to get the
        # conditional distribution.
        magnitude_given_orientation = (
            bivariate_histogram / marginal_orientation_histogram
        )

        return magnitude_given_orientation



[docs]
    @abc.abstractmethod
    def create_mesh(self) -> pv.PolyData:
        """Return the mesh representation of the current sphere."""

        raise NotImplementedError("Subclasses must implement this abstract method!")



[docs]
    def create_shell_mesh(
        self,
        histogram: pd.Series,
        radius: float = 1.0,
        series_name: Optional[str] = "frequency",
    ) -> pv.PolyData:
        """Create the mesh for a given shell.

        Using the provided histogram data for a specific shell, produce a
        sphere with the desired radius, storing the frequencies as face
        values.

        Parameters
        ----------
        histogram
            The counts or frequencies of orientations in each sphere face
            of the specific shell.
        radius
            Desired shell radius. This typically corresponds to a magnitude
            bin upper bound.
        series_name
            The name to associate with the provided scalar data. If `None`,
            then the value of :attr:`pandas.Series.name` is used.

        Returns
        -------
        pyvista.PolyData
            The constructed shell containing the desired scalars in the
            specified slot.
        """

        # First, construct the mesh that will underlie the shell
        shell_mesh = self.create_mesh()

        # Now, adjust the radius
        shell_mesh = shell_mesh.scale(radius)

        # Get the name
        series_name = series_name or histogram.name

        # Set the scalar values
        shell_mesh.cell_data[series_name] = histogram.astype(float)

        return shell_mesh



[docs]
    def create_histogram_meshes(
        self,
        histogram_data: pd.Series,
        magnitude_bins: Optional[np.ndarray],
        normalise_by_shell: bool = False,
    ) -> List[pv.PolyData]:
        """Create mesh shells for the supplied histogram.

        Parameters
        ----------
        histogram_data
            The binned histogram data, ordered by shell and then other
            implementation-specific parameters.
        magnitude_bins
            The upper bounds for the magnitude bins. These are used to
            determine the radius of each shell. If None, then all shells
            will have a radius of 1.
        normalise_by_shell
            Indicate whether each shell should be normalised with respect
            to its maximum value.


        Returns
        -------
        list of pyvista.PolyData
            List containing one mesh for each shell, with the appropriate
            scalar values assigned to the ``frequency`` array.

        Warnings
        --------
        The provided histogram must have been constructed with the current
        sphere, or an equivalent sphere.

        Notes
        -----
        The option `normalise_by_shell` produces meshes where the faces
        values are divided by the maximum value in their corresponding
        shell. The values can therefore be thought of as representing
        fractions of the respective maxima.
        """

        number_of_shells = self.number_of_shells

        if normalise_by_shell:
            shell_maxima = histogram_data.groupby("shell").max()

            # Warning! Assignment operator /= changes the original!
            normalised_data = histogram_data / shell_maxima
            histogram_data = normalised_data

        shell_list = []

        for i in range(number_of_shells):
            shell_histogram = histogram_data.loc[i]
            shell_radius = 1 if magnitude_bins is None else magnitude_bins[i + 1]

            shell = self.create_shell_mesh(shell_histogram, shell_radius)

            shell_index_array = np.ones(shell.n_cells, dtype=int) * i
            shell.cell_data["shell"] = shell_index_array

            shell_list.append(shell)

        return shell_list



[docs]
    @abc.abstractmethod
    def convert_vectors_to_cartesian_array(
        self,
        labelled_vectors: pd.DataFrame,
        create_unit_vectors: bool = False,
        include_spatial_coordinates: bool = False,
    ) -> np.ndarray:
        """Convert a set of labelled vectors into Cartesian coordinates.

        Each concrete implementation of a sphere may internally represent
        the vectors differently. This abstract method converts from that
        implementation-specific formatting to Cartesian coordinates.

        Parameters
        ----------
        labelled_vectors
            The set of labelled ``n`` labelled vectors in ``d`` dimensions,
            in the same format as produced by
            :meth:`SphereBase.assign_histogram_bins`.
        create_unit_vectors
            Indicate where the returned vectors should be unit vectors.
            Depending on the implementation, this may either remove an
            extraneous normalisation step later, or add an extra
            normalisation step now.
        include_spatial_coordinates
            Indicate whether to include spatial coordinates in the new
            array. This option may only be called if the vectors have
            spatial coordinates.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(n, d)`` containing the vector components in
            Cartesian coordinates.

        Warnings
        --------
        The option `include_spatial_coordinates` is only valid if the
        `labelled_vectors` include spatial coordinates.
        """

        raise NotImplementedError(
            "This abstract method must be implemented in subclasses."
        )



[docs]
    def get_vectors_from_single_cell(
        self, labelled_vectors: pd.DataFrame, selected_cell: pd.Series
    ) -> pd.DataFrame:
        """Extract vectors from a single selected cell.

        Isolate the vectors contained in a single mesh cell to filter based
        on either pure orientation, or a combination of magnitude and
        orientation.

        Parameters
        ----------
        labelled_vectors
            The set of labelled ``n`` labelled vectors in ``d`` dimensions,
            in the same format as produced by
            :meth:`SphereBase.assign_histogram_bins`.
        selected_cell
            The scalar values from the selected cell, as rows in a
            :class:`~pandas.Series`. The index should contain at least the
            entries in :attr:`.SphereBase.orientation_cols`.

        Returns
        -------
        pandas.DataFrame
            The set of labelled vectors falling in the selected cell. This
            :class:`~pandas.DataFrame` has the same format as
            `labelled_vectors`, but fewer entries.
        """
        # Determine if the filtering will be only based on orientation
        if all(col in selected_cell for col in self.magnitude_shell_cols):
            cols = self.hist_group_cols
        else:
            cols = self.orientation_cols

        # Perform indexing to isolate the vectors of interest
        vectors_in_cell = labelled_vectors.loc[
            np.all(
                selected_cell[cols] == labelled_vectors[cols],
                axis=1,
            )
        ]

        return vectors_in_cell



[docs]
    def get_vectors_from_selected_cells(
        self, labelled_vectors: pd.DataFrame, selected_cells: pd.DataFrame
    ) -> pd.DataFrame:
        """Extract vectors from selected cells.

        Isolate the vectors contained in specified shells and cells in
        order to filter the vector collection by magnitude and orientation.

        Parameters
        ----------
        labelled_vectors
            The set of labelled ``n`` labelled vectors in ``d`` dimensions,
            in the same format as produced by
            :meth:`SphereBase.assign_histogram_bins`.
        selected_cells
            The scalar values from the selected cells. The columns in this
            table should contain at least the entries in
            :attr:`.SphereBase.orientation_cols`.

        Returns
        -------
        pandas.DataFrame
            The set of labelled vectors falling in the selected cells. This
            :class:`~pandas.DataFrame` has the same format as
            `labelled_vectors`, but fewer entries.

        Warnings
        --------
        If the vectors were duplicated for the purpose of visualisation,
        that duplication is **not** preserved here.

        See Also
        --------
        .get_vectors_from_single_cell : Extract vectors from one cell.
        """

        # Apply the filtering to all the selected cells
        filtering_func = partial(self.get_vectors_from_single_cell, labelled_vectors)

        selected_vectors_series = selected_cells.apply(filtering_func, axis="columns")

        # Applying returns a Series of DataFrames, so we must concatenate!
        selected_vectors = pd.concat(selected_vectors_series.to_list())

        return selected_vectors



[docs]
    @abc.abstractmethod
    def get_cell_indices(self, bins: pd.DataFrame) -> pd.Series:
        """Get cell indices for specific bins.

        Get the mesh cell index for specified orientation bins.

        Parameters
        ----------
        bins
            DataFrame containing the implementation-specific orientation
            bin information for the desired cells

        Returns
        -------
        Series
            Indices of the mesh cells corresponding to the desired
            orientation bin.

        See Also
        --------
        .SphereBase.assign_histogram_bins :
            assign specific orientations and magnitudes to histogram bins.
        """

        raise NotImplementedError(
            "Subclasses must implement this abstract method."
        )