# Source code for vectorose.sphere_base

"""Basics for spherical histogram construction.

This module contains basic tools for different representations of
optionally-nested spherical histograms.
"""

import abc
from functools import partial
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
import pandas.core.generic
import pyvista as pv

from . import util


class SphereBase(abc.ABC):
    """Base class for a spherical histogram.

    The class implements the parts of histogram construction that do not
    depend on how the sphere surface is subdivided: magnitude (shell)
    binning, counting, marginal and conditional histograms, and the
    selection of vectors by cell. Concrete subclasses supply the
    orientation binning, the orientation index and the mesh.
    """

    # Attributes
    number_of_shells: int
    """Number of shells to consider for bivariate vector histograms."""

    magnitude_range: Optional[Tuple[float, float]]
    """Range for the magnitude values.

    Maximum and minimum values to consider for the magnitude. If ``None``,
    then the maximum and minimum values are computed from the provided
    vectors.
    """

    magnitude_precision: Optional[int] = 8
    """Precision with which to round the magnitudes when binning.

    To avoid floating point errors, the vector magnitudes may be rounded
    before binning. This option allows the precision of the rounding to
    be set. If ``None``, then no rounding is performed.
    """

    @property
    def hist_group_cols(self) -> List[str]:
        """Names of the histogram columns to use for sorting."""
        return self.magnitude_shell_cols + self.orientation_cols

    @property
    def magnitude_shell_cols(self) -> List[str]:
        """Name of the histogram columns to use for magnitude."""
        return ["shell"]

    @property
    @abc.abstractmethod
    def orientation_cols(self) -> List[str]:
        """Name of the histogram columns to use for orientation."""
        raise NotImplementedError(
            "This abstract property must be implemented in subclasses."
        )

    def __init__(
        self,
        number_of_shells: int = 1,
        magnitude_range: Optional[Tuple[float, float]] = None,
    ):
        """Store the shell configuration.

        Parameters
        ----------
        number_of_shells
            Number of magnitude shells in the histogram.
        magnitude_range
            Optional ``(minimum, maximum)`` magnitude bounds. If ``None``,
            the bounds are derived from the data when binning.
        """
        self.number_of_shells = number_of_shells
        self.magnitude_range = magnitude_range

    def assign_histogram_bins(
        self, vectors: np.ndarray
    ) -> Tuple[pd.DataFrame, np.ndarray]:
        """Assign vectors to the appropriate histogram bin.

        Parameters
        ----------
        vectors
            Array of shape ``(n, 3)`` containing the Cartesian components
            of the vectors from which to construct the histogram.

        Returns
        -------
        pandas.DataFrame
            All the vectors, including additional columns for the shell
            and the implementation-specific orientation bin.
        numpy.ndarray
            Histogram bin edges for the magnitude shells.

        Warnings
        --------
        All zero-vectors must be removed from the dataset before
        processing. These vectors have no orientation and thus cannot be
        properly assigned to an orientation bin.
        """
        # Create the vector data frame
        histogram = util.convert_vectors_to_data_frame(vectors)

        # Perform any additional histogram preparation
        histogram = self._initial_vector_data_preparation(histogram)

        # Perform the magnitude computations
        magnitude_bins, magnitude_bin_edges = self._compute_magnitude_bins(
            histogram
        )
        histogram = pd.concat([histogram, magnitude_bins], axis=1)

        # Perform the orientation binning
        orientation_bins = self._compute_orientation_binning(histogram)
        histogram = pd.concat([histogram, orientation_bins], axis=1)

        # Return the complete histogram
        return histogram, magnitude_bin_edges

    def _initial_vector_data_preparation(self, vectors: pd.DataFrame) -> pd.DataFrame:
        """Prepare the vectors for histogram construction.

        Convert the vectors into a representation specific for the
        histogram spherical implementation. If spatial coordinates are
        provided for the vectors, these are preserved.

        Parameters
        ----------
        vectors
            DataFrame with ``n`` rows and either 3 or 6 columns. The
            required vector component columns are ``vx, vy, vz``. Optional
            spatial coordinate columns are ``x, y, z``. It is preferable
            (but not required) for the spatial columns to be the first 3
            columns.

        Returns
        -------
        pandas.DataFrame
            DataFrame containing ``n`` rows and a subclass-specific number
            of columns. The columns represent an alternative
            representation of the vectors to assist in orientation
            binning. If spatial coordinate columns were present in the
            original data they will be preserved in the output.

        Warnings
        --------
        This method should typically **not** be overridden. The
        implementation-specific functionality should be written in the
        method :meth:`_initial_vector_component_preparation`, which is
        called by this function.
        """
        processed_vector_data = self._initial_vector_component_preparation(vectors)

        # Add back the locations, if necessary. Any frame wider than the
        # three component columns is taken to carry ``x, y, z``.
        number_of_columns = len(vectors.columns)

        if number_of_columns > 3:
            location_data = vectors.loc[:, ["x", "y", "z"]]
            processed_vector_data = location_data.join(processed_vector_data)

        return processed_vector_data

    def _initial_vector_component_preparation(
        self, vectors: pd.DataFrame
    ) -> pd.DataFrame:
        """Prepare the vector components for histogram construction.

        Override this method to include specific operations that should
        be performed on the vectors in order to construct the histogram
        in the specific implementation. The base implementation simply
        returns the ``vx, vy, vz`` columns.

        Warnings
        --------
        This function should **not** perform any tasks related to the
        vector spatial locations, if those are included in the data. Those
        are handled separately by
        :meth:`._initial_vector_data_preparation` which calls this
        function.
        """
        vector_components = vectors.loc[:, ["vx", "vy", "vz"]]

        return vector_components

    def _compute_magnitude_bins(
        self, vectors: pd.DataFrame
    ) -> Tuple[pd.Series, np.ndarray]:
        """Perform binning based on magnitude.

        Construct the magnitude histogram for the provided vectors.

        Parameters
        ----------
        vectors
            The vectors from which the magnitude histogram is to be
            constructed. A ``magnitude`` column is required.

        Returns
        -------
        pandas.Series
            The magnitude shell number for each vector, in a
            :class:`pandas.Series` called ``shell`` which shares the index
            of `vectors`.
        numpy.ndarray
            Array containing the histogram bin boundaries used to
            construct the histogram. The length of this array is one more
            than :attr:`SphereBase.number_of_shells`.

        Notes
        -----
        When :attr:`SphereBase.magnitude_range` is set explicitly,
        magnitudes below the lower bound are placed in shell ``0`` and
        magnitudes above the upper bound receive the shell number
        ``number_of_shells``, which lies outside the histogram index.
        """
        magnitudes = vectors.loc[:, "magnitude"]

        # ``None`` disables the rounding, but a padding width is still
        # needed for the bin edges, so fall back to the default precision.
        # An explicit ``None`` test keeps a precision of 0 meaningful.
        rounding_precision = self.magnitude_precision
        padding_precision = 8 if rounding_precision is None else rounding_precision

        # Define the magnitude bin edges
        if self.number_of_shells > 1:
            if self.magnitude_range is None:
                # Pad by one unit of the rounding precision so that the
                # rounded extrema still fall inside the outer edges.
                offset = 10.0 ** -padding_precision
                max_magnitude = magnitudes.max() + offset
                min_magnitude = magnitudes.min() - offset
                magnitude_range = (min_magnitude, max_magnitude)
            else:
                magnitude_range = self.magnitude_range

            magnitude_bin_edges = np.histogram_bin_edges(
                magnitudes, bins=self.number_of_shells, range=magnitude_range
            )

            # Don't consider the initial bin edge.
            internal_bin_edges = magnitude_bin_edges[1:]

            # Round the magnitudes, if requested
            if rounding_precision is not None:
                magnitudes = np.round(magnitudes, rounding_precision)

            # Assign the vectors the correct bins
            shell_numbers = np.digitize(magnitudes, internal_bin_edges, right=True)
        else:
            shell_numbers = np.zeros(len(magnitudes), dtype=int)

            # Set the minimum bin to be the smallest dataset value
            lower_bin = magnitudes.min()

            # Set the upper bin to be just above the maximum value
            upper_bin = magnitudes.max() + 10.0 ** -(padding_precision - 1)
            magnitude_bin_edges = np.array([lower_bin, upper_bin])

        # Reuse the input index so that the column-wise concatenation in
        # ``assign_histogram_bins`` lines up row for row.
        magnitude_histogram_bins = pd.Series(
            shell_numbers, index=vectors.index, name="shell"
        )

        return magnitude_histogram_bins, magnitude_bin_edges

    @abc.abstractmethod
    def _compute_orientation_binning(
        self, vectors: pd.DataFrame
    ) -> pd.core.generic.NDFrame:
        """Bin the provided vectors based on orientation.

        Parameters
        ----------
        vectors
            The vectors to place in orientation bins.

        Returns
        -------
        pandas.Series or pandas.DataFrame
            The orientation bin(s) corresponding to each vector. The
            number of columns will depend on the specific sphere
            representation used.
        """
        raise NotImplementedError("Subclasses must implement this abstract method!")

    def _construct_histogram_index(self) -> pd.MultiIndex:
        """Construct the index for the histogram.

        Every magnitude shell is paired with every orientation bin. The
        shells vary slowest, and the level names are those listed in
        :attr:`SphereBase.hist_group_cols`.

        Returns
        -------
        pandas.MultiIndex
            Index containing one entry per histogram bin. Each level keeps
            the dtype of the index it originates from.
        """
        magnitude_frame = self._construct_magnitude_index().to_frame(index=False)
        orientation_frame = self._construct_orientation_index().to_frame(
            index=False
        )

        # The level names come from the sphere, not from the raw indices.
        magnitude_frame.columns = list(self.magnitude_shell_cols)
        orientation_frame.columns = list(self.orientation_cols)

        number_of_shells = len(magnitude_frame)
        number_of_orientations = len(orientation_frame)

        # Repeat each shell for all orientations and cycle through the
        # orientations once per shell. Working with row positions, rather
        # than one merged array, keeps the dtype of every column intact.
        shell_positions = np.repeat(np.arange(number_of_shells), number_of_orientations)
        orientation_positions = np.tile(
            np.arange(number_of_orientations), number_of_shells
        )

        magnitude_complete = magnitude_frame.iloc[shell_positions].reset_index(
            drop=True
        )
        orientation_complete = orientation_frame.iloc[
            orientation_positions
        ].reset_index(drop=True)

        # And now combine everything!
        index_data_frame = pd.concat(
            [magnitude_complete, orientation_complete], axis=1
        )

        multi_index = pd.MultiIndex.from_frame(index_data_frame)

        return multi_index

    def _construct_magnitude_index(self) -> pd.Index:
        """Construct the index for the magnitude bins."""
        index = pd.RangeIndex(
            start=0, stop=self.number_of_shells, name=self.magnitude_shell_cols[0]
        )

        return index

    @abc.abstractmethod
    def _construct_orientation_index(self) -> pd.Index:
        """Construct the index for the orientation bins."""
        raise NotImplementedError("Subclasses must implement this abstract method!")

    def construct_histogram(
        self,
        binned_data: pd.DataFrame,
        return_fraction: bool = True,
    ) -> pd.Series:
        """Construct a histogram based on the labelled data.

        Using the binned data, construct a histogram with either the
        counts or the proportion of points in each face.

        Parameters
        ----------
        binned_data
            All vectors, with their respective bins, depending on the
            current sphere design.
        return_fraction
            Indicate whether the values returned should be the raw counts
            or the proportions.

        Returns
        -------
        pandas.Series
            The counts or proportions of vectors in each case, ordered by
            the columns specified in :attr:`SphereBase.hist_group_cols`.
            The series is named ``frequency``.
        """
        # Count the vectors falling in each occupied bin
        occupied_bin_counts = binned_data.groupby(self.hist_group_cols).size()

        # Modify the index to account for any missing bins.
        multi_index = self._construct_histogram_index()
        filled_histogram = occupied_bin_counts.reindex(
            index=multi_index, fill_value=0
        )
        filled_histogram.name = "frequency"

        if return_fraction:
            number_of_vectors = len(binned_data)
            filled_histogram = filled_histogram / number_of_vectors

        return filled_histogram

    def construct_marginal_magnitude_histogram(
        self, binned_data: pd.DataFrame, return_fraction: bool = True
    ) -> pd.Series:
        """Construct the marginal magnitude histogram.

        Compute the marginal histogram of the magnitude data, disregarding
        the orientation differences. The resulting histogram has the same
        number of bins as the number of shells.

        Parameters
        ----------
        binned_data
            Data frame containing the labelled vectors.
        return_fraction
            Indicate whether the values returned should be the raw counts
            or the proportions.

        Returns
        -------
        pandas.Series
            The counts or proportions of vectors in each magnitude shell.

        See Also
        --------
        SphereBase.assign_histogram_bins:
            Label a set of vectors into different bins.
        SphereBase.construct_histogram:
            Construct a bivariate magnitude and orientation histogram.
        SphereBase.construct_marginal_orientation_histogram:
            Construct a marginal orientation histogram.
        """
        # Group based only on the magnitude bin
        counts_by_shell = binned_data.groupby(self.magnitude_shell_cols).size()

        # Construct the index (in case some bins are zero).
        magnitude_index = self._construct_magnitude_index()
        magnitude_histogram = counts_by_shell.reindex(
            index=magnitude_index, fill_value=0
        )

        if return_fraction:
            number_of_vectors = len(binned_data)
            magnitude_histogram = magnitude_histogram / number_of_vectors

        return magnitude_histogram

    def construct_marginal_orientation_histogram(
        self, binned_data: pd.DataFrame, return_fraction: bool = True
    ) -> pd.Series:
        """Construct the marginal orientation histogram.

        Compute the marginal histogram of the orientation data,
        disregarding the magnitude differences. The resulting histogram
        has the same configuration of bins as a single shell.

        Parameters
        ----------
        binned_data
            Data frame containing the labelled vectors.
        return_fraction
            Indicate whether the values returned should be the raw counts
            or the proportions.

        Returns
        -------
        pandas.Series
            The counts or proportions of vectors in each orientation bin.

        See Also
        --------
        SphereBase.assign_histogram_bins:
            Label a set of vectors into different bins.
        SphereBase.construct_histogram:
            Construct a bivariate magnitude and orientation histogram.
        SphereBase.construct_marginal_magnitude_histogram:
            Construct a marginal magnitude histogram.
        """
        # Group based on only the orientation data
        counts_by_orientation = binned_data.groupby(self.orientation_cols).size()

        # Construct the index (in case some orientations are zero). The
        # names are set on a copy, so an index cached by the subclass is
        # left untouched.
        orientation_index = self._construct_orientation_index().set_names(
            self.orientation_cols
        )
        orientation_histogram = counts_by_orientation.reindex(
            orientation_index, fill_value=0
        )

        if return_fraction:
            number_of_vectors = len(binned_data)
            orientation_histogram = orientation_histogram / number_of_vectors

        return orientation_histogram

    def construct_conditional_orientation_histogram(
        self, binned_data: pd.DataFrame
    ) -> pd.Series:
        """Construct the conditional orientation histogram.

        Construct the histogram of orientations conditioned on the
        magnitude. Within each shell, the returned fractions sum to 1.

        Parameters
        ----------
        binned_data
            Data frame containing the labelled vectors.

        Returns
        -------
        pandas.Series
            The proportion of vectors in each orientation relative to all
            vectors within that shell. The index used is the same as that
            obtained in the bivariate case.

        Warnings
        --------
        Unlike the bivariate and marginal histograms, this method does not
        allow returning raw counts. The returned values are proportions
        relative to each shell. A shell containing no vectors yields
        ``NaN`` entries, since its counts are divided by zero.
        """
        # Get the bivariate histogram with the counts
        bivariate_histogram = self.construct_histogram(
            binned_data, return_fraction=False
        )

        # And now, get the marginal magnitude histogram
        marginal_magnitude_histogram = self.construct_marginal_magnitude_histogram(
            binned_data, return_fraction=False
        )

        # Divide the bivariate distribution by the marginal to get the
        # conditional distribution. The division is aligned on the shared
        # shell level of the two indices.
        orientation_given_magnitude = bivariate_histogram / marginal_magnitude_histogram

        return orientation_given_magnitude

    def construct_conditional_magnitude_histogram(
        self, binned_data: pd.DataFrame
    ) -> pd.Series:
        """Construct the conditional magnitude histogram.

        Construct the histogram of magnitudes conditioned on the
        orientation. Within each orientation bin, the returned fractions
        sum to 1.

        Parameters
        ----------
        binned_data
            Data frame containing the labelled vectors.

        Returns
        -------
        pandas.Series
            The proportion of vectors in each magnitude shell relative to
            all vectors having that orientation. The index used is the
            same as that obtained in the bivariate case, having the
            magnitude first, followed by the orientation parameters.

        Warnings
        --------
        Unlike the bivariate and marginal histograms, this method does not
        allow returning raw counts. The returned values are proportions
        relative to each orientation bin. An orientation bin containing no
        vectors yields ``NaN`` entries, since its counts are divided by
        zero.
        """
        # Get the bivariate histogram with the counts
        bivariate_histogram = self.construct_histogram(
            binned_data,
            return_fraction=False,
        )

        # And now, get the marginal orientation histogram
        marginal_orientation_histogram = self.construct_marginal_orientation_histogram(
            binned_data, return_fraction=False
        )

        # Divide the bivariate distribution by the marginal to get the
        # conditional distribution. The division is aligned on the shared
        # orientation level(s) of the two indices.
        magnitude_given_orientation = (
            bivariate_histogram / marginal_orientation_histogram
        )

        return magnitude_given_orientation

    @abc.abstractmethod
    def create_mesh(self) -> "pv.PolyData":
        """Return the mesh representation of the current sphere."""
        raise NotImplementedError("Subclasses must implement this abstract method!")

    def create_shell_mesh(
        self,
        histogram: pd.Series,
        radius: float = 1.0,
        series_name: Optional[str] = "frequency",
    ) -> "pv.PolyData":
        """Create the mesh for a given shell.

        Using the provided histogram data for a specific shell, produce a
        sphere with the desired radius, storing the frequencies as face
        values.

        Parameters
        ----------
        histogram
            The counts or frequencies of orientations in each sphere face
            of the specific shell.
        radius
            Desired shell radius. This typically corresponds to a
            magnitude bin upper bound.
        series_name
            The name to associate with the provided scalar data. If
            `None`, then the value of :attr:`pandas.Series.name` is used.

        Returns
        -------
        pyvista.PolyData
            The constructed shell containing the desired scalars in the
            specified slot.
        """
        # First, construct the mesh that will underlie the shell
        shell_mesh = self.create_mesh()

        # Now, adjust the radius
        shell_mesh = shell_mesh.scale(radius)

        # Get the name
        series_name = series_name or histogram.name

        # Set the scalar values
        shell_mesh.cell_data[series_name] = histogram.astype(float)

        return shell_mesh

    def create_histogram_meshes(
        self,
        histogram_data: pd.Series,
        magnitude_bins: Optional[np.ndarray],
        normalise_by_shell: bool = False,
    ) -> List["pv.PolyData"]:
        """Create mesh shells for the supplied histogram.

        Parameters
        ----------
        histogram_data
            The binned histogram data, ordered by shell and then other
            implementation-specific parameters.
        magnitude_bins
            The magnitude bin edges, as returned by
            :meth:`SphereBase.assign_histogram_bins`. The upper edge of
            each bin is used as the radius of the respective shell. If
            None, then all shells will have a radius of 1.
        normalise_by_shell
            Indicate whether each shell should be normalised with respect
            to its maximum value.

        Returns
        -------
        list of pyvista.PolyData
            List containing one mesh for each shell, with the appropriate
            scalar values assigned to the ``frequency`` array and the
            shell number stored in the ``shell`` array.

        Warnings
        --------
        The provided histogram must have been constructed with the current
        sphere, or an equivalent sphere.

        Notes
        -----
        The option `normalise_by_shell` produces meshes where the faces
        values are divided by the maximum value in their corresponding
        shell. The values can therefore be thought of as representing
        fractions of the respective maxima.
        """
        if normalise_by_shell:
            shell_maxima = histogram_data.groupby("shell").max()

            # A new series is created here on purpose: an in-place
            # division would modify the histogram passed by the caller.
            histogram_data = histogram_data / shell_maxima

        shell_list = []

        for shell_number in range(self.number_of_shells):
            shell_histogram = histogram_data.loc[shell_number]

            # Entry ``i + 1`` of the edges is the upper bound of shell ``i``
            if magnitude_bins is None:
                shell_radius = 1
            else:
                shell_radius = magnitude_bins[shell_number + 1]

            shell = self.create_shell_mesh(shell_histogram, shell_radius)
            shell.cell_data["shell"] = np.ones(shell.n_cells, dtype=int) * shell_number
            shell_list.append(shell)

        return shell_list

    @abc.abstractmethod
    def convert_vectors_to_cartesian_array(
        self,
        labelled_vectors: pd.DataFrame,
        create_unit_vectors: bool = False,
        include_spatial_coordinates: bool = False,
    ) -> np.ndarray:
        """Convert a set of labelled vectors into Cartesian coordinates.

        Each concrete implementation of a sphere may internally represent
        the vectors differently. This abstract method converts from that
        implementation-specific formatting to Cartesian coordinates.

        Parameters
        ----------
        labelled_vectors
            The set of ``n`` labelled vectors in ``d`` dimensions, in the
            same format as produced by
            :meth:`SphereBase.assign_histogram_bins`.
        create_unit_vectors
            Indicate whether the returned vectors should be unit vectors.
            Depending on the implementation, this may either remove an
            extraneous normalisation step later, or add an extra
            normalisation step now.
        include_spatial_coordinates
            Indicate whether to include spatial coordinates in the new
            array. This option may only be called if the vectors have
            spatial coordinates.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(n, d)`` containing the vector components in
            Cartesian coordinates.

        Warnings
        --------
        The option `include_spatial_coordinates` is only valid if the
        `labelled_vectors` include spatial coordinates.
        """
        raise NotImplementedError(
            "This abstract method must be implemented in subclasses."
        )

    def get_vectors_from_single_cell(
        self, labelled_vectors: pd.DataFrame, selected_cell: pd.Series
    ) -> pd.DataFrame:
        """Extract vectors from a single selected cell.

        Isolate the vectors contained in a single mesh cell to filter
        based on either pure orientation, or a combination of magnitude
        and orientation.

        Parameters
        ----------
        labelled_vectors
            The set of ``n`` labelled vectors in ``d`` dimensions, in the
            same format as produced by
            :meth:`SphereBase.assign_histogram_bins`.
        selected_cell
            The scalar values from the selected cell, as rows in a
            :class:`~pandas.Series`. The index should contain at least the
            entries in :attr:`.SphereBase.orientation_cols`.

        Returns
        -------
        pandas.DataFrame
            The set of labelled vectors falling in the selected cell.
            This :class:`~pandas.DataFrame` has the same format as
            `labelled_vectors`, but fewer entries.
        """
        # Filter on the shell too, but only if the cell specifies one
        has_shell = all(col in selected_cell for col in self.magnitude_shell_cols)
        cols = self.hist_group_cols if has_shell else self.orientation_cols

        # A vector belongs to the cell when all of its labels agree
        in_cell = (labelled_vectors[cols] == selected_cell[cols]).all(axis=1)

        vectors_in_cell = labelled_vectors.loc[in_cell]

        return vectors_in_cell

    def get_vectors_from_selected_cells(
        self, labelled_vectors: pd.DataFrame, selected_cells: pd.DataFrame
    ) -> pd.DataFrame:
        """Extract vectors from selected cells.

        Isolate the vectors contained in specified shells and cells in
        order to filter the vector collection by magnitude and
        orientation.

        Parameters
        ----------
        labelled_vectors
            The set of ``n`` labelled vectors in ``d`` dimensions, in the
            same format as produced by
            :meth:`SphereBase.assign_histogram_bins`.
        selected_cells
            The scalar values from the selected cells. The columns in this
            table should contain at least the entries in
            :attr:`.SphereBase.orientation_cols`.

        Returns
        -------
        pandas.DataFrame
            The set of labelled vectors falling in the selected cells.
            This :class:`~pandas.DataFrame` has the same format as
            `labelled_vectors`, but fewer entries. If no cells are
            selected, the result has no rows.

        Warnings
        --------
        If the vectors were duplicated for the purpose of visualisation,
        that duplication is **not** preserved here.

        See Also
        --------
        .get_vectors_from_single_cell : Extract vectors from one cell.
        """
        # Filter the vectors once for every selected cell
        vectors_by_cell = [
            self.get_vectors_from_single_cell(labelled_vectors, cell)
            for _, cell in selected_cells.iterrows()
        ]

        # Concatenation requires at least one table
        if not vectors_by_cell:
            return labelled_vectors.iloc[0:0]

        selected_vectors = pd.concat(vectors_by_cell)

        return selected_vectors

    @abc.abstractmethod
    def get_cell_indices(self, bins: pd.DataFrame) -> pd.Series:
        """Get cell indices for specific bins.

        Get the mesh cell index for specified orientation bins.

        Parameters
        ----------
        bins
            DataFrame containing the implementation-specific orientation
            bin information for the desired cells

        Returns
        -------
        Series
            Indices of the mesh cells corresponding to the desired
            orientation bin.

        See Also
        --------
        .SphereBase.assign_histogram_bins :
            assign specific orientations and magnitudes to histogram bins.
        """
        raise NotImplementedError(
            "Subclasses must implement this abstract method."
        )