Source code for lipyphilic.lib.plotting

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# lipyphilic --- lipyphilic.readthedocs.io
#
# Released under the GNU Public Licence, v2 or any higher version
#

"""Plotting utilities --- :mod:`lipyphilic.lib.plotting`
========================================================

:Author: Paul Smith
:Year: 2021
:Copyright: GNU Public License v2

Generally, **lipyphilic** is not a plotting library --- everyone has their favourite plotting
tool and aesthetics and so plotting is generally left up to the user. However, some plots
are complex to make, requiring further processing of results or lots of boilerplate code
to get the end result.

This module provides methods for plotting joint probability densities and lateral
distribution maps of lipid properties projected onto the membrane plane.

The class :class:`lipyphilic.lib.plotting.ProjectionPlot` can be used, for example, to plot
the area per lipid projected onto the membrane plane, i.e. plot the area per lipid
as a function of :math:`xy`. See `Gu et al. (2020)
<https://pubs.acs.org/doi/full/10.1021/jacs.9b11057>`__ for examples of these
projection plots.

The class :class:`lipyphilic.lib.plotting.JointDensity` can be used, for example, to
plot a 2D PMF of cholesterol orientation and height in a lipid membrane. See
`Gu et al. (2019) <https://pubs.acs.org/doi/10.1021/acs.jctc.8b00933>`__
for an example of the this 2D PMF.

The classes and their methods
-----------------------------

.. autoclass:: ProjectionPlot
    :members:

.. autoclass:: JointDensity
    :members:
"""

import numpy as np
import scipy
import matplotlib.pyplot as plt
import seaborn as sns


[docs]class ProjectionPlot:
    """Plot membrane properties as a function of `xy`. See
    
    This class can be used for plotting membrane properties projected onto the :math:`xy` plane.
    This is useful, for example, for detecting phase separation in lipid membranes.
    
    The plotted data are stored in the `.statistic` attribute. This means, if you plot separately
    the projection of a membrane property of lipids in the upper and lower leaflets, you can easily
    calculate the correlation coefficient of this property across the leaflets.
    
    """
    def __init__(self, x_pos, y_pos, values):
        
        self.x_pos = x_pos
        self.y_pos = y_pos
        self.values = values
        
        self.x_edges = None
        self.y_edges = None
        
        self.fig = None
        self.ax = None
        self.cbar = None
        
[docs]    def project_values(self, bins, statistic="mean"):
        """Discretise the membrane and project values onto the xy-plane
        
        Parameters
        ----------
        bins: int or array_like or [int, int] or [array, array]
            The bin specification:
            
            ``int``
              If int, the number of bins for the two dimensions (nx=ny=bins).
              
            ``array-like``
              If array_like, the bin edges for the two dimensions (x_edges=y_edges=bins).
              
            ``[int, int]``
              If [int, int], the number of bins in each dimension (nx, ny = bins).
              
            ``[array, array]``
              If [array, array], the bin edges in each dimension (x_edges, y_edges = bins).
              
            ``combination``
              A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges.
              
        statistic : string or callable, optional
            The statistic to project onto the membrae plane (the default is 'mean').
            The following statistics are available:

            ``mean``
              compute the mean of values for points within each bin.
              Empty bins will be represented by NaN.
            ``std```
              compute the standard deviation within each bin.
            ``median``
              compute the median of values for points within each
              bin. Empty bins will be represented by NaN.
            ``count``
              compute the count of points within each bin.  This is
              identical to an unweighted histogram. The value of the
              membrane property is not referenced.
            ``sum``
              compute the sum of values for points within each bin.
              This is identical to a weighted histogram.
            ``min``
              compute the minimum of values for points within each bin.
              Empty bins will be represented by NaN.
            ``max``
              compute the maximum of values for point within each bin.
              Empty bins will be represented by NaN.
            ``function``
              a user-defined function which takes a 1D array of
              values, and outputs a single numerical statistic. This function
              will be called on the values in each bin.  Empty bins will be
              represented by function([]), or NaN if this returns an error.
        """
        
        self.statistic, self.x_edges, self.y_edges, _ = scipy.stats.binned_statistic_2d(
            x=self.x_pos,
            y=self.y_pos,
            values=self.values,
            bins=bins,
            statistic=statistic,
        )
        
[docs]    def interpolate(self, tile=True, method="linear", fill_value=np.NaN, ):
        """Interpolate NaN values in the projection array.

        Uses scipy.interpolate.griddata to interpolate missing values and
        optionally remove NaN values.

        Parameters
        ----------
        tile: bool, optional
            If `True`, the xy values will be tiled on a (3, 3) grid to reproduce the
            effect of periodic boundary conditions. If `False`, no periodic boundary conditions
            are taken into account when interpolating.
        method: {'linear', 'nearest', 'cubic'}, optional
            Method of interpolation. One of:

            ``nearest``
            return the value at the data point closest to
            the point of interpolation. See SciPy's
            `NearestNDInterpolator` for more details.

            ``linear``
            tessellate the input point set to N-D
            simplices, and interpolate linearly on each simplex.
            See SciPy's `LinearNDInterpolator` for more details.

            ``cubic``
            return the value determined from a
            piecewise cubic, continuously differentiable (C1), and
            approximately curvature-minimizing polynomial surface. See
            SciPy's `CloughTocher2DInterpolator` for more details.

        fill_value : float, optional
            Value used to fill in for requested points outside of the
            convex hull of the input points. This option has no effect for the
            'nearest' method. If not provided, then the these points will
            have NaN values.
        rescale : bool, optional
            Rescale points to unit cube before performing interpolation.
            This is useful if some of the input dimensions have
            incommensurable units and differ by many orders of magnitude.
        """
            
        statistic_nbins_x, statistic_nbins_y = self.statistic.shape
        statistic = np.tile(self.statistic, reps=(3, 3)) if tile else self.statistic
            
        # this snippet is taken from: https://stackoverflow.com/a/37882746
        x, y = np.indices(statistic.shape)
        
        statistic[np.isnan(statistic)] = scipy.interpolate.griddata(
            (x[~np.isnan(statistic)], y[~np.isnan(statistic)]),  # points we know
            statistic[~np.isnan(statistic)],                     # values we know
            (x[np.isnan(statistic)], y[np.isnan(statistic)]),    # points to interpolate
            method=method,
        )
        
        if tile:
            self.statistic = statistic[statistic_nbins_x:statistic_nbins_x * 2, statistic_nbins_y:statistic_nbins_y * 2]
        else:
            self.statistic = statistic
    
[docs]    def plot_projection(
        self,
        ax=None,
        title=None,
        xlabel=None,
        ylabel=None,
        cmap=None,
        vmin=None, vmax=None,
        cbar=True,
        cbar_kws=None,
        imshow_kws=None,
    ):
        """Plot the 2D projection of a membrane property.
        
        Use matplotlib.pyplot.imshow to plot a heatmap of the values.
        
        Parameters
        ----------
        ax: Axes, optional
            Matplotlib Axes on which to plot the projection. The default is `None`,
            in which case a new figure and axes will be created.
        title: str, optional
            Title for the plot. By default, there is no title.
        xlabel: str, optional
            Label for the x-axis. By default, there is no label on the x-axis.
        ylabel: str, optional
            Label for the y-axis. By default, there is no label on the y-axis.
        cmap : str or `~matplotlib.colors.Colormap`, optional
            The Colormap instance or registered colormap name used to map
            scalar data to colors.
        vmin, vmax : float, optional
            Define the data range that the colormap covers. By default,
            the colormap covers the complete value range of the supplied
            data.
        cbar : bool, optional
            Whether or not to add a colorbar to the plot.
        cbar_kws : dict, optional
            A dictionary of keyword options to pass to matplotlib.pyplot.colorbar.
        imshow_kws : dict, optional
            A dictionary of keyword options to pass to matplotlib.pyplot.imshow, which
            is used to plot the 2D density map.
            
        Returns
        -------
        ProjectionPlot.fig
            Matplotlib Figure on which the plot was drawn.
        ProjectionPlot.ax
            Matplotlib Axes on which the plot was drawn.
        ProjectionPlot.cbar
            If a colorbar was added to the plot, this is the Matplotlib colorbar instance for
            ProjectionPlot.ax. Otherwise it is `None`.
        """
        
        # Determine where to plot the figure
        if ax is None:
            self.fig, self.ax = plt.subplots(1, figsize=(3, 3))
        else:
            self.fig = plt.gcf()
            self.ax = ax
        plt.sca(self.ax)
        
        # imshow transposes the data
        values = self.statistic.T

        cbar_kws = dict() if cbar_kws is None else cbar_kws
        imshow_kws = dict() if imshow_kws is None else imshow_kws

        # we cannot pass vmin/vmax to imshow if norm is also passed
        if "norm" in imshow_kws:
            vmin = min(imshow_kws['norm'].boundaries)
            vmax = max(imshow_kws['norm'].boundaries)

        # we need vmin and vmax to be set to sensible values
        # to ensure the colorbar labels looks reasonable
        # And to clip the values
        if vmin is None:
            vmin = np.floor(np.nanmin(values))
        if vmax is None:
            vmax = np.ceil(np.nanmax(values))
        
        values = values.clip(vmin, vmax)

        if "norm" not in imshow_kws:
            imshow_kws['vmin'] = vmin
            imshow_kws['vmax'] = vmax

        # Detmine which cmap to use
        if cmap is None:
            cmap = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True, reverse=True)
        
        # Finally we can plot the density/PMF
        self._imshow = plt.imshow(
            values,
            origin='lower',  # this is necessary to make sure the y-axis is not inverted
            cmap=cmap,
            **imshow_kws
        )
            
        # And add a colourbar if necessary
        if cbar:
                    
            if "aspect" not in cbar_kws:
                cbar_kws["aspect"] = 30
                
            if "pad" not in cbar_kws:
                cbar_kws["pad"] = 0.025
            
            self.cbar = self.fig.colorbar(self._imshow, **cbar_kws)
        
        self.ax.set_title(title, loc="left", weight="bold")
        self.ax.set_xlabel(xlabel)
        self.ax.set_ylabel(ylabel)
        self.ax.set_aspect('equal', adjustable='box')
        self.ax.tick_params(axis="both", which="major", direction="inout", right=True, top=True)
        self.ax.set_xticks([])
        self.ax.set_yticks([])


[docs]class JointDensity:
    """Calculate and plot the joint probability density of two observables.
    
    """
    def __init__(self, ob1, ob2):
        """Set up parameters for calculating joint densities.
        
        Parameters
        ----------
        ob1: array_like
            An array containing values of the first observable.
        ob2: array_like
            An array containing values of the second observable. It **must** have the same shape as `ob1`
        """
        
        if np.array(ob1).shape != np.array(ob2).shape:
            raise ValueError("`ob1` and `ob2` must be arrays of the same shape.")
        
        self.ob1 = np.array(ob1)
        self.ob2 = np.array(ob2)
        
        self.ob1_mesh_bins = None
        self.ob2_mesh_bins = None
        self.joint_mesh_values = None
        
        self.temperature = None
        self.fig = None
        self.ax = None
        self.cbar = None
        
[docs]    def calc_density_2D(self, bins, filter_by=None, temperature=None):
        """Calculate the joint probability density of two observables.
        
        If a tempearutre is provided, the PMF is calculated directly from the probability
        distribution.
        
        Parameters
        ----------
        bins: int or array_like or [int, int] or [array, array]
            The bin specification:
            
            ``int``
              If int, the number of bins for the two dimensions (nx=ny=bins).
              
            ``array-like``
              If array_like, the bin edges for the two dimensions (x_edges=y_edges=bins).
              
            ``[int, int]``
              If [int, int], the number of bins in each dimension (nx, ny = bins).
              
            ``[array, array]``
              If [array, array], the bin edges in each dimension (x_edges, y_edges = bins).
              
            ``combination``
              A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges.
              
        filter_by: 2D numpy array of shape (n_residues, n_frames), optional
            A boolean mask for filtering lipids or frames. The default is `None`, in which case
            no filtering is performed.
        temperature: float, optional
            Temperature of the system, which will be used to convert the 2D density into
            a PMF. The default is `None`, in which case the density is returned rather than
            the PMF.
        """
    
        if filter_by is not None:
            
            filter_by = np.array(filter_by)
            
            if filter_by.shape != self.ob1.shape:
                raise ValueError("`filter_by` must be an array with the same shape as `ob1` and `ob2`.")
            
            density, ob1_bin_edges, ob2_bin_edges = np.histogram2d(
                self.ob1[filter_by].flatten(), self.ob2[filter_by].flatten(), density=True, bins=bins
            )
        
        else:
            density, ob1_bin_edges, ob2_bin_edges = np.histogram2d(
                self.ob1.flatten(), self.ob2.flatten(), density=True, bins=bins
            )

        # We need to create a grid for plotting with imshow
        self.ob1_mesh_bins, self.ob2_mesh_bins = np.meshgrid(
            ob1_bin_edges[:-1] + (ob1_bin_edges[1] - ob1_bin_edges[0]) / 2,
            ob2_bin_edges[:-1] + (ob2_bin_edges[1] - ob2_bin_edges[0]) / 2,
        )

        # determine whether we use probability density or PMF
        self.temperature = temperature
        if self.temperature is not None:

            ln_density = np.log(density)
            ln_density[~np.isfinite(ln_density)] = np.NaN
            free_energy = -(scipy.constants.Boltzmann * scipy.constants.Avogadro / 4184) * temperature * ln_density
            self.joint_mesh_values = free_energy

        else:
            self.joint_mesh_values = density
            
[docs]    def interpolate(self, method="linear", fill_value=None, rescale=True):
        """Interpolate NaN values in the joint probability density or PMF.

        Uses scipy.interpolate.griddata to interpolate the joint density and
        optionally remove NaN values.

        Parameters
        ----------
        method: {'linear', 'nearest', 'cubic'}, optional
            Method of interpolation. One of:

            ``nearest``
              return the value at the data point closest to
              the point of interpolation. See SciPy's
              `NearestNDInterpolator` for more details.

            ``linear``
              tessellate the input point set to N-D
              simplices, and interpolate linearly on each simplex.
              See SciPy's `LinearNDInterpolator` for more details.

            ``cubic``
              return the value determined from a
              piecewise cubic, continuously differentiable (C1), and
              approximately curvature-minimizing polynomial surface. See
              SciPy's `CloughTocher2DInterpolator` for more details.

        fill_value : float, optional
            Value used to fill in for requested points outside of the
            convex hull of the input points. This option has no effect for the
            'nearest' method. If not provided, then the
            default is to use the maximum free energy value if a PMF was
            calculated, or 0 otherwise.

        rescale : bool, optional
            Rescale points to unit cube before performing interpolation.
            This is useful if some of the input dimensions have
            incommensurable units and differ by many orders of magnitude.
        """

        # this snippet is taken from: https://stackoverflow.com/a/37882746
        x, y = np.indices(self.joint_mesh_values.shape)
        
        if fill_value is None:
            if self.temperature is None:
                fill_value = 0.0
            else:
                fill_value = np.nanmax(self.joint_mesh_values)
        
        self.joint_mesh_values[np.isnan(self.joint_mesh_values)] = scipy.interpolate.griddata(
            (x[~np.isnan(self.joint_mesh_values)], y[~np.isnan(self.joint_mesh_values)]),  # points we know
            self.joint_mesh_values[~np.isnan(self.joint_mesh_values)],                     # values we know
            (x[np.isnan(self.joint_mesh_values)], y[np.isnan(self.joint_mesh_values)]),    # points to interpolate
            method=method,
            fill_value=fill_value,
            rescale=rescale
        )

[docs]    def plot_density(
        self,
        difference=None,
        ax=None,
        title=None,
        xlabel=None,
        ylabel=None,
        cmap=None,
        vmin=None, vmax=None,
        n_contours=4, contour_labels=None,
        cbar=True,
        cbar_kws=None,
        imshow_kws=None,
        contour_kws=None,
        clabel_kws=None,
    ):
        """Plot the 2D density or PMF.
        
        Use matplotlib.pyplot.imshow to plot a heatmap of the density.
        
        Optionally, add contour lines using matplotlib.pyplot.contour and label the contours
        with their values.
        
        Parameters
        ----------
        difference: JointDensity, optional
            A JointDensity object for which the probability density or PMF has been calculated.
            Before ploting, the density or PMF of `difference` will be subtracted from the
            density of PMF of this object. This is useful for plotting difference in PMFs due
            to e.g a change in membrane lipid composition.
        ax: Axes, optional
            Matplotlib Axes on which to plot the 2D denstiy. The default is `None`,
            in which case a new figure and axes will be created.
        title: str, optional
            Title for the plot. By default, there is no title.
        xlabel: str, optional
            Label for the x-axis. By default, there is no label on the x-axis.
        ylabel: str, optional
            Label for the y-axis. By default, there is no label on the y-axis.
        cmap : str or `~matplotlib.colors.Colormap`, optional
            The Colormap instance or registered colormap name used to map
            scalar data to colors.
        vmin, vmax : float, optional
            Define the data range that the colormap covers. By default,
            the colormap covers the complete value range of the supplied
            data.
        n_contours: int or array-like, optional
            Determines the number and positions of the contour lines / regions
            plotted with matplotlib.pyplot.contour:

            ``int``
                If an int *n*, use `~matplotlib.ticker.MaxNLocator`, which tries
                to automatically choose no more than *n+1* "nice" contour levels
                between *vmin* and *vmax*.
            
            ``array-like``
                If array-like, draw contour lines at the specified levels.
                The values must be in increasing order.
            
            ``0``
                If 0, no contour lines are drawn.
        
        contour_labels : array-like, optional
            A list of contour level indices specifyig which levles should be labeled.
            The default is `None`, in which case no contours are labeled.
        cbar : bool, optional
            Whether or not to add a colorbar to the plot.
        cbar_kws : dict, optional
            A dictionary of keyword options to pass to matplotlib.pyplot.colorbar.
        imshow_kws : dict, optional
            A dictionary of keyword options to pass to matplotlib.pyplot.imshow, which
            is used to plot the 2D density map.
        contour_kws : dict, optional
            A dictionary of keyword options to pass to matplotlib.pyplot.contour, which
            is used to plot the contour lines.
        clabel_kws: dict, optional
            A dictionary of keyword options to pass to matplotlib.pyplot.contour, which
            is used to add labels to the contour lines.
            
        Returns
        -------
        JointDensity.fig
            Matplotlib Figure on which the plot was drawn.
        JointDensity.ax
            Matplotlib Axes on which the plot was drawn.
        JointDensity.cbar
            If a colorbar was added to the plot, this is the Matplotlib colorbar instance for
            JointDensity.ax. Otherwise it is `None`.
        """
        
        # Determine where to plot the figure
        if ax is None:
            self.fig, self.ax = plt.subplots(1, figsize=(3, 3))
        else:
            self.fig = plt.gcf()
            self.ax = ax
        plt.sca(self.ax)
        
        values = self.joint_mesh_values.T - difference.joint_mesh_values.T if difference is not None else self.joint_mesh_values.T

        cbar_kws = dict() if cbar_kws is None else cbar_kws
        imshow_kws = dict() if imshow_kws is None else imshow_kws
        contour_kws = dict() if contour_kws is None else contour_kws
        clabel_kws = dict() if clabel_kws is None else clabel_kws

        # we cannot pass vmin/vmax to imshow if norm is also passed
        if "norm" in imshow_kws:
            vmin = min(imshow_kws['norm'].boundaries)
            vmax = max(imshow_kws['norm'].boundaries)
        
        # we need vmin and vmax to be set to sensible values
        # to ensure the colorbar labels looks reasonable
        # And to clip the values
        if vmin is None and self.temperature is not None:
            vmin = min(0.0, np.floor(np.nanmin(values)))
        elif vmin is None:
            vmin = np.nanmin(values)
        if vmax is None and self.temperature is not None:
            vmax = max(0.0, np.ceil(np.nanmax(values)))
        elif vmax is None:
            vmax = np.nanmax(values)
        
        # make sure the colourbar is centered at zero if we're looking doing a difference plot
        if difference is not None and "norm" not in imshow_kws:
            vmax = max(vmax, abs(vmin))
            vmin = -vmax

        if "norm" not in imshow_kws:
            imshow_kws['vmin'] = vmin
            imshow_kws['vmax'] = vmax
        
        values = values.clip(vmin, vmax)

        # Add contours if necessary
        if "colors" not in contour_kws.keys():
            contour_kws["colors"] = "xkcd:dark grey"
        self._contours = contours = plt.contour(
            self.ob1_mesh_bins, self.ob2_mesh_bins, values,
            levels=n_contours,
            **contour_kws
        )
        
        # And contour labels
        if contour_labels is not None:
            levels = np.array(contours.levels)
            self._clabels = plt.clabel(contours, levels=levels[contour_labels], **clabel_kws)

        # Get the extent of the distributions
        # This is necessary for imshow
        dx = np.diff(self.ob1_mesh_bins[0][:2])[0]
        dy = np.diff(self.ob2_mesh_bins[:, 0][:2])[0]
        extent = [
            self.ob1_mesh_bins[0][0] - dx / 2,
            self.ob1_mesh_bins[0][-1] + dx / 2 + 1,
            self.ob2_mesh_bins[0][0] - dy / 2,
            self.ob2_mesh_bins[-1][0] + dy / 2,
        ]
        
        # Detmine which cmap to use
        if cmap is None and difference is None:
            cmap = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True, reverse=True)
        elif cmap is None:
            cmap = sns.color_palette(palette="RdBu_r", n_colors=200, as_cmap=True)
        
        # Finally we can plot the density/PMF
        self._imshow = plt.imshow(
            values,
            origin='lower',  # this is necessary to make sure the y-axis is not inverted
            extent=extent,
            cmap=cmap,
            **imshow_kws
        )
            
        # And add a colourbar if necessary
        if cbar:
            
            if "label" not in cbar_kws:
                if self.temperature is not None and difference is not None:
                    cbar_kws["label"] = r"$\Delta\, \rm PMF$"  # pragma: no cover # testing for this label works locally but fails with tox/Travis
                    
                elif self.temperature is not None:
                    cbar_kws["label"] = "PMF"  # pragma: no cover # testing for this label works locally but fails with tox/Travis
                else:
                    cbar_kws["label"] = "Probability density"

            if "pad" not in cbar_kws:
                cbar_kws["pad"] = 0.025
            
            self.cbar = self.fig.colorbar(self._imshow, **cbar_kws)
            
            # For some reason the aspect is not set by passing it as a keyword
            aspect = cbar_kws["aspect"] if "aspect" in cbar_kws else 30
            self.cbar.ax.set_aspect(aspect)

            ticks = self.cbar.get_ticks()
            labels = ticks.round(2).astype(str)
            labels[-1] += "<"
            if difference is not None:
                labels[0] = "<" + labels[0]
            self.cbar.set_ticks(ticks)  # must be called before we can set the labels
            self.cbar.set_ticklabels(labels)
        
        self.ax.set_title(title, loc="left", weight="bold")
        self.ax.set_xlabel(xlabel)
        self.ax.set_ylabel(ylabel)
        self.ax.set_aspect("auto")  # otherwise imshow assumes the axes have the same units
        self.ax.tick_params(axis="both", which="major", direction="inout", right=True, top=True)