Source code for lipyphilic.lib.plotting

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# lipyphilic --- lipyphilic.readthedocs.io
#
# Released under the GNU Public Licence, v2 or any higher version
#

"""Plotting utilities --- :mod:`lipyphilic.lib.plotting`
========================================================

:Author: Paul Smith
:Year: 2021
:Copyright: GNU Public License v2

Generally, **lipyphilic** is not a plotting library --- everyone has their favourite plotting
tool and aesthetics and so plotting is generally left up to the user. However, some plots
are complex to make, requiring further processing of results or lots of boilerplate code
to get the end result.

This module provides methods for plotting joint probability densities and lateral
distribution maps of lipid properties projected onto the membrane plane.

The class :class:`lipyphilic.lib.plotting.ProjectionPlot` can be used, for example, to plot
the area per lipid projected onto the membrane plane, i.e. plot the area per lipid
as a function of :math:`xy`. See `Gu et al. (2020)
<https://pubs.acs.org/doi/full/10.1021/jacs.9b11057>`__ for examples of these
projection plots.

The class :class:`lipyphilic.lib.plotting.JointDensity` can be used, for example, to
plot a 2D PMF of cholesterol orientation and height in a lipid membrane. See
`Gu et al. (2019) <https://pubs.acs.org/doi/10.1021/acs.jctc.8b00933>`__
for an example of the this 2D PMF.

The classes and their methods
-----------------------------

.. autoclass:: ProjectionPlot
    :members:

.. autoclass:: JointDensity
    :members:
"""

import numpy as np
import scipy
import matplotlib.pyplot as plt
import seaborn as sns


[docs]class ProjectionPlot: """Plot membrane properties as a function of `xy`. See This class can be used for plotting membrane properties projected onto the :math:`xy` plane. This is useful, for example, for detecting phase separation in lipid membranes. The plotted data are stored in the `.statistic` attribute. This means, if you plot separately the projection of a membrane property of lipids in the upper and lower leaflets, you can easily calculate the correlation coefficient of this property across the leaflets. """ def __init__(self, x_pos, y_pos, values): self.x_pos = x_pos self.y_pos = y_pos self.values = values self.x_edges = None self.y_edges = None self.fig = None self.ax = None self.cbar = None
[docs] def project_values(self, bins, statistic="mean"): """Discretise the membrane and project values onto the xy-plane Parameters ---------- bins: int or array_like or [int, int] or [array, array] The bin specification: ``int`` If int, the number of bins for the two dimensions (nx=ny=bins). ``array-like`` If array_like, the bin edges for the two dimensions (x_edges=y_edges=bins). ``[int, int]`` If [int, int], the number of bins in each dimension (nx, ny = bins). ``[array, array]`` If [array, array], the bin edges in each dimension (x_edges, y_edges = bins). ``combination`` A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges. statistic : string or callable, optional The statistic to project onto the membrae plane (the default is 'mean'). The following statistics are available: ``mean`` compute the mean of values for points within each bin. Empty bins will be represented by NaN. ``std``` compute the standard deviation within each bin. ``median`` compute the median of values for points within each bin. Empty bins will be represented by NaN. ``count`` compute the count of points within each bin. This is identical to an unweighted histogram. The value of the membrane property is not referenced. ``sum`` compute the sum of values for points within each bin. This is identical to a weighted histogram. ``min`` compute the minimum of values for points within each bin. Empty bins will be represented by NaN. ``max`` compute the maximum of values for point within each bin. Empty bins will be represented by NaN. ``function`` a user-defined function which takes a 1D array of values, and outputs a single numerical statistic. This function will be called on the values in each bin. Empty bins will be represented by function([]), or NaN if this returns an error. """ self.statistic, self.x_edges, self.y_edges, _ = scipy.stats.binned_statistic_2d( x=self.x_pos, y=self.y_pos, values=self.values, bins=bins, statistic=statistic, )
[docs] def interpolate(self, tile=True, method="linear", fill_value=np.NaN, ): """Interpolate NaN values in the projection array. Uses scipy.interpolate.griddata to interpolate missing values and optionally remove NaN values. Parameters ---------- tile: bool, optional If `True`, the xy values will be tiled on a (3, 3) grid to reproduce the effect of periodic boundary conditions. If `False`, no periodic boundary conditions are taken into account when interpolating. method: {'linear', 'nearest', 'cubic'}, optional Method of interpolation. One of: ``nearest`` return the value at the data point closest to the point of interpolation. See SciPy's `NearestNDInterpolator` for more details. ``linear`` tessellate the input point set to N-D simplices, and interpolate linearly on each simplex. See SciPy's `LinearNDInterpolator` for more details. ``cubic`` return the value determined from a piecewise cubic, continuously differentiable (C1), and approximately curvature-minimizing polynomial surface. See SciPy's `CloughTocher2DInterpolator` for more details. fill_value : float, optional Value used to fill in for requested points outside of the convex hull of the input points. This option has no effect for the 'nearest' method. If not provided, then the these points will have NaN values. rescale : bool, optional Rescale points to unit cube before performing interpolation. This is useful if some of the input dimensions have incommensurable units and differ by many orders of magnitude. """ statistic_nbins_x, statistic_nbins_y = self.statistic.shape statistic = np.tile(self.statistic, reps=(3, 3)) if tile else self.statistic # this snippet is taken from: https://stackoverflow.com/a/37882746 x, y = np.indices(statistic.shape) statistic[np.isnan(statistic)] = scipy.interpolate.griddata( (x[~np.isnan(statistic)], y[~np.isnan(statistic)]), # points we know statistic[~np.isnan(statistic)], # values we know (x[np.isnan(statistic)], y[np.isnan(statistic)]), # points to interpolate method=method, ) if tile: self.statistic = statistic[statistic_nbins_x:statistic_nbins_x * 2, statistic_nbins_y:statistic_nbins_y * 2] else: self.statistic = statistic
[docs] def plot_projection( self, ax=None, title=None, xlabel=None, ylabel=None, cmap=None, vmin=None, vmax=None, cbar=True, cbar_kws=None, imshow_kws=None, ): """Plot the 2D projection of a membrane property. Use matplotlib.pyplot.imshow to plot a heatmap of the values. Parameters ---------- ax: Axes, optional Matplotlib Axes on which to plot the projection. The default is `None`, in which case a new figure and axes will be created. title: str, optional Title for the plot. By default, there is no title. xlabel: str, optional Label for the x-axis. By default, there is no label on the x-axis. ylabel: str, optional Label for the y-axis. By default, there is no label on the y-axis. cmap : str or `~matplotlib.colors.Colormap`, optional The Colormap instance or registered colormap name used to map scalar data to colors. vmin, vmax : float, optional Define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. cbar : bool, optional Whether or not to add a colorbar to the plot. cbar_kws : dict, optional A dictionary of keyword options to pass to matplotlib.pyplot.colorbar. imshow_kws : dict, optional A dictionary of keyword options to pass to matplotlib.pyplot.imshow, which is used to plot the 2D density map. Returns ------- ProjectionPlot.fig Matplotlib Figure on which the plot was drawn. ProjectionPlot.ax Matplotlib Axes on which the plot was drawn. ProjectionPlot.cbar If a colorbar was added to the plot, this is the Matplotlib colorbar instance for ProjectionPlot.ax. Otherwise it is `None`. """ # Determine where to plot the figure if ax is None: self.fig, self.ax = plt.subplots(1, figsize=(3, 3)) else: self.fig = plt.gcf() self.ax = ax plt.sca(self.ax) # imshow transposes the data values = self.statistic.T cbar_kws = dict() if cbar_kws is None else cbar_kws imshow_kws = dict() if imshow_kws is None else imshow_kws # we cannot pass vmin/vmax to imshow if norm is also passed if "norm" in imshow_kws: vmin = min(imshow_kws['norm'].boundaries) vmax = max(imshow_kws['norm'].boundaries) # we need vmin and vmax to be set to sensible values # to ensure the colorbar labels looks reasonable # And to clip the values if vmin is None: vmin = np.floor(np.nanmin(values)) if vmax is None: vmax = np.ceil(np.nanmax(values)) values = values.clip(vmin, vmax) if "norm" not in imshow_kws: imshow_kws['vmin'] = vmin imshow_kws['vmax'] = vmax # Detmine which cmap to use if cmap is None: cmap = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True, reverse=True) # Finally we can plot the density/PMF self._imshow = plt.imshow( values, origin='lower', # this is necessary to make sure the y-axis is not inverted cmap=cmap, **imshow_kws ) # And add a colourbar if necessary if cbar: if "aspect" not in cbar_kws: cbar_kws["aspect"] = 30 if "pad" not in cbar_kws: cbar_kws["pad"] = 0.025 self.cbar = self.fig.colorbar(self._imshow, **cbar_kws) self.ax.set_title(title, loc="left", weight="bold") self.ax.set_xlabel(xlabel) self.ax.set_ylabel(ylabel) self.ax.set_aspect('equal', adjustable='box') self.ax.tick_params(axis="both", which="major", direction="inout", right=True, top=True) self.ax.set_xticks([]) self.ax.set_yticks([])
[docs]class JointDensity: """Calculate and plot the joint probability density of two observables. """ def __init__(self, ob1, ob2): """Set up parameters for calculating joint densities. Parameters ---------- ob1: array_like An array containing values of the first observable. ob2: array_like An array containing values of the second observable. It **must** have the same shape as `ob1` """ if np.array(ob1).shape != np.array(ob2).shape: raise ValueError("`ob1` and `ob2` must be arrays of the same shape.") self.ob1 = np.array(ob1) self.ob2 = np.array(ob2) self.ob1_mesh_bins = None self.ob2_mesh_bins = None self.joint_mesh_values = None self.temperature = None self.fig = None self.ax = None self.cbar = None
[docs] def calc_density_2D(self, bins, filter_by=None, temperature=None): """Calculate the joint probability density of two observables. If a tempearutre is provided, the PMF is calculated directly from the probability distribution. Parameters ---------- bins: int or array_like or [int, int] or [array, array] The bin specification: ``int`` If int, the number of bins for the two dimensions (nx=ny=bins). ``array-like`` If array_like, the bin edges for the two dimensions (x_edges=y_edges=bins). ``[int, int]`` If [int, int], the number of bins in each dimension (nx, ny = bins). ``[array, array]`` If [array, array], the bin edges in each dimension (x_edges, y_edges = bins). ``combination`` A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges. filter_by: 2D numpy array of shape (n_residues, n_frames), optional A boolean mask for filtering lipids or frames. The default is `None`, in which case no filtering is performed. temperature: float, optional Temperature of the system, which will be used to convert the 2D density into a PMF. The default is `None`, in which case the density is returned rather than the PMF. """ if filter_by is not None: filter_by = np.array(filter_by) if filter_by.shape != self.ob1.shape: raise ValueError("`filter_by` must be an array with the same shape as `ob1` and `ob2`.") density, ob1_bin_edges, ob2_bin_edges = np.histogram2d( self.ob1[filter_by].flatten(), self.ob2[filter_by].flatten(), density=True, bins=bins ) else: density, ob1_bin_edges, ob2_bin_edges = np.histogram2d( self.ob1.flatten(), self.ob2.flatten(), density=True, bins=bins ) # We need to create a grid for plotting with imshow self.ob1_mesh_bins, self.ob2_mesh_bins = np.meshgrid( ob1_bin_edges[:-1] + (ob1_bin_edges[1] - ob1_bin_edges[0]) / 2, ob2_bin_edges[:-1] + (ob2_bin_edges[1] - ob2_bin_edges[0]) / 2, ) # determine whether we use probability density or PMF self.temperature = temperature if self.temperature is not None: ln_density = np.log(density) ln_density[~np.isfinite(ln_density)] = np.NaN free_energy = -(scipy.constants.Boltzmann * scipy.constants.Avogadro / 4184) * temperature * ln_density self.joint_mesh_values = free_energy else: self.joint_mesh_values = density
[docs] def interpolate(self, method="linear", fill_value=None, rescale=True): """Interpolate NaN values in the joint probability density or PMF. Uses scipy.interpolate.griddata to interpolate the joint density and optionally remove NaN values. Parameters ---------- method: {'linear', 'nearest', 'cubic'}, optional Method of interpolation. One of: ``nearest`` return the value at the data point closest to the point of interpolation. See SciPy's `NearestNDInterpolator` for more details. ``linear`` tessellate the input point set to N-D simplices, and interpolate linearly on each simplex. See SciPy's `LinearNDInterpolator` for more details. ``cubic`` return the value determined from a piecewise cubic, continuously differentiable (C1), and approximately curvature-minimizing polynomial surface. See SciPy's `CloughTocher2DInterpolator` for more details. fill_value : float, optional Value used to fill in for requested points outside of the convex hull of the input points. This option has no effect for the 'nearest' method. If not provided, then the default is to use the maximum free energy value if a PMF was calculated, or 0 otherwise. rescale : bool, optional Rescale points to unit cube before performing interpolation. This is useful if some of the input dimensions have incommensurable units and differ by many orders of magnitude. """ # this snippet is taken from: https://stackoverflow.com/a/37882746 x, y = np.indices(self.joint_mesh_values.shape) if fill_value is None: if self.temperature is None: fill_value = 0.0 else: fill_value = np.nanmax(self.joint_mesh_values) self.joint_mesh_values[np.isnan(self.joint_mesh_values)] = scipy.interpolate.griddata( (x[~np.isnan(self.joint_mesh_values)], y[~np.isnan(self.joint_mesh_values)]), # points we know self.joint_mesh_values[~np.isnan(self.joint_mesh_values)], # values we know (x[np.isnan(self.joint_mesh_values)], y[np.isnan(self.joint_mesh_values)]), # points to interpolate method=method, fill_value=fill_value, rescale=rescale )
[docs] def plot_density( self, difference=None, ax=None, title=None, xlabel=None, ylabel=None, cmap=None, vmin=None, vmax=None, n_contours=4, contour_labels=None, cbar=True, cbar_kws=None, imshow_kws=None, contour_kws=None, clabel_kws=None, ): """Plot the 2D density or PMF. Use matplotlib.pyplot.imshow to plot a heatmap of the density. Optionally, add contour lines using matplotlib.pyplot.contour and label the contours with their values. Parameters ---------- difference: JointDensity, optional A JointDensity object for which the probability density or PMF has been calculated. Before ploting, the density or PMF of `difference` will be subtracted from the density of PMF of this object. This is useful for plotting difference in PMFs due to e.g a change in membrane lipid composition. ax: Axes, optional Matplotlib Axes on which to plot the 2D denstiy. The default is `None`, in which case a new figure and axes will be created. title: str, optional Title for the plot. By default, there is no title. xlabel: str, optional Label for the x-axis. By default, there is no label on the x-axis. ylabel: str, optional Label for the y-axis. By default, there is no label on the y-axis. cmap : str or `~matplotlib.colors.Colormap`, optional The Colormap instance or registered colormap name used to map scalar data to colors. vmin, vmax : float, optional Define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. n_contours: int or array-like, optional Determines the number and positions of the contour lines / regions plotted with matplotlib.pyplot.contour: ``int`` If an int *n*, use `~matplotlib.ticker.MaxNLocator`, which tries to automatically choose no more than *n+1* "nice" contour levels between *vmin* and *vmax*. ``array-like`` If array-like, draw contour lines at the specified levels. The values must be in increasing order. ``0`` If 0, no contour lines are drawn. contour_labels : array-like, optional A list of contour level indices specifyig which levles should be labeled. The default is `None`, in which case no contours are labeled. cbar : bool, optional Whether or not to add a colorbar to the plot. cbar_kws : dict, optional A dictionary of keyword options to pass to matplotlib.pyplot.colorbar. imshow_kws : dict, optional A dictionary of keyword options to pass to matplotlib.pyplot.imshow, which is used to plot the 2D density map. contour_kws : dict, optional A dictionary of keyword options to pass to matplotlib.pyplot.contour, which is used to plot the contour lines. clabel_kws: dict, optional A dictionary of keyword options to pass to matplotlib.pyplot.contour, which is used to add labels to the contour lines. Returns ------- JointDensity.fig Matplotlib Figure on which the plot was drawn. JointDensity.ax Matplotlib Axes on which the plot was drawn. JointDensity.cbar If a colorbar was added to the plot, this is the Matplotlib colorbar instance for JointDensity.ax. Otherwise it is `None`. """ # Determine where to plot the figure if ax is None: self.fig, self.ax = plt.subplots(1, figsize=(3, 3)) else: self.fig = plt.gcf() self.ax = ax plt.sca(self.ax) values = self.joint_mesh_values.T - difference.joint_mesh_values.T if difference is not None else self.joint_mesh_values.T cbar_kws = dict() if cbar_kws is None else cbar_kws imshow_kws = dict() if imshow_kws is None else imshow_kws contour_kws = dict() if contour_kws is None else contour_kws clabel_kws = dict() if clabel_kws is None else clabel_kws # we cannot pass vmin/vmax to imshow if norm is also passed if "norm" in imshow_kws: vmin = min(imshow_kws['norm'].boundaries) vmax = max(imshow_kws['norm'].boundaries) # we need vmin and vmax to be set to sensible values # to ensure the colorbar labels looks reasonable # And to clip the values if vmin is None and self.temperature is not None: vmin = min(0.0, np.floor(np.nanmin(values))) elif vmin is None: vmin = np.nanmin(values) if vmax is None and self.temperature is not None: vmax = max(0.0, np.ceil(np.nanmax(values))) elif vmax is None: vmax = np.nanmax(values) # make sure the colourbar is centered at zero if we're looking doing a difference plot if difference is not None and "norm" not in imshow_kws: vmax = max(vmax, abs(vmin)) vmin = -vmax if "norm" not in imshow_kws: imshow_kws['vmin'] = vmin imshow_kws['vmax'] = vmax values = values.clip(vmin, vmax) # Add contours if necessary if "colors" not in contour_kws.keys(): contour_kws["colors"] = "xkcd:dark grey" self._contours = contours = plt.contour( self.ob1_mesh_bins, self.ob2_mesh_bins, values, levels=n_contours, **contour_kws ) # And contour labels if contour_labels is not None: levels = np.array(contours.levels) self._clabels = plt.clabel(contours, levels=levels[contour_labels], **clabel_kws) # Get the extent of the distributions # This is necessary for imshow dx = np.diff(self.ob1_mesh_bins[0][:2])[0] dy = np.diff(self.ob2_mesh_bins[:, 0][:2])[0] extent = [ self.ob1_mesh_bins[0][0] - dx / 2, self.ob1_mesh_bins[0][-1] + dx / 2 + 1, self.ob2_mesh_bins[0][0] - dy / 2, self.ob2_mesh_bins[-1][0] + dy / 2, ] # Detmine which cmap to use if cmap is None and difference is None: cmap = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True, reverse=True) elif cmap is None: cmap = sns.color_palette(palette="RdBu_r", n_colors=200, as_cmap=True) # Finally we can plot the density/PMF self._imshow = plt.imshow( values, origin='lower', # this is necessary to make sure the y-axis is not inverted extent=extent, cmap=cmap, **imshow_kws ) # And add a colourbar if necessary if cbar: if "label" not in cbar_kws: if self.temperature is not None and difference is not None: cbar_kws["label"] = r"$\Delta\, \rm PMF$" # pragma: no cover # testing for this label works locally but fails with tox/Travis elif self.temperature is not None: cbar_kws["label"] = "PMF" # pragma: no cover # testing for this label works locally but fails with tox/Travis else: cbar_kws["label"] = "Probability density" if "pad" not in cbar_kws: cbar_kws["pad"] = 0.025 self.cbar = self.fig.colorbar(self._imshow, **cbar_kws) # For some reason the aspect is not set by passing it as a keyword aspect = cbar_kws["aspect"] if "aspect" in cbar_kws else 30 self.cbar.ax.set_aspect(aspect) ticks = self.cbar.get_ticks() labels = ticks.round(2).astype(str) labels[-1] += "<" if difference is not None: labels[0] = "<" + labels[0] self.cbar.set_ticks(ticks) # must be called before we can set the labels self.cbar.set_ticklabels(labels) self.ax.set_title(title, loc="left", weight="bold") self.ax.set_xlabel(xlabel) self.ax.set_ylabel(ylabel) self.ax.set_aspect("auto") # otherwise imshow assumes the axes have the same units self.ax.tick_params(axis="both", which="major", direction="inout", right=True, top=True)