"""
Water-mediated interactions --- :mod:`prolif.interactions.water_bridge`
=======================================================================
This module contains the :class:`~prolif.interactions.water_bridge.WaterBridge` class
for analyzing water-mediated interactions. It does so by generating fingerprints for
ligand-water, water-protein, and water-water interactions. The results are then
combined to identify water-bridged interactions using a graph-based approach.
.. versionadded:: 2.1.0
"""
import itertools as it
from collections import defaultdict
from collections.abc import Iterable, Iterator
from typing import TYPE_CHECKING, Any, Optional, Union, cast
import networkx as nx
from MDAnalysis.core.groups import UpdatingAtomGroup
from prolif.ifp import IFP, InteractionData
from prolif.interactions.base import BridgedInteraction
if TYPE_CHECKING:
from prolif.molecule import Molecule
from prolif.residue import ResidueId
from prolif.typeshed import IFPResults, MDAObject, Trajectory
[docs]class WaterBridge(BridgedInteraction):
"""Implementation of the water-bridge analysis.
Parameters
----------
water : Union[MDAnalysis.core.groups.AtomGroup, Iterable[prolif.molecule.Molecule]]
An MDAnalysis AtomGroup or iterable of prolif Molecule objects containing the
water molecules
order : int
Maximum number of water molecules that can be involved in a water-bridged
interaction.
min_order : int
Minimum number of water molecules that can be involved in a water-bridged
interaction.
hbdonor : Optional[dict]
Parameters for the HBDonor interaction passed to the underlying fingerprint
generator. See :class:`~prolif.interactions.interactions.HBDonor` for more
details.
hbacceptor : Optional[dict]
Same as above for :class:`~prolif.interactions.interactions.HBAcceptor`.
atomgroup_converter_kwargs : Optional[dict]
Optional parameters passed to the MDAnalysis' RDKitConverter if the
specified `water` is an MDAnalysis AtomGroup.
count : bool
Whether to generate a count fingerprint or just a binary one.
Notes
-----
This analysis currently only runs in serial.
.. versionadded:: 2.1.0
"""
def __init__(
self,
water: Union["MDAObject", Iterable["Molecule"]],
order: int = 1,
min_order: int = 1,
hbdonor: dict | None = None,
hbacceptor: dict | None = None,
atomgroup_converter_kwargs: dict | None = None,
count: bool = False,
) -> None:
# circular import
from prolif.fingerprint import Fingerprint
if order < 1:
raise ValueError("order must be greater than 0")
if min_order > order:
raise ValueError("min_order cannot be greater than order")
self.water = water
# handle AtomGroup generated with `updating=True` automatically
self.water_conv_kwargs = atomgroup_converter_kwargs or (
{"NoImplicit": False, "cache": False}
if isinstance(water, UpdatingAtomGroup)
else {}
)
self.order = order
self.min_order = min_order
self.water_fp = Fingerprint(
interactions=["HBDonor", "HBAcceptor"],
parameters={"HBDonor": hbdonor or {}, "HBAcceptor": hbacceptor or {}},
count=count,
)
super().__init__()
[docs] def setup(self, ifp_store: Optional["IFPResults"] = None, **kwargs: Any) -> None:
super().setup(ifp_store=ifp_store, **kwargs)
self.kwargs.pop("n_jobs", None)
self.residues = self.kwargs.pop("residues", None)
self.converter_kwargs = self.kwargs.pop("converter_kwargs", ({}, {}))
self.water_fp.use_segid = self.kwargs.pop("use_segid", False)
[docs] def run(
self,
traj: "Trajectory",
lig: "MDAObject",
prot: "MDAObject",
) -> "IFPResults":
"""Run the water bridge analysis for a trajectory.
Parameters
----------
traj : Union[MDAnalysis.coordinates.base.ProtoReader, MDAnalysis.coordinates.base.FrameIteratorSliced]
Iterate over this Universe trajectory or sliced trajectory object
to extract the frames used for the fingerprint extraction
lig : MDAnalysis.core.groups.AtomGroup
An MDAnalysis AtomGroup for the ligand
prot : MDAnalysis.core.groups.AtomGroup
An MDAnalysis AtomGroup for the protein (with multiple residues)
""" # noqa: E501
water_obj = cast("MDAObject", self.water)
# Run analysis for ligand-water and water-protein interactions
lig_water_ifp: dict[int, IFP] = self.water_fp._run_serial(
traj,
lig,
water_obj,
residues=None,
converter_kwargs=(self.converter_kwargs[0], self.water_conv_kwargs),
**self.kwargs,
desc="Ligand-Water",
)
water_prot_ifp: dict[int, IFP] = self.water_fp._run_serial(
traj,
water_obj,
prot,
residues=self.residues,
converter_kwargs=(self.water_conv_kwargs, self.converter_kwargs[1]),
**self.kwargs,
desc="Water-Protein",
)
if self.order >= 2:
# Run water-water interaction analysis
water_ifp: dict[int, IFP] | None = self.water_fp._run_serial(
traj,
water_obj,
water_obj,
residues=None,
converter_kwargs=(self.water_conv_kwargs, self.water_conv_kwargs),
**self.kwargs,
desc="Water-Water",
)
else:
water_ifp = None
for frame in lig_water_ifp:
ifp_lw = lig_water_ifp[frame] # Ligand → Water
ifp_wp = water_prot_ifp[frame] # Water → Protein
self.ifp.setdefault(frame, IFP())
if water_ifp is not None:
ifp_ww = water_ifp[frame] # WaterX -> WaterY
self._any_order(frame, ifp_lw, ifp_ww, ifp_wp)
else:
self._first_order_only(frame, ifp_lw, ifp_wp)
return self.ifp
[docs] def run_from_iterable(
self, lig_iterable: Iterable["Molecule"], prot_mol: "Molecule"
) -> "IFPResults":
"""Run the water-bridge analysis for an iterable of molecules.
Parameters
----------
lig_iterable : list or generator
An iterable yielding ligands as :class:`~prolif.molecule.Molecule`
objects
prot_mol : prolif.molecule.Molecule
The protein
"""
water_obj = cast("Molecule", self.water)
# Run analysis for ligand-water and water-protein interactions
lig_water_ifp: "IFPResults" = self.water_fp._run_iter_serial(
lig_iterable, water_obj, residues=None, **self.kwargs
)
water_prot_ifp: "IFPResults" = self.water_fp._run_iter_serial(
[water_obj], prot_mol, residues=self.residues, **self.kwargs
)
ifp_wp = water_prot_ifp[0] # Water → Protein
if self.order >= 2:
# Run water-water interaction analysis
water_ifp: "IFPResults" = self.water_fp._run_iter_serial(
[water_obj], water_obj, residues=None, **self.kwargs
)
ifp_ww: IFP | None = water_ifp[0] # WaterX -> WaterY
else:
ifp_ww = None
for pose in lig_water_ifp:
ifp_lw = lig_water_ifp[pose] # Ligand → Water
self.ifp.setdefault(pose, IFP())
if ifp_ww is not None:
self._any_order(pose, ifp_lw, ifp_ww, ifp_wp)
else:
self._first_order_only(pose, ifp_lw, ifp_wp)
return self.ifp
def _first_order_only(self, frame: int, ifp_lw: IFP, ifp_wp: IFP) -> None:
"""Iterates over all relevant combinations of ligand-water-protein"""
# for each ligand-water interaction
for data_lw in ifp_lw.interactions():
# for each water-protein interaction
for data_wp in ifp_wp.interactions():
if data_lw.protein == data_wp.ligand:
self._merge_metadata(frame, data_lw, data_wp)
def _any_order(self, frame: int, ifp_lw: IFP, ifp_ww: IFP, ifp_wp: IFP) -> None:
"""Generate results for any order of water-bridge interactions.
Constructs a graph to represent the water network and iterates over all paths
up to a given length (corresponding to ``order + 1``).
"""
# MultiGraph to allow the same pair of nodes to interact as both HBA and HBD
# and potentially multiple groups of atoms satisfying the constraints.
# Each of these interaction will have its own edge in the graph.
graph: nx.MultiGraph["ResidueId"] = nx.MultiGraph()
nodes: defaultdict[str, set[ResidueId]] = defaultdict(set)
# construct graph of water interactions
for ifp, role in [(ifp_lw, "ligand"), (ifp_wp, "protein")]:
for data in ifp.interactions():
graph.add_edge(data.ligand, data.protein, int_data=data)
# assign ligand and protein residue nodes to corresponding role
nodes[role].add(getattr(data, role))
# remove mirror interactions before adding water-water to the graph
deduplicated = {
frozenset((ligand_resid, protein_resid))
for ligand_resid, protein_resid in ifp_ww
if ligand_resid != protein_resid
}
for ligand_resid, protein_resid in deduplicated:
ww_dict = ifp_ww.data[ligand_resid, protein_resid]
for int_name, metadata_tuple in ww_dict.items():
for metadata in metadata_tuple:
data = InteractionData(
ligand=ligand_resid,
protein=protein_resid,
interaction=int_name,
metadata=metadata,
)
graph.add_edge(
data.ligand, data.protein, int_data=data, water_only=True
)
# find all edge paths of length up to `order + 1`
for source in nodes["ligand"]:
targets = (t for t in nodes["protein"] if nx.has_path(graph, source, t))
paths = cast(
Iterator[list[tuple["ResidueId", "ResidueId", str]]],
nx.all_simple_edge_paths(graph, source, targets, cutoff=self.order + 1),
)
for path in paths:
if len(path) <= self.min_order:
continue
# path is a list[tuple[node_id1, node_id2, deduplication_key]]
# first element in path is lig-water1, last is waterN-prot
data_lw = cast(InteractionData, graph.edges[path[0]]["int_data"])
data_wp = cast(InteractionData, graph.edges[path[-1]]["int_data"])
ww_edges = [graph.edges[p] for p in path[1:-1]]
# only include if strictly passing through water (going back through
# ligand or protein is not a valid higher-order interaction)
if all(e.get("water_only") for e in ww_edges):
# reorder ligand and protein in InteractionData to be contiguous
# i.e. lig-w1, w1-w2, w2-prot instead of lig-w1, w2-w1, w2-prot
data_ww_list = []
left = data_lw.protein
for e in ww_edges:
d = cast(InteractionData, e["int_data"])
is_sorted = d.ligand == left
data_ww = InteractionData(
ligand=d.ligand if is_sorted else d.protein,
protein=d.protein if is_sorted else d.ligand,
# interaction name is not kept in final metadata
# so no need to invert it
interaction=d.interaction,
# the indices of "ligand" water and "protein" water are
# merged in the final metadata for water mols so no need to
# invert the roles in indices
metadata=d.metadata,
)
data_ww_list.append(data_ww)
left = data_ww.protein
self._merge_metadata(frame, data_lw, data_wp, *data_ww_list)
def _merge_metadata(
self,
frame: int,
data_lw: InteractionData,
data_wp: InteractionData,
*data_ww_args: InteractionData,
) -> None:
"""Merge results from all fingerprints on matching water residues"""
# get indices for individual water molecules
water_indices = defaultdict(set)
for data, role in [
(data_lw, "protein"),
(data_wp, "ligand"),
*it.chain.from_iterable(
[
[(data_ww, "ligand"), (data_ww, "protein")]
for data_ww in data_ww_args
]
),
]:
resid = getattr(data, role)
water_indices[str(resid)].update(data.metadata["indices"][role])
# construct merged metadata
metadata = {
"indices": {
"ligand": data_lw.metadata["indices"]["ligand"],
"protein": data_wp.metadata["indices"]["protein"],
**{key: tuple(indices) for key, indices in water_indices.items()},
},
"parent_indices": {
"ligand": data_lw.metadata["parent_indices"]["ligand"],
"protein": data_wp.metadata["parent_indices"]["protein"],
"water": tuple(
set().union(
data_lw.metadata["parent_indices"]["protein"],
data_wp.metadata["parent_indices"]["ligand"],
*it.chain.from_iterable(
[
[
data_ww.metadata["parent_indices"]["ligand"],
data_ww.metadata["parent_indices"]["protein"],
]
for data_ww in data_ww_args
]
),
)
),
},
"water_residues": tuple(
dict.fromkeys( # uniquify but keep order
[
data_lw.protein,
*it.chain.from_iterable(
[
[data_ww.ligand, data_ww.protein]
for data_ww in data_ww_args
]
),
data_wp.ligand,
]
)
),
"order": len(data_ww_args) + 1,
"ligand_role": data_lw.interaction,
"protein_role": ( # invert role
"HBDonor" if data_wp.interaction == "HBAcceptor" else "HBAcceptor"
),
**{
f"{key}{suffix}": data.metadata[key]
for suffix, data in [
(f"_ligand_{data_lw.protein}", data_lw),
*it.chain.from_iterable(
[
[(f"_{data_ww.ligand}_{data_ww.protein}", data_ww)]
for data_ww in data_ww_args
]
),
(f"_{data_wp.ligand}_protein", data_wp),
]
for key in ["distance", "DHA_angle"]
},
"distance": sum(
data.metadata["distance"] for data in [data_lw, *data_ww_args, data_wp]
),
}
# store metadata
self.ifp[frame].setdefault((data_lw.ligand, data_wp.protein), {}).setdefault(
"WaterBridge", []
).append(metadata) # type: ignore[attr-defined]