Source code for openff.pablo.residue

"""
Classes for defining custom residues.
"""

import dataclasses
import itertools
import logging
from collections import defaultdict
from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import TYPE_CHECKING, DefaultDict, Literal, Self, TextIO

from openff.toolkit.topology import Molecule
from openff.toolkit.utils import RDKitToolkitWrapper
from openff.units import elements, unit

from openff.pablo._graph import Graph
from openff.pablo._utils import (
    __UNSET__,
    draw_molecule,
    flatten,
    react,
    sort_tuple,
    unwrap,
)
from openff.pablo.exceptions import PabloError, ResidueValidationError

if TYPE_CHECKING:
    import IPython.core.display

__all__ = [
    "AtomDefinition",
    "BondDefinition",
    "ResidueDefinition",
]

_residue_definition_skip_validation = False


@contextmanager
def _skip_residue_definition_validation():  # type: ignore[deadcode]
    global _residue_definition_skip_validation
    _residue_definition_skip_validation = True
    yield
    _residue_definition_skip_validation = False


[docs]@dataclass(frozen=True)
class AtomDefinition:
    """
    Description of an atom in a residue from the Chemical Component Dictionary (CCD).
    """

    name: str
    """The canonical name of this atom"""
    synonyms: tuple[str, ...]
    """Other names this atom can have"""
    symbol: str
    """The elemental symbol for this atom"""
    leaving: bool
    """
    Whether this atom is absent when a bond is formed between two residues.

    When a bond is formed between this residue and another, a "leaving fragment"
    must be removed to satisfy the valence model with the new bond. The leaving
    fragment is computed by identifying a single leaving atom bonded to each of
    the atoms forming the bond, as well as all leaving atoms bonded to those
    first leaving atoms.

    See also
    --------
    openff.pablo.ResidueDefinition.linking_bond, openff.pablo.ResidueDefinition.crosslink
    """
    charge: int
    """The formal charge of this atom"""
    aromatic: bool
    """Whether this atom is aromatic"""
    stereo: Literal["S", "R"] | None
    """The chirality of this atom"""

[docs]    @classmethod
    def with_defaults(
        cls,
        name: str,
        symbol: str,
        *,
        synonyms: Iterable[str] = (),
        leaving: bool = False,
        charge: int = 0,
        aromatic: bool = False,
        stereo: Literal["S", "R"] | None = None,
    ) -> Self:
        """
        Construct a new atom definition, optionally using some default values.
        """
        return cls(
            name=name,
            symbol=symbol,
            synonyms=tuple(synonyms),
            leaving=leaving,
            charge=charge,
            aromatic=aromatic,
            stereo=stereo,
        )

[docs]    def replace(
        self,
        *,
        name: str | __UNSET__ = __UNSET__(),
        symbol: str | __UNSET__ = __UNSET__(),
        synonyms: Iterable[str] | __UNSET__ = __UNSET__(),
        leaving: bool | __UNSET__ = __UNSET__(),
        charge: int | __UNSET__ = __UNSET__(),
        aromatic: bool | __UNSET__ = __UNSET__(),
        stereo: Literal["S", "R"] | None | __UNSET__ = __UNSET__(),
    ) -> Self:
        """Return a copy of ``self`` with the specified attributes replaced."""
        return self.__class__(
            name=self.name if isinstance(name, __UNSET__) else name,
            symbol=self.symbol if isinstance(symbol, __UNSET__) else symbol,
            synonyms=(
                self.synonyms if isinstance(synonyms, __UNSET__) else tuple(synonyms)
            ),
            leaving=self.leaving if isinstance(leaving, __UNSET__) else leaving,
            charge=self.charge if isinstance(charge, __UNSET__) else charge,
            aromatic=self.aromatic if isinstance(aromatic, __UNSET__) else aromatic,
            stereo=self.stereo if isinstance(stereo, __UNSET__) else stereo,
        )

    def __repr__(self) -> str:
        return (
            f"AtomDefinition(name={self.name!r}"
            + (f", synonyms={self.synonyms!r}" if self.synonyms else "")
            + ", ...)"
        )

    @property
    def names(self) -> set[str]:
        """The set of the canonical name and all synonyms"""
        return {self.name, *self.synonyms}


[docs]@dataclass(frozen=True)
class BondDefinition:
    """
    Description of a bond in a residue from the Chemical Component Dictionary (CCD).
    """

    atom1: str
    """The canonical name of the first atom in this bond"""
    atom2: str
    """The canonical name of the second atom in this bond"""
    order: int
    """The bond order of this bond (1 for single bond, 2 for double bond, etc.)"""
    aromatic: bool
    """``True`` if this bond is aromatic, ``False`` otherwise"""
    stereo: Literal["E", "Z"] | None
    """The stereochemistry of this bond."""

[docs]    def flipped(self) -> Self:
        """The same bond, but with the atoms in the opposite order"""
        return dataclasses.replace(self, atom1=self.atom2, atom2=self.atom1)

[docs]    @classmethod
    def with_defaults(
        cls,
        atom1: str,
        atom2: str,
        *,
        order: int = 1,
        aromatic: bool = False,
        stereo: Literal["E", "Z"] | None = None,
    ) -> Self:
        """
        Construct a new bond definition, optionally using some default values.
        """
        return cls(
            atom1=atom1,
            atom2=atom2,
            order=order,
            aromatic=aromatic,
            stereo=stereo,
        )

[docs]    def replace(
        self,
        *,
        atom1: str | __UNSET__ = __UNSET__(),
        atom2: str | __UNSET__ = __UNSET__(),
        order: int | __UNSET__ = __UNSET__(),
        aromatic: bool | __UNSET__ = __UNSET__(),
        stereo: Literal["E", "Z"] | None | __UNSET__ = __UNSET__(),
    ) -> Self:
        """Return a copy of ``self`` with the specified attributes replaced."""
        return self.__class__(
            atom1=self.atom1 if isinstance(atom1, __UNSET__) else atom1,
            atom2=self.atom2 if isinstance(atom2, __UNSET__) else atom2,
            order=self.order if isinstance(order, __UNSET__) else order,
            aromatic=(self.aromatic if isinstance(aromatic, __UNSET__) else aromatic),
            stereo=self.stereo if isinstance(stereo, __UNSET__) else stereo,
        )


[docs]@dataclass(frozen=True)
class ResidueDefinition:
    """
    Description of a residue from the Chemical Component Dictionary (CCD).
    """

    residue_name: str | None
    """
    The 3-letter residue code used in PDB files, or ``None`` if only used anonymously.
    """
    description: str
    """A longer description of the residue"""
    linking_bond: BondDefinition | None
    """Description of how this residue may bond to its neighbours in a polymer

    If the residue is only found as a monomer, ``None``. Otherwise, a
    ``BondDefinition``. The ``atom1`` and ``atom2`` attributes give the
    canonical atom names of the atoms that form the linking bond. ``atom1`` is
    the name of the atom in the residue preceding the bond, and ``atom2`` is in
    the residue after the bond. Any atoms that the bond between residues
    replaces should be marked as ``atom.leaving=True``; there must be either
    zero or one leaving fragments for each atom in a linking bond for the
    residue definition to validate, and there must be exactly one for a bond to
    form.

    A linking bond will be formed to join two residues if and only if all of the
    below are true:

    - The residues' atom records are sequential in the PDB file
    - The residues have the same chain ID
    - There is no TER record between the residues in the PDB file
    - The two residues have identical ``linking_bond`` attributes
    - All leaving atoms associated with ``linking_bond.atom1`` are absent in
      the first encountered residue, and all leaving atoms associated with
      ``linking_bond.atom2`` are absent in the latter residue. Leaving atoms
      are those that have the ``AtomDefinition.leaving`` attribute set to
      ``True``. A leaving atom is associated with atom `a` if it is bonded to
      `a`, or it is bonded to a leaving atom associated with `a`.
    - There is at least one leaving atom associated with each linking atom

    The charge of linking atoms is not modified; any change in valence is
    accounted for via the removal of leaving atoms.
    """
    crosslink: BondDefinition | None
    """Optional description of a crosslink between this residue and another.

    A crosslink is a bond between this residue and another. Crosslinks differ
    from linking bonds primarily because they may occur between residues that
    are do not appear sequentially in the PDB file. Crosslinks cannot occur
    within a residue; use a bond or residue variant instead.

    If the residue does not form cross links, ``None``. Otherwise, a
    ``BondDefinition``. The ``atom1`` and ``atom2`` attributes give the canonical
    atom names of the atoms that form the crosslink. ``atom1`` is the name of
    the atom in this residue, and ``atom2`` is in the other residue. Any atoms
    that the bond between residues replaces should be marked as
    ``atom.leaving=True``; there must be either zero or one leaving fragments
    for each atom in a linking bond for the residue definition to validate, and
    there must be exactly one for a bond to form.

    A crosslink will be formed between two residues if and only if all of the
    below are true:

    - The two residues' ``crosslink`` attributes are identical except that their
      atom names are reversed
    - All leaving atoms associated with each residues' ``crosslink.atom1``
      attribute are absent in that residue. Leaving atoms are those that have
      the ``AtomDefinition.leaving`` attribute set to ``True``. A leaving atom
      is associated with atom `a` if it is bonded to `a`, or it is bonded to a
      leaving atom associated with `a`.
    - There is at least one leaving atom associated with each cross-linking atom

    The charge of linking atoms is not modified; any change in valence is
    accounted for via the removal of leaving atoms.
    """
    atoms: tuple[AtomDefinition, ...]
    """The atom definitions that make up this residue.

    Atoms may be marked as `leaving` only if they are associated with a
    linking or crosslinking bond; see the API documentation for those fields.
    All atoms must have unique canonical names."""
    bonds: tuple[BondDefinition, ...]
    """The bond definitions that make up this residue"""
    virtual_sites: tuple[str, ...]
    """Atom names that represent virtual sites.

    All these names must be present for the residue definition to match."""

    @property
    def n_expected_atoms(self) -> int:
        """The number of atoms that a matching residue without any linkages has.

        In practice, the number of atoms including leaving atoms, plus the
        number of virtual sites. A residue with linkages can have no more than
        this many atoms.
        """
        return len(self.atoms) + len(self.virtual_sites)

    @property
    def n_core_atoms(self) -> int:
        """The number of non-leaving atoms in the residue.

        Excludes virtual sites. A matching residue can have no fewer than this
        many atoms.
        """
        return sum(1 for atom in self.atoms if not atom.leaving)

    def __repr__(self) -> str:
        return f"ResidueDefinition(description={self.description!r}, ...)"

[docs]    def replace(
        self,
        *,
        residue_name: str | __UNSET__ = __UNSET__(),
        description: str | __UNSET__ = __UNSET__(),
        linking_bond: BondDefinition | None | __UNSET__ = __UNSET__(),
        crosslink: BondDefinition | None | __UNSET__ = __UNSET__(),
        atoms: Iterable[AtomDefinition] | __UNSET__ = __UNSET__(),
        bonds: Iterable[BondDefinition] | __UNSET__ = __UNSET__(),
        virtual_sites: Iterable[str] | __UNSET__ = __UNSET__(),
    ) -> Self:
        """Return a copy of ``self`` with the specified attributes replaced."""
        return self.__class__(
            residue_name=(
                self.residue_name
                if isinstance(residue_name, __UNSET__)
                else residue_name
            ),
            description=(
                self.description if isinstance(description, __UNSET__) else description
            ),
            linking_bond=(
                self.linking_bond
                if isinstance(linking_bond, __UNSET__)
                else linking_bond
            ),
            crosslink=self.crosslink if isinstance(crosslink, __UNSET__) else crosslink,
            atoms=self.atoms if isinstance(atoms, __UNSET__) else tuple(atoms),
            bonds=self.bonds if isinstance(bonds, __UNSET__) else tuple(bonds),
            virtual_sites=(
                self.virtual_sites
                if isinstance(virtual_sites, __UNSET__)
                else tuple(virtual_sites)
            ),
        )

    @property
    def is_anonymous(self) -> bool:
        """
        ``True`` if atom names may be used for matching, ``False`` otherwise.
        """
        return self.residue_name is None

    def __post_init__(self):
        if _residue_definition_skip_validation:
            return

        self._validate()

    def _validate(self):
        if (
            self.linking_bond is None
            and self.crosslink is None
            and True in {atom.leaving for atom in self.atoms}
            and not self.is_anonymous
        ):
            raise ResidueValidationError(
                f"{self.residue_name}: Leaving atoms were specified, but there is no linking bond or crosslink",
                self,
            )
        if len({atom.name for atom in self.atoms}) != len(self.atoms):
            raise ResidueValidationError(
                f"{self.residue_name}: All atoms must have unique canonical names",
            )

        unassigned_leaving_atoms = self._unassigned_leaving_atoms()
        if len(unassigned_leaving_atoms) != 0 and not self.is_anonymous:
            raise ResidueValidationError(
                f"{self.residue_name}: Leaving atoms could not be assigned to a"
                + f" bond: {unassigned_leaving_atoms}",
            )

        all_canonical_names = {atom.name for atom in self.atoms}
        all_synonyms = set(flatten(atom.synonyms for atom in self.atoms))
        all_atom_names = all_canonical_names.union(all_synonyms)
        if not set(self.virtual_sites).isdisjoint(all_atom_names):
            raise ResidueValidationError(
                f"{self.residue_name}: Virtual sites may not clash with any atom name"
                + f" or synonym: {all_atom_names.intersection(self.virtual_sites)}",
            )

        linking_atoms: list[tuple[str, int]] = []
        if self.linking_bond is not None:
            linking_atoms.extend(
                [
                    (self.linking_bond.atom1, self.linking_bond.order),
                    (self.linking_bond.atom2, self.linking_bond.order),
                ],
            )
        if self.crosslink is not None:
            linking_atoms.append((self.crosslink.atom1, self.crosslink.order))
        for linking_atom, bond_order in linking_atoms:
            bonded_atoms = [
                atom
                for atom in self.atoms_bonded_to(linking_atom)
                if self.name_to_atom[atom].leaving
            ]
            if len(bonded_atoms) > 1:
                raise ResidueValidationError(
                    f"{self.residue_name} ({self.description}): Linking atom"
                    + f"{linking_atom} must be bonded to no more than 1 leaving atom",
                )
            elif len(bonded_atoms) == 0:
                continue

            bonded_atom = unwrap(bonded_atoms)
            replaced_bond_order = unwrap(
                bond.order
                for bond in self.bonds
                if (
                    sort_tuple((bond.atom1, bond.atom2))
                    == sort_tuple((linking_atom, bonded_atom))
                )
            )
            if bond_order != replaced_bond_order:
                raise ResidueValidationError(
                    f"{self.residue_name} ({self.description}): Bond between"
                    + f" linking atom {linking_atom} and leaving fragment"
                    + f" beginning at atom {bonded_atom} has order"
                    + f" {replaced_bond_order}, but the linking bond has order"
                    + f" {bond_order}",
                )

[docs]    @classmethod
    def anon_from_molecule(
        cls,
        molecule: Molecule,
        description: str | None = None,
        virtual_sites: Collection[str] = (),
    ) -> Self:
        """
        Create an anoymous ``ResidueDefinition`` from an ``openff.toolkit.Molecule``.

        Parameters
        ----------
        molecule
            The ``Molecule`` object. Leaving atoms are identified from the atom
            metadata; atom's whose metadata includes a truthy value for the key
            ``"leaving_atom"`` are marked as leaving atoms.
        description
            An optional string describing the residue. Taken from ``molecule``
            ``"description"`` property if ``None``. See
            :py:data:`openff.pablo.ResidueDefinition.description`
        virtual_sites
            Virtual sites expected by the residue. See
            :py:data:`openff.pablo.ResidueDefinition.virtual_sites`
        """
        molecule_crosslink = molecule.properties.get("crosslink")
        if isinstance(molecule_crosslink, BondDefinition):
            crosslink = molecule_crosslink
        else:
            crosslink = None

        molecule_linking_bond = molecule.properties.get("crosslink")
        if isinstance(molecule_linking_bond, BondDefinition):
            linking_bond = molecule_linking_bond
        else:
            linking_bond = None

        if description is None:
            molecule_description = molecule.properties.get("description")
            if isinstance(molecule_description, str):
                description = molecule_description
            else:
                description = ""

        atoms: list[AtomDefinition] = []
        for atom in molecule.atoms:
            if (  # nofmt
                linking_bond is not None
                and atom.name in {linking_bond.atom1, linking_bond.atom2}
            ):
                name = atom.name
            else:
                name = str(atom.molecule_atom_index)
            atoms.append(
                AtomDefinition(
                    name=name,
                    synonyms=(),
                    symbol=atom.symbol,
                    leaving=bool(atom.metadata.get("leaving_atom")),
                    charge=atom.formal_charge.m_as(unit.elementary_charge),  # type: ignore
                    stereo=atom.stereochemistry,
                    aromatic=atom.is_aromatic,
                ),
            )
        bonds: list[BondDefinition] = []
        for bond in molecule.bonds:
            bonds.append(
                BondDefinition(
                    atom1=str(bond.atom1.molecule_atom_index),
                    atom2=str(bond.atom2.molecule_atom_index),
                    order=bond.bond_order,
                    aromatic=bond.is_aromatic,
                    stereo=bond.stereochemistry,
                ),
            )

        return cls(
            residue_name=None,
            description=description,
            linking_bond=linking_bond,
            crosslink=crosslink,
            atoms=tuple(atoms),
            bonds=tuple(bonds),
            virtual_sites=tuple(virtual_sites),
        )

[docs]    @classmethod
    def from_molecule(
        cls,
        molecule: Molecule,
        residue_name: str | None = None,
        description: str | None = None,
        linking_bond: BondDefinition | None = None,
        crosslink: BondDefinition | None = None,
        virtual_sites: Collection[str] = (),
    ) -> Self:
        """
        Create a ``ResidueDefinition`` from an ``openff.toolkit.Molecule``.

        Parameters
        ----------
        molecule
            The ``Molecule`` object. Canonical names are taken from the atom
            names in this object. Leaving atoms are identified from the atom
            metadata; atom's whose metadata includes a truthy value for the key
            ``"leaving_atom"`` are marked as leaving atoms. Synonyms are never
            set.
        residue_name
            The 3-letter code used to identify the residue in a PDB file. If
            ``None``, takes name from atom's ``"residue_name"`` metadata entry,
            or raises ``ValueError`` if they do not all agree. See also
            :py:data:`openff.pablo.ResidueDefinition.residue_name`
        linking_bond
            Residue linking bond. May be taken from ``molecule``
            ``"linking_bond"`` property if ``None``. See
            :py:data:`openff.pablo.ResidueDefinition.linking_bond`
        crosslink
            Residue crosslink. May be taken from ``molecule`` ``"crosslink"``
            property if ``None``. See
            :py:data:`openff.pablo.ResidueDefinition.crosslink`
        virtual_sites
            Virtual sites expected by the residue. See
            :py:data:`openff.pablo.ResidueDefinition.virtual_sites`
        description
            An optional string describing the residue. Taken from ``molecule``
            ``"description"`` property if ``None``. See
            :py:data:`openff.pablo.ResidueDefinition.description`
        """
        if residue_name is None:
            atom_residue_names = {
                atom.metadata.get("residue_name", None) for atom in molecule.atoms
            }
            if len(atom_residue_names) == 1:
                atom_residue_name = unwrap(atom_residue_names)
                if isinstance(atom_residue_name, str):
                    residue_name = atom_residue_name
                else:
                    raise PabloError("could not infer residue name from atom metadata")
            else:
                raise PabloError("could not infer residue name from atom metadata")

        if crosslink is None:
            molecule_crosslink = molecule.properties.get("crosslink")
            if isinstance(molecule_crosslink, BondDefinition):
                crosslink = molecule_crosslink

        if linking_bond is None:
            molecule_linking_bond = molecule.properties.get("crosslink")
            if isinstance(molecule_linking_bond, BondDefinition):
                linking_bond = molecule_linking_bond

        if description is None:
            molecule_description = molecule.properties.get("description")
            if isinstance(molecule_description, str):
                description = molecule_description
            else:
                description = ""

        atoms: list[AtomDefinition] = []
        for atom in molecule.atoms:
            synonyms_str = str(atom.metadata.get("synonyms", ""))
            atoms.append(
                AtomDefinition(
                    name=atom.name,
                    synonyms=tuple(synonyms_str.split()),
                    symbol=atom.symbol,
                    leaving=bool(atom.metadata.get("leaving_atom")),
                    charge=atom.formal_charge.m_as(unit.elementary_charge),  # type: ignore
                    stereo=atom.stereochemistry,
                    aromatic=atom.is_aromatic,
                ),
            )
        bonds: list[BondDefinition] = []
        for bond in molecule.bonds:
            bonds.append(
                BondDefinition(
                    atom1=bond.atom1.name,
                    atom2=bond.atom2.name,
                    order=bond.bond_order,
                    aromatic=bond.is_aromatic,
                    stereo=bond.stereochemistry,
                ),
            )

        return cls(
            residue_name=residue_name,
            description=description,
            linking_bond=linking_bond,
            crosslink=crosslink,
            atoms=tuple(atoms),
            bonds=tuple(bonds),
            virtual_sites=tuple(virtual_sites),
        )

[docs]    @classmethod
    def from_capped_molecule(
        cls,
        molecule: Molecule,
        residue_name: str,
        leaving_atom_indices: Collection[int],
        linking_bond: BondDefinition,
        crosslink: BondDefinition | None = None,
        virtual_sites: Collection[str] = (),
        description: str | None = None,
    ) -> Self:
        """
        Create a linking ``ResidueDefinition`` from an ``openff.toolkit.Molecule``

        Parameters
        ----------
        residue_name
            The 3-letter code used to identify the residue in a PDB file. See
            :py:data:`openff.pablo.ResidueDefinition.residue_name`
        molecule
            The ``Molecule`` object. Canonical names are taken from the atom
            names in this object. Synonyms are never set.
        leaving_atom_indices
            Indices of atoms within the ``molecule`` argument that should be
            marked as leaving atoms. See
            :py:data:`openff.pablo.residue.AtomDefinition.leaving`
        linking_bond
            The bond linking this residue to its neighbours in a polymer. See
            :py:data:`openff.pablo.ResidueDefinition.linking_bond`
        crosslink
            See :py:data:`openff.pablo.ResidueDefinition.crosslink`
        description
            An optional string describing the residue. See
            :py:data:`openff.pablo.ResidueDefinition.description`
        virtual_sites
            Virtual sites expected by the residue. See
            :py:data:`openff.pablo.ResidueDefinition.virtual_sites`
        """
        molecule = deepcopy(molecule)
        for i in leaving_atom_indices:
            molecule.atom(i).metadata["leaving_atom"] = True
        return cls.from_molecule(
            residue_name=residue_name,
            molecule=molecule,
            linking_bond=linking_bond,
            crosslink=crosslink,
            description=description,
            virtual_sites=virtual_sites,
        )

[docs]    @classmethod
    def anon_from_sdf(
        cls,
        file: str | Path | TextIO,
        description: str | None = None,
    ) -> Self:
        """
        Create an anonymous residue definition from an SDF file of a single molecule
        """
        mol = Molecule.from_file(
            file,
            file_format="SDF",
            toolkit_registry=RDKitToolkitWrapper(),
        )
        if not isinstance(mol, Molecule):
            mol = unwrap(mol)
        if description is None:
            description = str(file)
            if len(description) > 30:
                description = description[:3] + "..." + description[-23:]
        return cls.anon_from_molecule(mol, description=description)

[docs]    @classmethod
    def anon_from_smiles(
        cls,
        smiles: str,
        leaving_atoms: Collection[int] = (),
        virtual_sites: Collection[str] = (),
        description: str | None = None,
    ) -> Self:
        """
        Create an anonymous ``ResidueDefinition`` from a SMILES string.

        Parameters
        ----------
        smiles
            The SMILES string.
        leaving_atoms
            SMILES string mapping numbers for atoms that should be marked as
            leaving atoms.
        description
            An optional string describing the residue. See
            :py:data:`openff.pablo.ResidueDefinition.description`
        virtual_sites
            Virtual sites expected by the residue. See
            :py:data:`openff.pablo.ResidueDefinition.virtual_sites`
        """
        molecule = Molecule.from_smiles(
            smiles,
            allow_undefined_stereo=True,
        )
        leaving_atom_indices = {i - 1 for i in leaving_atoms}

        for i, atom in enumerate(molecule.atoms):
            if i in leaving_atom_indices:
                atom.metadata["leaving_atom"] = True

        return cls.anon_from_molecule(
            molecule=molecule,
            description=smiles if description is None else description,
            virtual_sites=virtual_sites,
        )

[docs]    @classmethod
    def anon_from_smiles_marked_leaving(
        cls,
        smiles: str,
        *,
        virtual_sites: Collection[str] = (),
        description: str | None = None,
    ) -> Self:
        """
        Create an anonymous ``ResidueDefinition`` from a partially mapped SMILES.

        The mapped atoms are treated as leaving atoms.

        Parameters
        ----------
        smiles
            The SMILES string. Leaving atoms should be mapped. All other atoms
            must not be.
        virtual_sites
            Virtual sites expected by the residue. See
            :py:data:`openff.pablo.ResidueDefinition.virtual_sites`
        description
            An optional string describing the residue. See
            :py:data:`openff.pablo.ResidueDefinition.description`
        """
        molecule = Molecule.from_smiles(
            smiles,
            allow_undefined_stereo=True,
        )

        neighbours: DefaultDict[int, set[int]] = defaultdict(set)
        for atom1, atom2 in molecule.nth_degree_neighbors(1):
            neighbours[atom1.molecule_atom_index].add(atom2.molecule_atom_index)
            neighbours[atom2.molecule_atom_index].add(atom1.molecule_atom_index)

        for i in molecule.properties["atom_map"]:
            atom = molecule.atom(i)
            atom.metadata["leaving_atom"] = True

        return cls.anon_from_molecule(
            molecule=molecule,
            description=smiles if description is None else description,
            virtual_sites=virtual_sites,
        )

[docs]    @classmethod
    def from_smiles(
        cls,
        mapped_smiles: str,
        atom_names: Mapping[int, str],
        residue_name: str,
        leaving_atoms: Collection[int] = (),
        linking_bond: BondDefinition | None = None,
        crosslink: BondDefinition | None = None,
        virtual_sites: Collection[str] = (),
        description: str | None = None,
    ) -> Self:
        """
        Create a ``ResidueDefinition`` from a mapped SMILES string.

        Parameters
        ----------
        residue_name
            The 3-letter code used to identify the residue in a PDB file. See
            :py:data:`openff.pablo.ResidueDefinition.residue_name`
        mapped_smiles
            The SMILES string. All atoms must be explicitly included with
            contiguous mapping numbers starting at 1.
        atom_names
            Mapping from SMILES string mapping numbers to the canonical atom
            name. Note that this refers to numbers in the actual SMILES string,
            and so keys should be contiguous integers starting at 1. Atom names
            must be unique.
        leaving_atoms
            SMILES string mapping numbers for atoms that should be marked as
            leaving atoms.
        linking_bond
            The bond linking this residue to its neighbours in a polymer. See
            :py:data:`openff.pablo.ResidueDefinition.linking_bond`
        crosslink
            See :py:data:`openff.pablo.ResidueDefinition.crosslink`
        description
            An optional string describing the residue. See
            :py:data:`openff.pablo.ResidueDefinition.description`
        virtual_sites
            Virtual sites expected by the residue. See
            :py:data:`openff.pablo.ResidueDefinition.virtual_sites`
        """
        molecule = Molecule.from_mapped_smiles(
            mapped_smiles,
            allow_undefined_stereo=True,
        )
        leaving_atom_indices = {i - 1 for i in leaving_atoms}
        index_to_atom_name = {i - 1: name for i, name in atom_names.items()}

        if len(index_to_atom_name) != molecule.n_atoms:
            raise PabloError("Should be an atom name for each atom in SMILES string")
        if set(index_to_atom_name.keys()) != set(range(molecule.n_atoms)):
            raise PabloError(
                "Keys of atom_names should be contiguous integers starting at 1",
            )

        for i, atom in enumerate(molecule.atoms):
            if i in leaving_atom_indices:
                atom.metadata["leaving_atom"] = True
            atom.name = index_to_atom_name[i]

        return cls.from_molecule(
            residue_name=residue_name,
            molecule=molecule,
            linking_bond=linking_bond,
            description=mapped_smiles if description is None else description,
            crosslink=crosslink,
            virtual_sites=virtual_sites,
        )

[docs]    def to_openff_molecule(self) -> Molecule:
        """Create a ``Molecule`` representing the complete, unlinked residue."""
        molecule = Molecule()
        atoms: dict[str, int] = {}
        for atom in self.atoms:
            atoms[atom.name] = molecule.add_atom(
                atomic_number=elements.NUMBERS[atom.symbol],
                formal_charge=atom.charge,
                is_aromatic=atom.aromatic,
                stereochemistry=atom.stereo,
                name=atom.name,
                metadata={
                    "residue_name": (
                        "" if self.residue_name is None else self.residue_name
                    ),
                    "leaving_atom": atom.leaving,
                    "substructure_atom": not atom.leaving,
                    "synonyms": " ".join(atom.synonyms),
                },
            )

        for bond in self.bonds:
            molecule.add_bond(
                atom1=atoms[bond.atom1],
                atom2=atoms[bond.atom2],
                bond_order=bond.order,
                is_aromatic=bond.aromatic,
                stereochemistry=bond.stereo,
            )

        molecule.properties.update(
            {
                "linking_bond": self.linking_bond,
                "crosslink": self.crosslink,
                "description": self.description,
            },
        )

        return molecule

    @cached_property
    def canonical_name_to_atom(self) -> dict[str, AtomDefinition]:
        """Map from each atoms' name and synonyms to the atom definition."""
        return {atom.name: atom for atom in self.atoms}

    @cached_property
    def name_to_atom(self) -> dict[str, AtomDefinition]:
        """Map from each atoms' name and synonyms to the atom definition."""
        mapping = self.canonical_name_to_atom
        canonical_names = set(mapping)
        for atom in self.atoms:
            for synonym in atom.synonyms:
                if synonym in mapping and mapping[synonym] != atom:
                    raise PabloError(
                        f"synonym {synonym} degenerately defined for canonical"
                        + f" names {mapping[synonym].name} and {atom.name} in"
                        + f" residue {self.residue_name}",
                    )
                if synonym in canonical_names:
                    raise PabloError(
                        f"synonym {synonym} of atom {atom.name} clashes with"
                        + f" another canonical name in residue {self.residue_name}",
                    )
                mapping[synonym] = atom
        return mapping

[docs]    def atoms_bonded_to(self, atom_name: str) -> Iterator[str]:
        """
        Iterator over atoms bonded to the one with the given canonical name.
        """
        yield from (bond.atom2 for bond in self.bonds_to(atom_name))

[docs]    def bonds_to(self, atom_name: str) -> Iterator[BondDefinition]:
        """
        All bonds to the given atom, ordered with ``atom1`` as the given atom.
        """
        for bond in self.bonds:
            if bond.atom1 == atom_name:
                yield bond
            if bond.atom2 == atom_name:
                yield bond.flipped()

    def _leaving_fragment_of(self, linking_atom: str) -> Iterator[str]:
        atoms_to_check = list(self.atoms_bonded_to(linking_atom))
        checked_atoms: set[str] = set()
        while atoms_to_check:
            atom_name = atoms_to_check.pop()
            if self.name_to_atom[atom_name].leaving:
                yield atom_name
                atoms_to_check.extend(
                    filter(
                        lambda x: x not in checked_atoms,
                        self.atoms_bonded_to(atom_name),
                    ),
                )
            checked_atoms.add(atom_name)
        if (
            linking_atom in self.canonical_name_to_atom
            and self.canonical_name_to_atom[linking_atom].leaving
            and linking_atom not in checked_atoms
        ):
            yield linking_atom

    @cached_property
    def _posterior_bond_leaving_atoms(self) -> set[str]:
        return (
            set()
            if self.linking_bond is None
            else set(self._leaving_fragment_of(self._posterior_bond_linking_atom))
        )

    @cached_property
    def _prior_bond_leaving_atoms(self) -> set[str]:
        return (
            set()
            if self.linking_bond is None
            else set(self._leaving_fragment_of(self._prior_bond_linking_atom))
        )

    @cached_property
    def _crosslink_leaving_atoms(self) -> set[str]:
        return (
            set()
            if self.crosslink is None
            else set(self._leaving_fragment_of(self.crosslink.atom1))
        )

    @property
    def _prior_bond_linking_atom(self) -> str:
        if self.linking_bond is None:
            raise PabloError("not a linking residue")
        return self.linking_bond.atom2

    @property
    def _posterior_bond_linking_atom(self) -> str:
        if self.linking_bond is None:
            raise PabloError("not a linking residue")
        return self.linking_bond.atom1

    @property
    def _can_form_prior_bond(self) -> bool:
        return (
            self.linking_bond is not None
            and self._prior_bond_linking_atom in self.canonical_name_to_atom
            and len(self._prior_bond_leaving_atoms) > 0
        )

    @property
    def _can_form_posterior_bond(self) -> bool:
        return (
            self.linking_bond is not None
            and self._posterior_bond_linking_atom in self.canonical_name_to_atom
            and len(self._posterior_bond_leaving_atoms) > 0
        )

    @property
    def _can_form_crosslink(self) -> bool:
        return (
            self.crosslink is not None
            and self.crosslink.atom1 in self.canonical_name_to_atom
            and len(self._crosslink_leaving_atoms) > 0
        )

    def _is_isomorphic_to(self, other: Self) -> bool:
        """Atom and bond ordering insensitive equality test"""
        return all(
            [
                other.linking_bond == self.linking_bond,
                other.crosslink == self.crosslink,
                set(other.atoms) == set(self.atoms),
                set(other.bonds) == set(self.bonds),
            ],
        )

[docs]    def deprotonated_at(
        self,
        name: str,
    ) -> Self:
        """Return a copy of ``self`` with proton ``name`` abstracted.

        The hydrogen with canonical name ``name`` is removed, and the formal
        charge of the atom bonded to it is decremented. If the named atom is not
        hydrogen, is missing, or is bonded to a number of other atoms other than
        1, ``PabloError`` is raised.
        """
        try:
            atom = self.canonical_name_to_atom[name]
        except KeyError:
            raise PabloError(f"Cannot deprotonate missing atom {name}")

        if atom.symbol != "H":
            raise PabloError(
                f"Cannot deprotonate non-hydrogen atom {name}: {atom.symbol=}",
            )

        neighbours = list(self.atoms_bonded_to(name))
        if len(neighbours) != 1:
            raise PabloError(
                f"Cannot deprotonate atom {name} bonded to {len(neighbours)} other atoms",
            )
        neighbour = neighbours[0]

        return self.replace(
            bonds=[bond for bond in self.bonds if name not in [bond.atom1, bond.atom2]],
            atoms=[
                (
                    atom.replace(charge=atom.charge - 1)
                    if atom.name == neighbour
                    else atom
                )
                for atom in self.atoms
                if atom.name != name
            ],
            description=self.description + f" -{name}",
        )

[docs]    def protonated_at(
        self,
        heavy_atom_name: str,
        proton_name: str,
        ignore_synonym_clashes: bool = False,
    ) -> Self:
        """Return a copy of ``self`` with ``heavy_atom_name`` protonated by ``proton_name``.

        The formal charge of the atom with canonical name ``heavy_atom_name`` is
        incremented, a new hydrogen atom named ``proton_name`` is added, and a
        bond is created between the two named atoms.  If the heavy atom is
        missing or the new proton name clashes with an existing name or synonym,
        ``PabloError`` is raised. The new atom has the same value for the
        ``leaving`` attribute as the heavy atom. The synonym clash check may be
        suppressed with the ``ignore_synonym_clashes`` argument, but the
        canonical names are always checked.
        """
        try:
            heavy_atom = self.canonical_name_to_atom[heavy_atom_name]
        except KeyError:
            raise PabloError(f"Cannot protonate missing atom {heavy_atom_name}")
        if any(
            [
                (ignore_synonym_clashes and proton_name in self.canonical_name_to_atom),
                (not ignore_synonym_clashes and proton_name in self.name_to_atom),
            ],
        ):
            raise PabloError(
                f"name {proton_name} clashes with existing name in residue",
            )

        return self.replace(
            bonds=[
                *self.bonds,
                BondDefinition.with_defaults(heavy_atom_name, proton_name),
            ],
            atoms=[
                *(
                    (
                        atom.replace(charge=atom.charge + 1)
                        if atom.name == heavy_atom_name
                        else atom
                    )
                    for atom in self.atoms
                ),
                AtomDefinition.with_defaults(
                    proton_name,
                    "H",
                    leaving=heavy_atom.leaving,
                ),
            ],
            description=self.description + f" +{proton_name}",
        )

[docs]    def vary_protonation(
        self,
        *,
        acidic: Iterable[str] = (),
        # Each element specifies an atom name to remove, decrementing the formal
        # charge on the neighbouring heavy atom. Multiply bonded or non-hydrogen
        # atoms raise an error.
        basic: Iterable[tuple[str, str]] = (),
        # Each tuple specifies an atom name to protonate (increment the formal
        # charge and form a bond) and the name of the added proton
        ignore_synonym_clashes: bool = False,
        skip_errors: bool = False,
    ) -> list[Self]:
        """
        Compute all combinations of the specified protonation variants

        The first element of the returned list is guaranteed to be the unmodified
        starting residue definition; to get a list of just the new variants, use
        ``resdef.vary_protonation(...)[1:]``.

        Note that all combinations of protonations and deprotonations are
        generated; this means that if ``acidic`` has length ``n`` and ``basic``
        has length ``m``, ``2**(n+m)`` variants will be generated.

        Parameters
        ==========
        acidic
            Each element specifies an atom name to remove, decrementing the
            formal charge on the neighbouring heavy atom. Multiply bonded,
            unbonded, missing or non-hydrogen atoms raise ``PabloError`` unless
            ``skip_errors`` is ``True``.
        basic
            Each tuple specifies an atom name to protonate (increment the formal
            charge and form a bond) and the name of the added proton. Missing
            heavy atoms or new atom names that clash with existing names raise
            ``PabloError`` unless ``skip_errors`` is ``True``.
        ignore_synonym_clashes
            If set to ``True``, protons added by the ``basic`` argument may have
            names that clash with the synonyms of other atoms. This can be
            useful in the early stages of a multi-step residue definition
            patching process. Added protons may never have names that clash
            with the canonical names of other atoms.
        skip_errors
            If set to ``True``, missing, clashing, or improperly bonded atoms
            are skipped rather than raising ``PabloError``.
        """
        variants = [self]
        for proton_to_remove in acidic:
            for variant in list(variants):
                try:
                    variants.append(
                        variant.deprotonated_at(
                            proton_to_remove,
                        ),
                    )
                except PabloError as err:
                    if skip_errors:
                        continue
                    else:
                        raise err from None
        for atom_to_protonate, name_of_proton in basic:
            for variant in list(variants):
                try:
                    variants.append(
                        variant.protonated_at(
                            atom_to_protonate,
                            name_of_proton,
                            ignore_synonym_clashes=ignore_synonym_clashes,
                        ),
                    )
                except PabloError as err:
                    if skip_errors:
                        continue
                    else:
                        raise err from None
        return variants

[docs]    def with_synonyms(self, synonyms: Mapping[str, Sequence[str]]) -> Self:
        """Return a copy of self with the given synonyms added

        ``synonyms`` is a mapping from canonical atom names to its additional
        synonyms. Existing synonyms are preserved.
        """
        return self.replace(
            atoms=(
                atom.replace(
                    synonyms={
                        *atom.synonyms,
                        *synonyms.get(
                            atom.name,
                            [],
                        ),
                    },
                )
                for atom in self.atoms
            ),
        )

    def _missing_atoms_are_valid_leaving_atoms(
        self,
        missing_atom_names: Collection[str],
    ) -> bool:
        """
        ``True`` if the missing atom names form a valid set of leaving atoms, ``False`` if not.

        A valid set must all be marked as leaving atoms and must all be
        assignable to a linking or crosslinking bond.
        """
        missing_atom_names = set(missing_atom_names)
        return (
            (
                missing_atom_names.issuperset(
                    self._prior_bond_leaving_atoms,
                )
                or missing_atom_names.isdisjoint(
                    self._prior_bond_leaving_atoms,
                )
            )
            and (
                missing_atom_names.issuperset(
                    self._posterior_bond_leaving_atoms,
                )
                or missing_atom_names.isdisjoint(
                    self._posterior_bond_leaving_atoms,
                )
            )
            and (
                missing_atom_names.issuperset(
                    self._crosslink_leaving_atoms,
                )
                or missing_atom_names.isdisjoint(
                    self._crosslink_leaving_atoms,
                )
            )
        )

    def _possible_prior_bond_names_gen(self) -> Iterator[tuple[str, str]]:
        if (
            self.linking_bond is None
            or self._prior_bond_linking_atom not in self.canonical_name_to_atom
        ):
            return
        linking_atom = self.canonical_name_to_atom[self._prior_bond_linking_atom]
        for linking_name in (linking_atom.name, *linking_atom.synonyms):
            yield (self._posterior_bond_linking_atom, linking_name)

    @cached_property
    def _possible_prior_bond_names(self) -> set[tuple[str, str]]:
        return set(self._possible_prior_bond_names_gen())

    def _possible_posterior_bond_names_gen(self) -> Iterator[tuple[str, str]]:
        if (
            self.linking_bond is None
            or self._posterior_bond_linking_atom not in self.canonical_name_to_atom
        ):
            return
        linking_atom = self.canonical_name_to_atom[self._posterior_bond_linking_atom]
        for linking_name in (linking_atom.name, *linking_atom.synonyms):
            yield (linking_name, self._prior_bond_linking_atom)

    @cached_property
    def _possible_posterior_bond_names(self) -> set[tuple[str, str]]:
        return set(self._possible_posterior_bond_names_gen())

    def _possible_crosslink_bond_names_gen(self) -> Iterator[tuple[str, str]]:
        if (
            self.crosslink is None
            or self.crosslink.atom1 not in self.canonical_name_to_atom
        ):
            return
        linking_atom = self.canonical_name_to_atom[self.crosslink.atom1]
        for linking_name in (linking_atom.name, *linking_atom.synonyms):
            yield (linking_name, self.crosslink.atom2)

    @cached_property
    def _possible_crosslink_bond_names(self) -> set[tuple[str, str]]:
        return set(self._possible_crosslink_bond_names_gen())

    def _unassigned_leaving_atoms(self) -> set[str]:
        all_leaving_atoms = {atom.name for atom in self.atoms if atom.leaving}
        assigned_leaving_atoms = (
            self._prior_bond_leaving_atoms
            | self._posterior_bond_leaving_atoms
            | self._crosslink_leaving_atoms
        )
        return all_leaving_atoms.difference(assigned_leaving_atoms)

    def _unassigned_leaving_atom_fragments(self) -> Iterator[tuple[str, ...]]:
        yield from {
            tuple(sorted(set(self._leaving_fragment_of(name))))
            for name in self._unassigned_leaving_atoms()
        }

    def _leaving_atom_fragments(self) -> Iterator[tuple[str, ...]]:
        yield from {
            tuple(sorted(set(self._leaving_fragment_of(atom.name))))
            for atom in self.atoms
            if atom.leaving
        }

    def _to_core_graph(self) -> Graph[AtomDefinition, BondDefinition]:
        graph: Graph[AtomDefinition, BondDefinition] = Graph(
            node_count_hint=self.n_core_atoms,
            edge_count_hint=len(self.bonds),
        )
        graph.add_nodes_from(atom for atom in self.atoms if not atom.leaving)
        for bond in self.bonds:
            atom1 = self.canonical_name_to_atom[bond.atom1]
            atom2 = self.canonical_name_to_atom[bond.atom2]
            if not (atom1.leaving or atom2.leaving):
                graph.add_edge(atom1, atom2, bond)
        return graph

    def _to_graphs(self) -> Iterable[Graph[AtomDefinition, BondDefinition]]:
        """
        Construct graphs of all possible arrangements of matching atoms.

        Yields one graph per combination of crosslink, posterior bond, and prior
        bond, and combination of unassigned leaving atoms. Atoms in the final
        graphs that are not a part of the substructure, ie. non-physical
        "leaving" atoms, are represented with element symbol
        ``""``.

        """
        for crosslink in {self.crosslink is not None, False}:
            for posterior_bond in {self.linking_bond is not None, False}:
                for prior_bond in {self.linking_bond is not None, False}:
                    leaving_atom_fragments = [
                        *self._unassigned_leaving_atom_fragments(),
                    ]
                    for leaving_atoms in flatten(
                        map(
                            lambda x: tuple(flatten(x)),
                            itertools.combinations(leaving_atom_fragments, i),
                        )
                        for i in range(len(leaving_atom_fragments) + 1)
                    ):
                        logging.debug(
                            f"Generating graph for {self.description}:"
                            + f" {crosslink=} {posterior_bond=} {prior_bond=} {leaving_atoms=}",
                        )
                        yield self._to_graph(
                            crosslinked=crosslink,
                            posterior_bonded=posterior_bond,
                            prior_bonded=prior_bond,
                            left_atoms=leaving_atoms,
                        )

    def _to_graph(
        self,
        crosslinked: bool = False,
        prior_bonded: bool = False,
        posterior_bonded: bool = False,
        left_atoms: Sequence[str] = (),
    ) -> Graph[AtomDefinition, BondDefinition]:
        graph = self._to_core_graph()
        restore_leaving_atoms: set[str] = (  # nofmt
            self._unassigned_leaving_atoms() - set(left_atoms)
        )

        # Add the non-matching (left) leaving atoms that bond to core atoms
        # along with their corresponding bonds
        for name in left_atoms:
            atom = self.name_to_atom[name]
            assert atom.leaving
            linking_bonds = [
                bond
                for bond in self.bonds_to(name)
                if not self.name_to_atom[bond.atom2].leaving
            ]
            if len(linking_bonds) == 0:
                continue
            target_name = f"NON_MATCHING_ATOM_{name}"
            link_target = AtomDefinition.with_defaults(target_name, "", leaving=True)
            graph.add_node(link_target)
            graph.add_edges_from(
                (
                    link_target,
                    self.name_to_atom[bond.atom2],
                    bond.replace(atom1=target_name),
                )
                for bond in linking_bonds
            )

        crosslink = self.crosslink if crosslinked else None
        prior_bond = self.linking_bond if prior_bonded else None
        posterior_bond = self.linking_bond if posterior_bonded else None

        if crosslink is not None:
            crosslink_target = AtomDefinition.with_defaults(
                crosslink.atom2,
                "",
                leaving=True,
            )
            graph.add_node(crosslink_target)
            graph.add_edge(
                self.canonical_name_to_atom[crosslink.atom1],
                crosslink_target,
                crosslink,
            )
        else:
            restore_leaving_atoms.update(self._crosslink_leaving_atoms)

        if posterior_bond is not None:
            posterior_target = AtomDefinition.with_defaults(
                posterior_bond.atom2,
                "",
                leaving=True,
            )
            graph.add_node(posterior_target)
            graph.add_edge(
                self.canonical_name_to_atom[posterior_bond.atom1],
                posterior_target,
                posterior_bond,
            )
        else:
            restore_leaving_atoms.update(self._posterior_bond_leaving_atoms)

        if prior_bond is not None:
            prior_target = AtomDefinition.with_defaults(
                prior_bond.atom1,
                "",
                leaving=True,
            )
            graph.add_node(prior_target)
            graph.add_edge(
                self.canonical_name_to_atom[prior_bond.atom2],
                prior_target,
                prior_bond,
            )
        else:
            restore_leaving_atoms.update(self._prior_bond_leaving_atoms)

        added_nodes = {atom.name: atom for atom in graph.nodes}
        for leaving_name in restore_leaving_atoms:
            try:
                bond = unwrap(
                    bond
                    for bond in self.bonds
                    if (bond.atom1 in added_nodes and bond.atom2 == leaving_name)
                    or (bond.atom2 in added_nodes and bond.atom1 == leaving_name)
                )
            except ValueError:
                continue

            leaving_atom = self.canonical_name_to_atom[leaving_name].replace(
                leaving=False,
            )
            graph.add_node(leaving_atom)
            assert leaving_name not in added_nodes
            added_nodes[leaving_name] = leaving_atom

            if bond.atom1 == leaving_name:
                graph.add_edge(
                    leaving_atom,
                    added_nodes[bond.atom2],
                    bond,
                )
            else:
                graph.add_edge(
                    added_nodes[bond.atom1],
                    leaving_atom,
                    bond,
                )

        assert graph.is_connected()
        return graph

[docs]    def visualize(
        self,
        *,
        width: int = -1,
        height: int = 300,
        highlight: Iterable[str] = (),
    ) -> "IPython.core.display.SVG":
        from IPython.display import SVG

        offmol = self.to_openff_molecule()

        highlight = set(highlight)
        highlight_atoms = [
            i for i, atom in enumerate(offmol.atoms) if atom.name in highlight
        ]

        deemphasize_atoms = [i for i, atom in enumerate(self.atoms) if atom.leaving]

        legend_elements: list[str] = []

        if self.is_anonymous:
            atom_notes = {}
            legend_elements.append("Anonymous Residue Definition")
            if len(deemphasize_atoms) != 0:
                legend_elements.append("(leaving atoms drawn in light grey)")
        else:
            atom_notes = {
                i: f"{'|'.join([atom.name, *atom.synonyms])}{'*' if atom.leaving else ''}"
                for i, atom in enumerate(self.atoms)
            }
            if self.description:
                legend_elements.append(f"{self.residue_name}: {self.description}")
            else:
                legend_elements.append(f"{self.residue_name}")

            if len(deemphasize_atoms) != 0:
                legend_elements.append("(*leaving atoms, drawn in light grey)")

        bond_notes: dict[tuple[int, int], str] = {}
        linkages: list[tuple[str, str, str]] = []
        if self._can_form_posterior_bond:
            linkages.append(
                (
                    "posterior",
                    self._posterior_bond_linking_atom,
                    self._prior_bond_linking_atom,
                ),
            )
        if self._can_form_prior_bond:
            linkages.append(
                (
                    "prior",
                    self._prior_bond_linking_atom,
                    self._posterior_bond_linking_atom,
                ),
            )
        if self.crosslink is not None and self._can_form_crosslink:
            linkages.append(("crosslink", self.crosslink.atom1, self.crosslink.atom2))
        for linkage, linking_atom, partner_atom in linkages:
            bond = unwrap(
                bond
                for bond in self.bonds
                if (
                    bond.atom1 == linking_atom
                    and self.canonical_name_to_atom[bond.atom2].leaving
                )
                or (
                    bond.atom2 == linking_atom
                    and self.canonical_name_to_atom[bond.atom1].leaving
                )
            )
            atom1_idx = unwrap(
                i for i, atom in enumerate(offmol.atoms) if atom.name == bond.atom1
            )
            atom2_idx = unwrap(
                i for i, atom in enumerate(offmol.atoms) if atom.name == bond.atom2
            )

            bond_notes[atom1_idx, atom2_idx] = f"{linkage} to '{partner_atom}'"

        svg_content = draw_molecule(
            offmol,
            width=width,
            height=height,
            atom_notes=atom_notes,
            bond_notes=bond_notes,
            deemphasize_atoms=deemphasize_atoms or None,
            legend=" ".join(legend_elements),
            highlight_atoms=highlight_atoms or None,
        )
        return SVG(data=svg_content)

[docs]    @staticmethod
    def react(
        reactants: Sequence["ResidueDefinition"],
        reactant_smarts: Sequence[str],
        product_smarts: Sequence[str],
        product_residue_names: Sequence[str | None] | None = None,
        product_descriptions: Sequence[str | None] | None = None,
        product_linking_bonds: Sequence[BondDefinition | None] | None = None,
        product_crosslinks: Sequence[BondDefinition | None] | None = None,
        product_virtual_sites: Sequence[Collection[str]] | None = None,
    ) -> list[tuple["ResidueDefinition", ...]]:
        """
        Perform a SMARTS reaction on some residue definitions

        Parameters
        ==========
        reactants
            The residue definitions involved in the reaction. Names and synonyms
            for the products are taken from these. Chemical information is also
            derived from these, but may be modified by the reaction.
        reactant_smarts
            Mapped SMARTS patterns identifying atoms in the reactants that will
            be involved in the reaction. There may be more or fewer of these
            than reactants to allow for complex and intramolecular reactions.
        product_smarts
            Mapped SMARTS patterns describing any altered chemistry in the
            products. All atom mappings in the reactant smarts must be present
            in a product SMARTS. Each product SMARTS corresponds to a single
            product ``ResidueDefinition``.
        product_residue_names
            The residue names of the products. One for each product SMARTS. May
            be ``None`` or omitted, in which case the product is anonymous ---
            note that this means atom names in the reactants are discarded. If
            there are fewer residue names than product SMARTS, any unnamed
            products are made anonymous.
        product_descriptions
            The descriptions of the products. If there are fewer descriptions
            than product SMARTS, the given descriptions are assigned to the
            first products.
        product_linking_bonds
            The linking bonds of the products. If there are fewer linking bonds
            than product SMARTS, the given linking bonds are assigned to the
            first products. Must be ``None`` or omitted if the product is
            anonymous.
        product_crosslinks
            The crosslinks of the products. If there are fewer crosslinks
            than product SMARTS, the given crosslinks are assigned to the
            first products. Must be ``None`` or omitted if the product is
            anonymous.
        product_virtual_sites
            The virtual sites of the products. If there are fewer virtual sites
            than product SMARTS, the given virtual sites are assigned to the
            first products.
        """

        def validate_product_arg[T, U](
            argname: str,
            argvalue: Sequence[T | U] | None,
            nonevalue: U,
            product_value_forbidden: Sequence[bool] | None = None,
        ) -> Sequence[T | U]:
            if argvalue is None:
                argvalue = [nonevalue] * len(product_smarts)
            elif len(argvalue) < len(product_smarts):
                argvalue = list(argvalue) + [nonevalue] * (
                    len(product_smarts) - len(argvalue)
                )
            elif len(argvalue) > len(product_smarts):
                raise PabloError(
                    f"too many {argname}: expected at most {len(product_smarts)}",
                )

            if product_value_forbidden is not None:
                for i, (forbidden, value) in enumerate(
                    zip(product_value_forbidden, argvalue),
                ):
                    if forbidden and value is not nonevalue:
                        raise PabloError(
                            f"{argname}[{i}] is {value} but must be {nonevalue}"
                            + " for anonymous product",
                        )

            return argvalue

        product_residue_names = validate_product_arg(
            "product_residue_names",
            product_residue_names,
            None,
        )
        product_is_anon = [name is None for name in product_residue_names]
        product_descriptions = validate_product_arg(
            "product_descriptions",
            product_descriptions,
            None,
        )
        product_linking_bonds = validate_product_arg(
            "product_linking_bonds",
            product_linking_bonds,
            None,
            product_value_forbidden=product_is_anon,
        )
        product_crosslinks = validate_product_arg(
            "product_crosslinks",
            product_crosslinks,
            None,
            product_value_forbidden=product_is_anon,
        )
        product_virtual_sites = validate_product_arg(
            "product_virtual_sites",
            product_virtual_sites,
            (),
        )

        reactant_offmols = [resdef.to_openff_molecule() for resdef in reactants]
        reaction = ".".join(reactant_smarts) + ">>" + ".".join(product_smarts)

        all_products = list(react(reactant_offmols, reaction))
        for products in all_products:
            if len(products) != len(product_smarts):
                raise PabloError(
                    f"expected {len(product_smarts)} products, got {len(products)}:"
                    + " each product_smarts should include only one fragment",
                )

        return [
            tuple(
                ResidueDefinition.from_molecule(
                    offmol,
                    residue_name=product_residue_names[i],
                    description=product_descriptions[i],
                    linking_bond=product_linking_bonds[i],
                    crosslink=product_crosslinks[i],
                    virtual_sites=product_virtual_sites[i],
                )
                if product_residue_names[i] is not None
                else ResidueDefinition.anon_from_molecule(
                    offmol,
                    description=product_descriptions[i],
                    virtual_sites=product_virtual_sites[i],
                )
                for i, offmol in enumerate(products)
            )
            for products in all_products
        ]