"""
Patches to add essential features to the CCD.
"""
from openff.pablo.chem import DISULFIDE_BOND
from openff.pablo.exceptions import PabloError
from .._utils import unwrap
from ..residue import (
AtomDefinition,
BondDefinition,
ResidueDefinition,
)
from ._ccdcache import PEPTIDE_BOND
__all__ = [
"ACIDIC_PROTONS",
"BASIC_ATOMS",
"ATOM_NAME_SYNONYMS",
"fix_caps",
"add_protonation_variants",
"add_synonyms",
"disambiguate_alt_ids",
"add_disulfide_crosslink",
"add_dephosphorylated_5p_terminus",
"set_hop3_leaving",
"patch_his_sidechain_zwitterion",
"delete_doubly_deprotonated_arginine",
"add_nh2_leaving_atom",
"strip_linkless_leavers",
]
ACIDIC_PROTONS: dict[str, list[str]] = {
"ALA": ["HXT", "H2"],
"ARG": ["HXT", "H2", "HH12", "HH22"],
"ASN": ["HXT", "H2"],
"ASP": ["HXT", "H2", "HD2"],
"CYS": ["HXT", "H2", "HG"],
"GLN": ["HXT", "H2"],
"GLU": ["HXT", "H2", "HE2"],
"GLY": ["HXT", "H2"],
# TODO: Special case HIS so that the neutral form is not a zwitterion
"HIS": ["HXT", "H2", "HD1", "HE2"],
"ILE": ["HXT", "H2"],
"LEU": ["HXT", "H2"],
"LYS": ["HXT", "H2", "HZ3"],
"MET": ["HXT", "H2"],
"PHE": ["HXT", "H2"],
"PRO": ["HXT"],
"SER": ["HXT", "H2", "HG"],
"THR": ["HXT", "H2", "HG1"],
"TRP": ["HXT", "H2", "HE1"],
"TYR": ["HXT", "H2", "HH"],
"VAL": ["HXT", "H2"],
"DG": ["HOP2", "HOP3", "HO3'"],
"DA": ["HOP2", "HOP3", "HO3'"],
"DT": ["HOP2", "HOP3", "HO3'"],
"DC": ["HOP2", "HOP3", "HO3'"],
"G": ["HOP2", "HOP3", "HO3'"],
"A": ["HOP2", "HOP3", "HO3'"],
"U": ["HOP2", "HOP3", "HO3'"],
"C": ["HOP2", "HOP3", "HO3'"],
}
"""Map from residue name to a list of atom names of abstractable hydrogens.
Each 3-tuple specifies an atom name to remove. This atom must have exactly one
bond. A variant residue definition is created with that atom and bond removed,
and the formal charge of the bonded atom reduced by one.
Note that all combinations of deprotonations are generated; this means a residue
with ``n`` abstractable hydrogens will have ``2**n`` variants."""
BASIC_ATOMS: dict[str, list[tuple[str, str]]] = {
"ALA": [("N", "H3")],
"ARG": [("N", "H3")],
"ASN": [("N", "H3")],
"ASP": [("N", "H3")],
"CYS": [("N", "H3")],
"GLN": [("N", "H3")],
"GLU": [("N", "H3")],
"GLY": [("N", "H3")],
"HIS": [("N", "H3")],
"ILE": [("N", "H3")],
"LEU": [("N", "H3")],
"LYS": [("N", "H3")],
"MET": [("N", "H3")],
"PHE": [("N", "H3")],
"PRO": [],
"SER": [("N", "H3")],
"THR": [("N", "H3")],
"TRP": [("N", "H3")],
"TYR": [("N", "H3")],
"VAL": [("N", "H3")],
}
"""Protonation variants that add an atom to the CCD.
Each 3-tuple specifies an atom name to protonate and the name of the added
proton. A variant residue definition is created with that atom and bond added,
and the formal charge of the atom increased by one."""
ATOM_NAME_SYNONYMS = {
"NME": {
"HN2": ["H"],
"C": ["CH3"],
"H1": ["1HH3"],
"H2": ["2HH3"],
"H3": ["3HH3"],
},
"ACE": {
"H1": ["1HH3"],
"H2": ["2HH3"],
"H3": ["3HH3"],
},
"NA": {"NA": ["Na"]},
"CL": {"CL": ["Cl"]},
"ALA": {"H": ["H1"]},
"ARG": {"H": ["H1"]},
"ASN": {"H": ["H1"]},
"ASP": {"H": ["H1"]},
"CYS": {"H": ["H1"], "HB2": ["3HB"]},
"GLN": {"H": ["H1"]},
"GLU": {"H": ["H1"]},
"GLY": {"H": ["H1"]},
"HIS": {"H": ["H1"]},
"ILE": {"H": ["H1"]},
"LEU": {"H": ["H1"]},
"LYS": {"H": ["H1"]},
"MET": {"H": ["H1"]},
"PHE": {"H": ["H1"]},
"SER": {"H": ["H1"], "HB2": ["3HB"]},
"THR": {"H": ["H1"]},
"TRP": {"H": ["H1"]},
"TYR": {"H": ["H1"]},
"VAL": {"H": ["H1"]},
}
"""Map from residue name and then canonical atom name to a list of synonyms"""
[docs]def fix_caps(res: ResidueDefinition) -> list[ResidueDefinition]:
"""
Fix ``"NON-POLYMER"`` residues so they can be used as caps for peptides.
"""
return [
res.replace(
linking_bond=PEPTIDE_BOND,
atoms=[
atom.replace(
leaving=True if atom.name == "H" else atom.leaving,
)
for atom in res.atoms
],
),
]
[docs]def add_protonation_variants(res: ResidueDefinition) -> list[ResidueDefinition]:
"""
Add protonation variants from the ACIDIC_PROTONS and BASIC_ATOMS constants.
Note that all combinations of protonations and deprotonations are generated;
this means a residue with ``n`` abstractable hydrogens and ``m`` acidic atoms
will have ``2**(n+m)`` variants.
"""
assert res.residue_name is not None
return res.vary_protonation(
acidic=ACIDIC_PROTONS.get(res.residue_name, []),
basic=BASIC_ATOMS.get(res.residue_name, []),
ignore_synonym_clashes=True,
)
[docs]def add_synonyms(res: ResidueDefinition) -> list[ResidueDefinition]:
"""
Patch a residue definition to include synonyms from :py:data:`ATOM_NAME_SYNONYMS`.
"""
assert res.residue_name is not None
return [res.with_synonyms(ATOM_NAME_SYNONYMS.get(res.residue_name, {}))]
[docs]def disambiguate_alt_ids(res: ResidueDefinition) -> list[ResidueDefinition]:
"""
CCD patch: put alt atom ids in their own residue definitions if needed
This patch should be run before other patches that add synonyms, as it
assumes that there is at most one synonym (from the CCD alt id
flag).
Some CCD residues (like GLY) have alternative atom IDs that clash with
canonical IDs for a different atom. This breaks synonyms because the
clashing alternate ID is never assigned; the PDB file is interpreted as
having two copies of the canonical ID atom. To fix this, we just split
residue definitions with this clashing problem into two definitions, one
with the canonical IDs and the other with the alternates.
Note that if a linking atom is part of a clash, this may interfere with
the disambiguated residue definition's ability to link to canonically named
residue definitions.
"""
clashes: list[int] = []
canonical_names = {atom.name for atom in res.atoms}
for i, atom in enumerate(res.atoms):
for synonym in atom.synonyms:
if synonym in canonical_names:
clashes.append(i)
if len(clashes) != 0:
old_to_new: dict[str, str] = {}
for atom in res.atoms:
if atom.synonyms:
old_to_new[atom.name] = unwrap(atom.synonyms)
else:
old_to_new[atom.name] = atom.name
res1 = res.replace(
atoms=[
atom.replace(
synonyms=[],
)
for atom in res.atoms
],
)
res2 = res.replace(
atoms=[
atom.replace(
name=old_to_new[atom.name],
synonyms=[],
)
for atom in res.atoms
],
bonds=[
bond.replace(
atom1=old_to_new[bond.atom1],
atom2=old_to_new[bond.atom2],
)
for bond in res.bonds
],
description=res.description + " altids",
crosslink=(
None
if res.crosslink is None
else res.crosslink.replace(
atom1=old_to_new[res.crosslink.atom1],
atom2=old_to_new.get(
res.crosslink.atom2,
res.crosslink.atom2,
),
)
),
linking_bond=(
None
if res.linking_bond is None
else res.linking_bond.replace(
atom1=old_to_new[res.linking_bond.atom1],
atom2=old_to_new.get(
res.linking_bond.atom2,
res.linking_bond.atom2,
),
)
),
)
return [res1, res2]
else:
return [res]
[docs]def add_disulfide_crosslink(res: ResidueDefinition) -> list[ResidueDefinition]:
"""
Add a disulfide crosslink to a thiol sulfur atom named "SG".
The leaving atom is the thiol proton and must be named "HG".
"""
if not {"HG", "SG"}.issubset({atom.name for atom in res.atoms}):
raise PabloError(
"Can only add disulfide crosslink to residue with HG and SG atoms",
)
return [
res.replace(
crosslink=DISULFIDE_BOND,
atoms=[
atom if atom.name != "HG" else atom.replace(leaving=True)
for atom in res.atoms
],
),
]
[docs]def add_dephosphorylated_5p_terminus(res: ResidueDefinition) -> list[ResidueDefinition]:
"""
Add a definition representing a dephosphorylated 5' nucleic acid terminus.
Assumes the phosphate has the standard CCD nucleic acid atom names: "P",
"HOP2", "HOP3", "OP1", "OP2", and "OP3".
"""
phosphate_names = {"P", "HOP2", "HOP3", "OP1", "OP2", "OP3"}
return [
res,
res.replace(
atoms=(
AtomDefinition(
name="HO5'",
synonyms=(),
symbol="H",
aromatic=False,
charge=0,
leaving=False,
stereo=None,
),
*(atom for atom in res.atoms if atom.name not in phosphate_names),
),
bonds=(
BondDefinition(
atom1="O5'",
atom2="HO5'",
aromatic=False,
order=1,
stereo=None,
),
*(
bond
for bond in res.bonds
if bond.atom1 not in phosphate_names
and bond.atom2 not in phosphate_names
),
),
description=(
res.description[:-17] + "-5'-PROTONATED"
if res.description.endswith("-5'-MONOPHOSPHATE")
else res.description + " 5' dephosphorylated"
),
),
]
[docs]def set_hop3_leaving(res: ResidueDefinition) -> list[ResidueDefinition]:
"""
Mark the HOP3 leaving atom in nucleic acid residues as such.
The OP3 and HOP3 atoms in nucleic acid residues are both absent when the
residue forms part of a polymer, but only OP3 is marked as leaving; HOP3
becomes a disconnected fragment.
"""
# TODO: Replace this with disconnected fragment detection in ResidueDefinition._leaving_fragment_of()
return [
res.replace(
atoms=(
atom.replace(leaving=True) if atom.name == "HOP3" else atom
for atom in res.atoms
),
),
]
[docs]def patch_his_sidechain_zwitterion(res: ResidueDefinition) -> list[ResidueDefinition]:
"""Patch the histidine side chain so that it is neutral, not zwitterionic."""
nd1_atom = res.name_to_atom.get("ND1", None)
ne2_atom = res.name_to_atom.get("NE2", None)
if (
nd1_atom is not None
and nd1_atom.charge == 1
and ne2_atom is not None
and ne2_atom.charge == -1
):
bonds: list[BondDefinition] = []
nd1_ce1_bond: BondDefinition | None = None
ne2_ce1_bond: BondDefinition | None = None
for bond in res.bonds:
if {bond.atom1, bond.atom2} == {"ND1", "CE1"}:
nd1_ce1_bond = bond
elif {bond.atom1, bond.atom2} == {"NE2", "CE1"}:
ne2_ce1_bond = bond
else:
bonds.append(bond)
if (
nd1_ce1_bond is None
or nd1_ce1_bond.order != 2
or ne2_ce1_bond is None
or ne2_ce1_bond.order != 1
):
raise PabloError(
"Zwitterionic histidine side chain detected but could not be corrected",
)
else:
return [
res.replace(
bonds=[
nd1_ce1_bond.replace(order=1),
ne2_ce1_bond.replace(order=2),
*bonds,
],
atoms=[
nd1_atom.replace(charge=0),
ne2_atom.replace(charge=0),
*(
atom
for atom in res.atoms
if atom is not nd1_atom and atom is not ne2_atom
),
],
),
]
else:
return [res]
[docs]def delete_doubly_deprotonated_arginine(
res: ResidueDefinition,
) -> list[ResidueDefinition]:
"""HH12 and HH22 are both acidic, but only one can leave at a time."""
atom_names = {atom.name for atom in res.atoms}
if "HH22" not in atom_names and "HH12" not in atom_names:
return []
else:
return [res]
[docs]def add_nh2_leaving_atom(
res: ResidueDefinition,
) -> list[ResidueDefinition]:
"""Add the missing leaving atom to the NH3 residue."""
return [
res.replace(
atoms=[
*res.atoms,
AtomDefinition.with_defaults(name="H3", symbol="H", leaving=True),
],
bonds=[*res.bonds, BondDefinition.with_defaults(atom1="H3", atom2="N")],
description=res.description + " +H3",
),
res,
]
[docs]def strip_linkless_leavers(
res: ResidueDefinition,
) -> list[ResidueDefinition]:
"""
Set ``leaving=False`` on all atoms if ``res`` has no linking bond or crosslink.
"""
if res.linking_bond is None and res.crosslink is None:
return [res.replace(atoms=[atom.replace(leaving=False) for atom in res.atoms])]
else:
return [res]