Source code for rdkit_utilities.rdDistGeom

import itertools
from typing import Optional, Dict, List, Union, Set, Tuple
from typing_extensions import Literal

import numpy as np

from rdkit import Chem as rdChem
from rdkit.Chem import AllChem as rdAllChem

from rdkit_utilities import rdForceFieldHelpers


[docs]def GetExclusions(mol: rdChem.Mol) -> Set[Tuple[int, int]]: """Get exclusions with a molecule between atoms 2- and 3- sites away""" exclusions = set() for i, atom in enumerate(mol.GetAtoms()): partners = [b.GetOtherAtomIdx(i) for b in atom.GetBonds()] exclusions |= set(tuple(sorted([i, x])) for x in partners) exclusions |= set(itertools.combinations(sorted(partners), 2)) return exclusions
[docs]def GenerateConformers( mol: rdChem.Mol, numConfs: int = 10, numConfPool: Optional[int] = None, maximizeDiversity: bool = False, selectELFConfs: bool = False, ELFPercentage: float = 2.0, pruneRmsThresh: float = -1.0, diverseRmsThresh: float = 0.05, energyWindow: float = -1.0, maxAttempts: int = 0, randomSeed: int = -1, clearConfs: bool = True, useRandomCoords: bool = False, boxSizeMult: float = 2.0, randNegEig: bool = True, numZeroFail: int = 1, coordMap: Dict[int, Union[np.ndarray, List[float]]] = {}, forceTol: float = 0.001, ignoreSmoothingFailures: bool = False, enforceChirality: bool = True, numThreads: int = 1, useExpTorsionAnglePrefs: bool = True, useBasicKnowledge: bool = True, printExpTorsionAngles: bool = False, useSmallRingTorsions: bool = False, useMacrocycleTorsions: bool = False, ETversion: int = 1, optimizeConfs: bool = False, forcefield: rdForceFieldHelpers.RDKIT_FF_TYPE = "MMFF94", optMaxIters: int = 200, ffVdwThresh: float = 10.0, ffNonBondedThresh: float = 100, ffIgnoreInterfragInteractions: bool = True, removeTransAcidConformers: bool = True, ) -> List[int]: """ Generate conformers on molecule in-place. Parameters ---------- numConfs: int Max number of conformers to end up with numConfPool: Optional[int] Max number of conformers to initially generate, for ELF or RMS selection. maximizeDiversity: bool Whether to maximize diversity via heavy-atom RMS selectELFConfs: bool Whether to select conformers using the electrostatically least-interacting functional group (ELF) technique ELFPercentage: float % (out of 100) of conformers to keep from `numConfPool` when selecting ELF conformers pruneRmsThresh: float RMS threshold to keep conformers initially generated from `numConfPool` diverseRmsThresh: float Minimum RMS threshold for distinguishing conformers when selecting maximally diverse conformers energyWindow: float If positive, only conformers within this energy window as determined by `forcefield` will be kept. optimizeConfs: bool Whether to optimize conformers generated initially, prior to selecting ELF and RMS diverse conformers. forcefield: Literal["UFF", "MMFF94", "MMFF94S"] Which force field to use for optimizing conformers and calculating energies for `energyWindow`. optMaxIters: int Maximum iterations for optimization ffVdwThresh: float VdW threshold for optimization and calculating energies using force field ffNonBondedThresh: float Non-bonded threshold for optimization and calculating energies using force field ffIgnoreInterfragInteractions: bool If True, nonbonded terms between fragments will not be added to the forcefield. **kwargs: See the RDKit documentation for ``rdkit.Chem.rdDistGeom.EmbedMultipleConfs`` for more. """ from .rdmolops import KeepConformerIds if not numConfPool: numConfPool = numConfs if maximizeDiversity or energyWindow > 0: numConfPool *= 1000 coordMap = {k: list(map(float, v)) for k, v in coordMap.items()} rdAllChem.EmbedMultipleConfs( mol, numConfs=numConfPool, maxAttempts=maxAttempts, randomSeed=randomSeed, clearConfs=clearConfs, useRandomCoords=useRandomCoords, boxSizeMult=boxSizeMult, randNegEig=randNegEig, numZeroFail=numZeroFail, pruneRmsThresh=pruneRmsThresh, coordMap=coordMap, forceTol=forceTol, ignoreSmoothingFailures=ignoreSmoothingFailures, enforceChirality=enforceChirality, numThreads=numThreads, useExpTorsionAnglePrefs=useExpTorsionAnglePrefs, useBasicKnowledge=useBasicKnowledge, printExpTorsionAngles=printExpTorsionAngles, useSmallRingTorsions=useSmallRingTorsions, useMacrocycleTorsions=useMacrocycleTorsions, ETversion=ETversion, ) if optimizeConfs: rdForceFieldHelpers.FFOptimizeMoleculeConfs( mol, forcefield=forcefield, numThreads=numThreads, maxIters=optMaxIters, vdwThresh=ffVdwThresh, nonBondedThresh=ffNonBondedThresh, ignoreInterfragInteractions=ffIgnoreInterfragInteractions, ) if energyWindow > 0: RemoveConformersOutsideEnergyWindow( mol, energyWindow=energyWindow, forcefield=forcefield, numThreads=numThreads, ffVdwThresh=ffVdwThresh, ffNonBondedThresh=ffNonBondedThresh, ffIgnoreInterfragInteractions=ffIgnoreInterfragInteractions, ) if selectELFConfs: SelectELFConformers( mol, ELFPercentage=ELFPercentage, removeTransAcidConformers=removeTransAcidConformers, ) if maximizeDiversity: SelectDiverseConformers( mol, numConfs=numConfs, diverseRmsThresh=diverseRmsThresh ) conformer_ids = [conf.GetId() for conf in mol.GetConformers()] KeepConformerIds(mol, conformer_ids[:numConfs]) return mol.GetNumConformers()
[docs]def RemoveTransAcidConformers(mol: rdChem.Mol): """Remove conformers from molecule if they contain a trans carboxylic acid""" from rdkit.Chem.rdMolTransforms import GetDihedralRad # type: ignore[import] from rdkit_utilities import rdmolfiles from .rdmolops import KeepConformerIds query = rdmolfiles.MolFromSmarts( "[#6X3:2](=[#8:1])(-[#8X2H1:3]-[#1:4])", orderByMapNumber=True, ) matches = mol.GetSubstructMatches(query, uniquify=False) to_keep = [] for conformer in mol.GetConformers(): for match in matches: angle = GetDihedralRad(conformer, *match) if angle > np.pi / 2.0: break else: to_keep.append(conformer.GetId()) KeepConformerIds(mol, to_keep)
[docs]def SelectELFConformers( mol: rdChem.Mol, ELFPercentage: float = 2.0, removeTransAcidConformers: bool = True ): """Keep percentage of conformers, ranked by electrostatic energy, in-place. The other conformers are deleted. """ from .rdmolops import KeepConformerIds if removeTransAcidConformers: RemoveTransAcidConformers(mol) SortConformersByElectrostaticInteraction(mol) goal = int(mol.GetNumConformers() * ELFPercentage / 100) n_keep = max(1, goal) conformer_ids = [c.GetId() for c in mol.GetConformers()] keep_ids = conformer_ids[:n_keep] KeepConformerIds(mol, keep_ids)
[docs]def SelectDiverseConformers( mol: rdChem.Mol, numConfs: int = 10, diverseRmsThresh: float = 0.05, ): """Select conformers for max diversity by RMS, in-place. This uses a greedy algorithm. Unselected conformers are deleted. """ from .rdMolAlign import GetBestConformerRMS from .rdmolops import KeepConformerIds from .utilities import get_maximally_diverse_indices rms_matrix = GetBestConformerRMS(mol, heavyAtomsOnly=True) diverse_indices = get_maximally_diverse_indices( rms_matrix, distance_threshold=diverseRmsThresh, n_indices=numConfs ) conformer_ids = [conf.GetId() for conf in mol.GetConformers()] keep_ids = [conformer_ids[i] for i in diverse_indices] KeepConformerIds(mol, keep_ids) for i, conf in enumerate(mol.GetConformers()): conf.SetId(i)
[docs]def SelectDiverseELFConformers( mol: rdChem.Mol, numConfs: int = 10, ELFPercentage: float = 2.0, diverseRmsThresh: float = 0.05, removeTransAcidConformers: bool = True, ): """Select diverse conformers using the ELF technique, in-place. Other conformers are deleted from the molecule. """ SelectELFConformers( mol, ELFPercentage=ELFPercentage, removeTransAcidConformers=removeTransAcidConformers, ) SelectDiverseConformers(mol, numConfs=numConfs, diverseRmsThresh=diverseRmsThresh)
[docs]def SortConformersByEnergy( mol: rdChem.Mol, forcefield: rdForceFieldHelpers.RDKIT_FF_TYPE = "MMFF94", numThreads: int = 1, ffVdwThresh: float = 10.0, ffNonBondedThresh: float = 100, ffIgnoreInterfragInteractions: bool = True, reverse: bool = False, ) -> List[float]: """Sort conformers in-place on molecule by force field energy. Returns ------- energies: List[float] Raises ------ ValueError If the force field cannot calculate energies """ from .rdmolops import ReorderConformers result = rdForceFieldHelpers.FFOptimizeMoleculeConfs( mol, forcefield=forcefield, numThreads=numThreads, maxIters=0, vdwThresh=ffVdwThresh, nonBondedThresh=ffNonBondedThresh, ignoreInterfragInteractions=ffIgnoreInterfragInteractions, ) energies = [x[1] for x in result] if any(x == -1 for x in energies): raise ValueError( f"Could not use {forcefield} to calculate energies " f"for {rdChem.MolToSmarts(mol)}. " "Cannot sort conformers by energy." ) order = np.argsort(energies) if reverse: order = order[::-1] ReorderConformers(mol, order) return [energies[i] for i in order]
[docs]def CalculateMMFFCharges( mol: rdChem.Mol, forcefield: Literal["MMFF94", "MMFF94s"] = "MMFF94", normalize_partial_charges: bool = True, ) -> np.ndarray: """Calculate MMFF charges for molecule""" forcefield = forcefield.upper() if forcefield == "MMFF94S": forcefield = "MMFF94s" mps = rdAllChem.MMFFGetMoleculeProperties(mol, forcefield) if mps is None: molstr = rdChem.MolToSmiles(mol) raise ValueError(f"MMFF charges could not be computed for {molstr}") n_atoms = mol.GetNumAtoms() charges = np.array([mps.GetMMFFPartialCharge(i) for i in range(n_atoms)]) if normalize_partial_charges: total_charge = rdChem.GetFormalCharge(mol) partial_charges = charges.sum() offset = (total_charge - partial_charges) / n_atoms charges += offset return charges
[docs]def CalculateElectrostaticEnergy( mol: rdChem.Mol, forcefield: Literal["MMFF94", "MMFF94s"] = "MMFF94", ) -> np.ndarray: """Calculate electrostatic energy of all conformers using force field Returns ------- energies: numpy.ndarray Array of energies, one for each conformer """ from .utilities import compute_atom_distance_matrix conformers = np.array([c.GetPositions() for c in mol.GetConformers()]) distances = compute_atom_distance_matrix(conformers) inverse_distances = np.reciprocal( distances, out=np.zeros_like(distances), where=~np.isclose(distances, 0) ) charges = np.abs(CalculateMMFFCharges(mol, forcefield)).reshape(-1, 1) charge_products = charges @ charges.T excl_i, excl_j = zip(*GetExclusions(mol)) charge_products[(excl_i, excl_j)] = 0.0 charge_products[(excl_j, excl_i)] = 0.0 energies = inverse_distances * charge_products return 0.5 * energies.sum(axis=(1, 2))
[docs]def SortConformersByElectrostaticInteraction( mol: rdChem.Mol, forcefield: Literal["MMFF94", "MMFF94s"] = "MMFF94", ): """Sort conformers in-place by electrostatic energy using MMFF""" from .rdmolops import ReorderConformers energies = CalculateElectrostaticEnergy(mol, forcefield) order = np.argsort(energies) ReorderConformers(mol, order)
[docs]def RemoveConformersOutsideEnergyWindow( mol: rdChem.Mol, energyWindow: float = 30.0, forcefield: rdForceFieldHelpers.RDKIT_FF_TYPE = "MMFF94", numThreads: int = 1, ffVdwThresh: float = 10.0, ffNonBondedThresh: float = 100, ffIgnoreInterfragInteractions: bool = True, ) -> int: """Remove conformers outside energy window using force field, in-place The window covers an energy range from the conformer with the lowest energy. The molecule also has its conformers sorted by force field energy. Returns ------- numConfs: int The number of remaining conformers """ from .rdmolops import KeepConformerIds if energyWindow < 0: return mol.GetNumConformers() energies = SortConformersByEnergy( mol, forcefield=forcefield, numThreads=numThreads, ffVdwThresh=ffVdwThresh, ffNonBondedThresh=ffNonBondedThresh, ffIgnoreInterfragInteractions=ffIgnoreInterfragInteractions, reverse=False, ) lowest = energies[0] highest = lowest + energyWindow conf_ids = [conf.GetId() for conf in mol.GetConformers()] mask = np.array(energies) <= highest keep = np.array(conf_ids)[mask] KeepConformerIds(mol, keep) return mol.GetNumConformers()