Source code for molsystem.smiles

# -*- coding: utf-8 -*-

"""Functions for handling SMILES"""

import logging

try:
    from openbabel import openbabel
except ModuleNotFoundError:
    print(
        "Please install openbabel using conda:\n"
        "     conda install -c conda-forge openbabel"
    )
    raise
try:
    from rdkit import Chem
    from rdkit.Chem import AllChem
except ModuleNotFoundError:
    print("Please install RDKit using conda:\n     conda install -c conda-forge rdkit")
    raise

logger = logging.getLogger(__name__)


[docs] class SMILESMixin: """A mixin for handling SMILES.""" @property def canonical_smiles(self): """Return the canonical SMILES string for this object.""" return self.to_smiles(canonical=True) @property def smarts(self): """Return the SMARTS string for this object.""" return self.to_smarts() @property def smiles(self): """Return the SMILES string for this object.""" return self.to_smiles()
[docs] def to_smiles(self, canonical=False, hydrogens=False, isomeric=True, rdkit=False): """Create the SMILES string from the system. Parameters ---------- canonical : bool = False Whether to create canonical SMILES hydrogens : bool = False Whether to keep H's in the SMILES string. isomeric : bool = True Whether to use isomeric SMILES rdkit : bool = False Whether to use RDKit rather than default of OpenBabel Returns ------- str The SMILES string, or (SMILES, name) if the rname is requested """ logger.info("to_smiles") if rdkit: mol = self.to_RDKMol() if isomeric: Chem.FindPotentialStereo(mol) if hydrogens: mol2 = mol else: mol2 = AllChem.RemoveHs(mol) if canonical: smiles = Chem.MolToSmiles(mol2, isomericSmiles=isomeric) else: smiles = Chem.MolToSmiles( mol2, isomericSmiles=isomeric, canonical=False ) else: obConversion = openbabel.OBConversion() if canonical: obConversion.SetOutFormat("can") else: obConversion.SetOutFormat("smi") mol = self.to_OBMol() if hydrogens: obConversion.AddOption("h") if not isomeric: obConversion.AddOption("i") smiles = obConversion.WriteString(mol) logger.info(f"smiles = '{smiles}'") return smiles.strip()
[docs] def from_smiles(self, smiles, name=None, reorient=True, rdkit=False): """Create the system from a SMILES string. Parameters ---------- smiles : str The SMILES string name : str = None The name of the molecule reorient : bool = True Whether to reorient to the standard orientation rdkit : bool = False Whether to use RDKit rather than default of OpenBabel Returns ------- None """ save = self.name if rdkit: mol = Chem.rdmolfiles.MolFromSmiles(smiles) if mol is None: raise ValueError(f"SMILES '{smiles}' is not valid.") mol = Chem.AddHs(mol) AllChem.EmbedMolecule(mol) self.from_RDKMol(mol) else: obConversion = openbabel.OBConversion() obConversion.SetInAndOutFormats("smi", "mdl") obConversion.AddOption("3") mol = openbabel.OBMol() obConversion.ReadString(mol, smiles) # Add hydrogens mol.AddHydrogens() # Get coordinates for a 3-D structure builder = openbabel.OBBuilder() builder.Build(mol) self.from_OBMol(mol) # Rotate to standard orientation if reorient: rdkMol = self.to_RDKMol() rdkConf = rdkMol.GetConformers()[0] Chem.rdMolTransforms.CanonicalizeConformer(rdkConf) self.from_RDKMol(rdkMol) if name is not None: self.name = name else: self.name = save
[docs] def to_smarts(self): """Generate a SMARTS string for this object. Returns ------- str The SMARTS string. """ generator = GenSMARTS(self) return generator.smarts
[docs] class GenSMARTS(object): """A class to generate SMARTS strings for an object. Parameters ---------- mol_object : _Configuration, _Template, _Subset """ def __init__(self, mol_object=None): self.smarts = "" self._mol_object = None self.symbols = [] self.visited = [] self.neighbors = [] self.mol_object = mol_object @property def mol_object(self): """The molecular object to work with.""" return self._mol_object @mol_object.setter def mol_object(self, value): self._mol_object = value if self._mol_object is not None: atoms = self.mol_object.atoms bonds = self.mol_object.bonds self.symbols = atoms.symbols # Find the neighbors of each atom for walking the structure tmp_neighbors = {i: [] for i in atoms.ids} for bond in bonds.bonds(): i = bond["i"] j = bond["j"] bo = bond["bondorder"] tmp_neighbors[i].append((j, bo)) tmp_neighbors[j].append((i, bo)) # The index of each atom id in the list, 0..natoms-1 index = {j: i for i, j in enumerate(atoms.ids)} # Convert to indices self.neighbors = {} for i, js in tmp_neighbors.items(): self.neighbors[index[i]] = [(index[j], bo) for j, bo in js] # An array to keep track of the atoms visited self.visited = [False] * atoms.n_atoms # And generate the SMARTS string self.smarts = self._recurse(atom=0) def _recurse(self, atom): """Helper method for recursion to generate SMARTS. Parameters ---------- atom : int Index of the atom to work with. """ symbol = self.symbols[atom] self.visited[atom] = True # How many neighbors, and how many are hydrogens nX = len(self.neighbors[atom]) nH = 0 for neighbor, bondorder in self.neighbors[atom]: if self.symbols[neighbor] == "H": nH += 1 # Find any non-hydrogen neighbors that have not been visited, # and recurse. sub_smarts = [] for neighbor, bondorder in self.neighbors[atom]: if self.symbols[neighbor] != "H" and not self.visited[neighbor]: if bondorder == 1: bond = "" elif bondorder == 2: bond = "=" elif bondorder == 3: bond = "#" elif bondorder == 5: # Aromatic symbol = symbol.lower() bond = ":" else: bond = "~" sub_smarts.append(f"{bond}{self._recurse(neighbor)}") smarts = f"[{symbol}H{nH}X{nX}]" if len(sub_smarts) > 0: for sub in sub_smarts[0:-1]: smarts += f"({sub})" smarts += sub_smarts[-1] return smarts