Source code for molsystem.topology

# -*- coding: utf-8 -*-

"""Topological methods for the system"""

import logging
from math import floor
import pprint  # noqa: F401

try:
    from openbabel import openbabel
except ModuleNotFoundError:
    print(
        "Please install openbabel using conda:\n"
        "     conda install -c conda-forge openbabel"
    )
    raise

logger = logging.getLogger(__name__)


[docs] class TopologyMixin: """A mixin for handling topology in a configuration."""
[docs] def find_molecules(self, as_indices=False): """Find the separate molecules. Parameters ---------- as_indices : bool = False Whether to return 0-based indices (True) or atom ids (False) Returns ------- molecules : [[int]*n_molecules] A list of lists of atom ids or indices for the molecules """ molecules = [] n_atoms = self.atoms.n_atoms if n_atoms == 0: return molecules if not as_indices and self.symmetry.n_symops > 1: raise RuntimeError( "Cannot return atom ids for bonded_neighbors when there is symmetry" ) neighbors = self.bonded_neighbors(as_indices=True) visited = [False] * n_atoms while True: # Find first atom not yet visited try: i = visited.index(False) except ValueError: break visited[i] = True atoms = [i] next_atoms = neighbors[i] while len(next_atoms) > 0: tmp = [] for i in next_atoms: if not visited[i]: atoms.append(i) visited[i] = True tmp.extend(neighbors[i]) next_atoms = tmp molecules.append(sorted(atoms)) if as_indices: return molecules else: atom_ids = self.atoms.ids to_id = {i: j for i, j in enumerate(atom_ids)} return [[to_id[j] for j in js] for js in molecules]
[docs] def bonded_neighbors(self, as_indices=False, first_index=0): """The atoms bonded to each atom in the system. Parameters ---------- as_indices : bool = False Whether to return 0-based indices (True) or atom ids (False) first_index : int = 0 The smallest index, e.g. 0 or 1 Returns ------- neighbors : {int: [int]} or [[int]] for indices list of atom ids for each atom id """ neighbors = {} symmetry = self.symmetry atoms = self.atoms bonds = self.bonds n_atoms = atoms.n_atoms if n_atoms == 0: if as_indices: return [] else: return neighbors if self.symmetry.n_symops > 1: # Symmetry involved... if not as_indices: raise RuntimeError( "Cannot return atom ids for bonded_neighbors when there is symmetry" ) pairs = symmetry.bond_atoms neighbors = [[] for i in range(n_atoms)] for i, j in pairs: neighbors[i].append(j) neighbors[j].append(i) for js in neighbors: js.sort() return neighbors else: atom_ids = atoms.ids neighbors = {i: [] for i in atom_ids} if bonds.n_bonds > 0: for bond in bonds.bonds(): i = bond["i"] j = bond["j"] neighbors[i].append(j) neighbors[j].append(i) if as_indices: # Convert to indices to_index = {j: i + first_index for i, j in enumerate(atom_ids)} result = [[]] * (n_atoms + first_index) for i, js in neighbors.items(): result[to_index[i]] = sorted([to_index[j] for j in js]) return result else: for i in neighbors: neighbors[i].sort() return neighbors
[docs] def create_molecule_subsets(self): """Create a subset for each molecule in a configuration. Returns ------- [int] The ids of the subsets, one per molecule. """ # Find the molecules and the create the subsets if they don't exist. molecules = self.find_molecules() # Remove any previous subsets for this configuration subsets = self["subset"] tid = 1 sids = subsets.find(tid) if len(sids) > 0: subsets.delete(sids) # Now create the new set. sids = [] for atom_ids in molecules: sid = subsets.create(tid, atoms=atom_ids) sids.append(sid) return sids
[docs] def create_molecule_templates(self, full_templates=True, create_subsets=True): """Create a template for each unique molecule in a configuration. By default also create subsets linking each template to the atoms of the molecules in the system. Parameters ---------- full_templates : bool = True If true, create full templates by creating systems for the molecules. create_subsets : bool = True If true, create subsets linking the templates to the molecules. Returns ------- [int] or [[int], [int]] The ids of the templates, or if create_subsets is True a two-element list containing the list of templates and list of subsets. """ templates = self.system_db.templates # Find the molecules molecules = self.find_molecules() n_molecules = len(molecules) # And the molecule each atom is in atom_to_molecule = {} for molecule, atoms in enumerate(molecules): for atom in atoms: atom_to_molecule[atom] = molecule # The bonds in each molecule bonds_per_molecule = [[] for i in range(n_molecules)] for bond in self.bonds.bonds(): i = bond["i"] j = bond["j"] order = bond["bondorder"] molecule = atom_to_molecule[i] bonds_per_molecule[molecule].append((i, j, order)) # Get the canonical smiles for each molecule to_can = openbabel.OBConversion() to_can.SetOutFormat("can") ob_mol = openbabel.OBMol() ob_template = openbabel.OBMol() atnos = self.atoms.atomic_numbers xyzs = self.atoms.get_coordinates(fractionals=False) atom_index = {j: i for i, j in enumerate(self.atoms.ids)} new_subsets = {} sids = {} new_templates = [] tids = [] for molecule, atoms in enumerate(molecules): to_index = {j: i for i, j in enumerate(atoms)} molecule_atnos = [atnos[atom_index[i]] for i in atoms] ob_mol.Clear() for atom, atno in zip(atoms, molecule_atnos): ob_atom = ob_mol.NewAtom() ob_atom.SetAtomicNum(atno) bonds = [] for i, j, order in bonds_per_molecule[molecule]: bonds.append((to_index[i], to_index[j], order)) # 1-based indices in ob. ob_mol.AddBond(to_index[i] + 1, to_index[j] + 1, order) canonical = to_can.WriteString(ob_mol).strip() if full_templates: # See if a molecule template with the canonical smiles exists if templates.exists(canonical, "molecule"): template = templates.get(canonical, category="molecule") else: # Create a new system & configuration for the template system_name = "template system " + canonical if not self.system_db.system_exists(system_name): system = self.system_db.create_system( system_name, make_current=False ) configuration = system.create_configuration( canonical, make_current=False ) cid = configuration.id kwargs = {} kwargs["atno"] = molecule_atnos molecule_xyzs = [xyzs[atom_index[i]] for i in atoms] kwargs["x"] = [x for x, y, z in molecule_xyzs] kwargs["y"] = [y for x, y, z in molecule_xyzs] kwargs["z"] = [z for x, y, z in molecule_xyzs] ids = configuration.atoms.append(**kwargs) kwargs = {} kwargs["i"] = [ids[x] for x, _, _ in bonds] kwargs["j"] = [ids[x] for _, x, _ in bonds] kwargs["bondorder"] = [x for _, _, x in bonds] configuration.bonds.append(**kwargs) template = templates.create( canonical, category="molecule", configuration=cid ) else: if templates.exists(canonical, "molecule"): template = templates.get(canonical, category="molecule") else: template = templates.create(canonical, category="molecule") if template.id not in tids: tids.append(template.id) new_templates.append(template) if create_subsets: if full_templates: # Need to reorder the atoms to match the template atoms # Prepare the OB molecule for the template ob_template.Clear() for atno in template.atoms.atomic_numbers: ob_atom = ob_template.NewAtom() ob_atom.SetAtomicNum(atno) tatom_ids = template.atoms.ids to_index = {j: i for i, j in enumerate(tatom_ids)} for row in template.bonds.bonds(): i = to_index[row["i"]] j = to_index[row["j"]] order = row["bondorder"] ob_template.AddBond(i + 1, j + 1, order) # Get the mapping from template to molecule query = openbabel.CompileMoleculeQuery(ob_template) mapper = openbabel.OBIsomorphismMapper.GetInstance(query) mapping = openbabel.vpairUIntUInt() mapper.MapFirst(ob_mol, mapping) reordered_atoms = [atoms[j] for i, j in mapping] if len(reordered_atoms) == 0: logger.warning( "There are no atoms in the subset for this molecule!" ) else: subset = self.subsets.create( template, reordered_atoms, tatom_ids ) else: subset = self.subsets.create(template, atoms) tid = template.id if tid not in sids: sids[tid] = [] new_subsets[tid] = [] sids[tid].append(subset.id) new_subsets[tid].append(subset) if create_subsets: return new_templates, new_subsets else: return new_templates
[docs] def get_molecule_smiles(self): """Return the a list of the canonical SMILES for each molecule.. Returns ------- [str] The canonical SMILES for each molecule, in order that they are found. """ # Find the molecules molecules = self.find_molecules() n_molecules = len(molecules) # And the molecule each atom is in atom_to_molecule = {} for molecule, atoms in enumerate(molecules): for atom in atoms: atom_to_molecule[atom] = molecule # The bonds in each molecule bonds_per_molecule = [[] for i in range(n_molecules)] for bond in self.bonds.bonds(): i = bond["i"] j = bond["j"] order = bond["bondorder"] molecule = atom_to_molecule[i] bonds_per_molecule[molecule].append((i, j, order)) # Get the canonical smiles for each molecule to_can = openbabel.OBConversion() to_can.SetOutFormat("can") ob_mol = openbabel.OBMol() atnos = self.atoms.atomic_numbers atom_index = {j: i for i, j in enumerate(self.atoms.ids)} SMILES = [] for molecule, atoms in enumerate(molecules): to_index = {j: i for i, j in enumerate(atoms)} molecule_atnos = [atnos[atom_index[i]] for i in atoms] ob_mol.Clear() for atom, atno in zip(atoms, molecule_atnos): ob_atom = ob_mol.NewAtom() ob_atom.SetAtomicNum(atno) bonds = [] for i, j, order in bonds_per_molecule[molecule]: bonds.append((to_index[i], to_index[j], order)) # 1-based indices in ob. ob_mol.AddBond(to_index[i] + 1, to_index[j] + 1, order) canonical = to_can.WriteString(ob_mol).strip() SMILES.append(canonical) return SMILES
[docs] def reimage_bonded_atoms(self, reimage_molecules=True): """Ensure that the atoms in a molecule are "near" each other. In a periodic system atoms can be translated by a unit cell without changing anything. However the bonds in a molecule need to account for such translations of atoms otherwise a bond may appear to be very long. It is often convenient physically move atoms by the correct cell translations to bring the atoms of a molecule close to each other. That is what this method does, moving all the atoms close to the first. Optionally the molecule is also moved to bring its geometric center into the primary unit cell, i.e. with fractional coordinates in the range [0..1). Parameters ---------- reimage_molecules : bool = True Whether to move molecules into the primary unit cell. Returns ------- bool True if the coordinates were changed. """ if self.periodicity == 0: # Nothing to do. return False logger.log(0, f"Configuration {self.id} {self}") logger.log(0, self.atoms) logger.log(0, 10 * "+") # The bonds from each atom bonds = self.bonded_neighbors(as_indices=True) logger.debug("Bonds:") logger.debug(pprint.pformat(bonds)) logger.debug("") # And coordinates as fractionals tmp = self.atoms.get_coordinates(fractionals=True) xyz = [[x, y, z] for x, y, z in tmp] visited = [False] * len(xyz) # Find the molecules molecules = self.find_molecules(as_indices=True) # Work through the atoms, see if bonded atoms need to be moved changed = False for atoms in molecules: logger.debug("") logger.debug(f"{atoms=}") # if len(atoms) > 1: # atom = sorted(atoms)[0] # logger.debug(f" --> {atom}") # if self._image_helper(bonds, xyz, atom): # changed = True changed = self._image_helper2(bonds, xyz, visited, atoms[0], changed) # Check temporarily. for iatom, jatoms in enumerate(bonds): xyzi = xyz[iatom] for jatom in jatoms: xyzj = xyz[jatom] for vi, vj in zip(xyzi, xyzj): delta = floor(vj - vi + 0.5) if delta != 0: print( f"Warning: {iatom} - {jatom} delta = {vj:.3f} - {vi:.3f} = " f"{delta}" ) # Move the center of molecules into the primary cell, if requested. if reimage_molecules: for atoms in molecules: cx = cy = cz = 0.0 for atom in atoms: x, y, z = xyz[atom] cx += x cy += y cz += z n = len(atoms) dx = int(cx / n) dy = int(cy / n) dz = int(cz / n) if dx != 0 or dy != 0 or dz != 0: changed = True for atom in atoms: x, y, z = xyz[atom] xyz[atom] = [x - dx, y - dy, z - dz] if changed: self.atoms.set_coordinates(xyz, fractionals=True) return changed
def _image_helper(self, bonds, xyz, atom): """Translate all the bonded neighbors with larger index. Parameters ---------- bonds : [[int]] The bonded neighbors for each atom. xyz : [[float * 3]] The fractional coordinates of the atoms. """ logger.debug(f" helper {atom}: {bonds[atom]}") changed = False xyzi = xyz[atom] for jatom in bonds[atom]: logger.debug(f"\t\t{atom=} {jatom}") if jatom < atom: continue xyzj = xyz[jatom] new_xyz = [] shift = False for vi, vj in zip(xyzi, xyzj): delta = floor(vj - vi + 0.5) if delta == 0: new_xyz.append(vj) else: new_xyz.append(vj - delta) shift = True logger.log( 0, f"{atom:3d} - {jatom:3d}: {xyzi} {xyzj} --> {new_xyz} {shift}" ) if shift: changed = True logger.debug( f"{atom:3d} - {jatom:3d}: {xyzi} {xyzj} --> {new_xyz} {shift}" ) xyz[jatom] = new_xyz for jatom in bonds[atom]: if jatom < atom: continue if self._image_helper(bonds, xyz, jatom): changed = True return changed
[docs] def reimage_molecules(self): """Reimage molecules into the primary unit cell. The molecules are moved to bring their geometric center into the primary unit cell, i.e. with fractional coordinates in the range [0..1). Returns ------- bool True if the coordinates were changed. """ if self.periodicity == 0: # Nothing to do. return False # And coordinates as fractionals xyz = self.atoms.get_coordinates(fractionals=True) # Find the molecules molecules = self.find_molecules(as_indices=True) # Move the centor of molecules into the primary cell, if requested. changed = False for atoms in molecules: cx = cy = cz = 0.0 for atom in atoms: x, y, z = xyz[atom] cx += x cy += y cz += z n = len(atoms) dx = int(cx / n) dy = int(cy / n) dz = int(cz / n) if dx != 0 or dy != 0 or dz != 0: changed = True for atom in atoms: x, y, z = xyz[atom] xyz[atom] = [x - dx, y - dy, z - dz] if changed: self.atoms.set_coordinates(xyz, fractionals=True) return changed
def _image_helper2(self, bonds, xyz, visited, iatom, changed): visited[iatom] = True xyzi = xyz[iatom] for jatom in bonds[iatom]: logger.debug(f"\t\t{iatom=} {jatom}") if visited[jatom]: continue new_xyz = [] xyzj = xyz[jatom] shift = False for vi, vj in zip(xyzi, xyzj): delta = floor(vj - vi + 0.5) if delta == 0: new_xyz.append(vj) else: new_xyz.append(vj - delta) changed = True shift = True if shift: logger.debug( f"{iatom:3d} - {jatom:3d}: {xyzi} {xyzj} --> {new_xyz} {shift}" ) xyz[jatom] = new_xyz changed = self._image_helper2(bonds, xyz, visited, jatom, changed) return changed