# Source code for molsystem.molfile
# -*- coding: utf-8 -*-
"""Functions for handling MDL molfiles"""
import logging
import time
from molsystem import elements
logger = logging.getLogger(__name__)
[docs]
class MolFileMixin:
    """A mixin for reading and writing MDL Molfiles (V3000 format).

    The class using this mixin must provide the attributes ``name``,
    ``periodicity``, ``atoms`` and ``bonds`` and the method ``clear()``,
    all of which are used by the methods below.
    """

    def to_molfile_text(self, title=None, comment="Exported from SEAMM"):
        """Create the text of the Molfile from the system.

        Parameters
        ----------
        title : str = None
            The title for the structure, by default the system name.
        comment : str = 'Exported from SEAMM'
            Comment line

        Returns
        -------
        text : str
            The text of the file.
        """
        atoms = self.atoms
        bonds = self.bonds

        # Sgroups, 3D constraints and the chiral flag are not written yet.
        nsgroups = 0
        n3d = 0
        is_chiral = 0  # may need to think about this later.

        # Every V3000 record starts with 'M', two spaces and 'V30'.
        v30 = "M  V30"

        lines = [self.name if title is None else title]

        # Header line: user initials (2 characters), program name (8),
        # date and time as MMDDYYHHmm (10) and the dimensional code (2).
        date_time = time.strftime("%m%d%y%H%M")
        lines.append("PS" + "SEAMM_WF" + date_time + "3D")
        lines.append(comment)

        # Legacy counts line. In a V3000 file it only carries the version
        # tag; the real counts are in the 'COUNTS' record below.
        lines.append("  0  0  0     0  0" + " " * 12 + "999 V3000")

        lines.append(f"{v30} BEGIN CTAB")
        lines.append(
            f"{v30} COUNTS {atoms.n_atoms} {bonds.n_bonds} "
            f"{nsgroups} {n3d} {is_chiral}"
        )

        lines.append(f"{v30} BEGIN ATOM")
        # Use the same column name that from_molfile_text() creates.
        have_formal_charges = "formal_charge" in atoms
        for count, row in enumerate(atoms.atoms(), start=1):
            symbol = elements.to_symbols([row["atno"]])[0]
            # The trailing 0 is the atom-atom mapping number (none).
            record = (
                f"{v30} {count} {symbol} {row['x']} {row['y']} {row['z']} 0"
            )
            if have_formal_charges:
                record += f" CHG={row['formal_charge']}"
            lines.append(record)
        lines.append(f"{v30} END ATOM")

        lines.append(f"{v30} BEGIN BOND")
        # NOTE(review): atoms are numbered 1..n in output order above, but
        # the bond records use row['i'] and row['j'] as stored. That is only
        # right if those values are the 1-based positions -- confirm.
        for count, row in enumerate(bonds.bonds(), start=1):
            lines.append(
                f"{v30} {count} {row['bondorder']} {row['i']} {row['j']}"
            )
        lines.append(f"{v30} END BOND")

        lines.append(f"{v30} END CTAB")
        lines.append("M  END")

        return "\n".join(lines)

    def from_molfile_text(self, data):
        """Create the system from an MDL Molfile, version 3

        The current contents of the system are cleared first and the
        periodicity is set to 0.

        Parameters
        ----------
        data : str
            The complete text of the Molfile.

        Raises
        ------
        RuntimeError
            If the text is too short to be a Molfile, is not a V3000 file,
            has an unterminated atom or bond table, or has a bond table that
            comes before the atom table or references an invalid atom index.
        NotImplementedError
            If the file contains more than one connection table.
        """
        self.clear()
        self.periodicity = 0

        all_lines = data.splitlines()
        if len(all_lines) < 4:
            raise RuntimeError(
                "molfile:to_seamm -- the file is too short to be a Molfile: "
                f"{len(all_lines)} line(s)"
            )

        # Line 1 is the title; lines 2 and 3 (header and comment) are unused.
        self.name = all_lines[0].strip()

        # Line 4 is the legacy counts line. The number of fields before the
        # version tag differs between writers, so check the last field.
        line = all_lines[3]
        fields = line.split()
        if not fields or fields[-1] != "V3000":
            raise RuntimeError(
                f"molfile:to_seamm -- the file is not version 3: '{line}'"
            )

        n_molecules = 0
        natoms = None
        nbonds = None
        atom_ids = None

        # One shared iterator so that the table readers consume their own
        # lines and this loop resumes after the table.
        lines = enumerate(all_lines[4:], start=4)
        for lineno, line in lines:
            logger.debug("%s: %s", lineno, line)
            # Collapse whitespace so 'M  V30' and 'M V30' both match.
            record = " ".join(line.split())
            if "M END" in record:
                break
            elif "M V30 BEGIN CTAB" in record:
                n_molecules += 1
                if n_molecules > 1:
                    raise NotImplementedError("Multiple molecules?")
            elif "M V30 END CTAB" in record:
                pass
            elif "M V30 COUNTS" in record:
                # Only the first two counts are used; the Sgroup, 3D
                # constraint and chiral fields and anything after them are
                # ignored.
                natoms, nbonds = (int(value) for value in record.split()[3:5])
            elif "M V30 BEGIN ATOM" in record:
                atom_ids = self._molfile_read_atoms(lines, expected=natoms)
            elif "M V30 BEGIN BOND" in record:
                if atom_ids is None:
                    raise RuntimeError(
                        "molfile:to_seamm -- the bond table comes before the "
                        "atom table"
                    )
                self._molfile_read_bonds(lines, atom_ids, expected=nbonds)

    def _molfile_read_atoms(self, lines, expected=None):
        """Read atom records up to 'END ATOM' and append the atoms.

        Parameters
        ----------
        lines : iterator of (int, str)
            The (line number, line) pairs, positioned after 'BEGIN ATOM'.
        expected : int = None
            The number of atoms given in the COUNTS record, if known. A
            mismatch is logged as a warning.

        Returns
        -------
        atom_ids
            Whatever ``self.atoms.append`` returns for the new atoms.
        """
        logger.debug("In atom table")
        xs = []
        ys = []
        zs = []
        symbols = []
        formal_charges = []
        have_formal_charges = False

        for lineno, line in lines:
            tokens = line.split()
            if "M V30 END ATOM" in " ".join(tokens):
                break

            # Record layout: M V30 index type x y z aamap [KEY=value ...]
            symbol, x, y, z = tokens[3:7]
            xs.append(float(x))
            ys.append(float(y))
            zs.append(float(z))
            symbols.append(symbol)

            # Exactly one charge per atom so the lists stay the same length.
            charge = 0
            for token in tokens[7:]:
                if token.startswith("CHG="):
                    charge = int(token[4:])
                    have_formal_charges = True
                    break
            formal_charges.append(charge)
        else:
            raise RuntimeError(
                "molfile:to_seamm -- the atom table is not terminated by "
                "'M  V30 END ATOM'"
            )

        if expected is not None and len(symbols) != expected:
            logger.warning(
                "molfile: COUNTS gives %d atoms but the atom table has %d",
                expected,
                len(symbols),
            )

        logger.debug("Saving %d atoms to system", len(xs))
        if not have_formal_charges:
            return self.atoms.append(x=xs, y=ys, z=zs, symbol=symbols)

        logger.debug(" with formal charges")
        # Create the column only if needed, but always store the charges.
        if "formal_charge" not in self.atoms:
            self.atoms.add_attribute("formal_charge", coltype="int", default=0)
        return self.atoms.append(
            x=xs,
            y=ys,
            z=zs,
            symbol=symbols,
            formal_charge=formal_charges,
        )

    def _molfile_read_bonds(self, lines, atom_ids, expected=None):
        """Read bond records up to 'END BOND' and append the bonds.

        Parameters
        ----------
        lines : iterator of (int, str)
            The (line number, line) pairs, positioned after 'BEGIN BOND'.
        atom_ids : sequence
            The ids of the atoms, in the order they appear in the file.
        expected : int = None
            The number of bonds given in the COUNTS record, if known. A
            mismatch is logged as a warning.
        """
        logger.debug("In bond table")
        iatoms = []
        jatoms = []
        bondorders = []

        for lineno, line in lines:
            tokens = line.split()
            if "M V30 END BOND" in " ".join(tokens):
                break

            # Record layout: M V30 index type atom1 atom2 [KEY=value ...]
            bondorder, iatom, jatom = tokens[3:6]
            first = int(iatom)
            second = int(jatom)
            # Indices in the file are 1-based; anything smaller would wrap
            # around to the end of atom_ids instead of failing.
            if first < 1 or second < 1:
                raise RuntimeError(
                    "molfile:to_seamm -- invalid atom index in bond on line "
                    f"{lineno}: '{line}'"
                )
            iatoms.append(atom_ids[first - 1])
            jatoms.append(atom_ids[second - 1])
            bondorders.append(int(bondorder))
        else:
            raise RuntimeError(
                "molfile:to_seamm -- the bond table is not terminated by "
                "'M  V30 END BOND'"
            )

        if expected is not None and len(bondorders) != expected:
            logger.warning(
                "molfile: COUNTS gives %d bonds but the bond table has %d",
                expected,
                len(bondorders),
            )

        if iatoms:
            self.bonds.append(i=iatoms, j=jatoms, bondorder=bondorders)