# -*- coding: utf-8 -*-
"""A base class for substeps of the xTB plug-in.
All substeps (Energy, Optimization, Frequencies, and any future substeps such
as MD or metadynamics) inherit from ``Substep``. This base class holds the
machinery that is genuinely common across substeps:
* Module-level constants for method <-> CLI flag mapping and the supported
solvent list per solvation model.
* A periodicity check that refuses periodic input with a clear message.
* A writer for the ``coord.xyz`` input file consumed by xtb.
* A builder for the common parts of the xtb command line (method, charge,
multiplicity, accuracy, solvation, JSON output).
* A wrapper around the SEAMM executor for invoking the xtb binary.
* A JSON-output parser for ``xtbout.json``.
* A text parser for the few quantities not exposed in the JSON
(notably the thermochemistry block from Hessian runs).
It deliberately does NOT bake in any assumption that a substep is
energy-related, so an MD substep, for example, can inherit directly from
``Substep`` without going through ``Energy``.
"""
import configparser
import importlib.resources
import json
import logging
from pathlib import Path
import pprint # noqa: F401
import re
import shutil
import seamm
from seamm_util import ureg, Q_ # noqa: F401
import seamm_util.printing as printing
logger = logging.getLogger(__name__)
job = printing.getPrinter()
printer = printing.getPrinter("xTB")
# ---------------------------------------------------------------------------
# Method / model mapping
# ---------------------------------------------------------------------------
#: User-facing method label -> list of xtb CLI arguments.
METHOD_TO_CLI = {
"GFN2-xTB": ["--gfn", "2"],
"GFN1-xTB": ["--gfn", "1"],
"GFN0-xTB": ["--gfn", "0"],
"GFN-FF": ["--gfnff"],
}
#: All methods we expose. Used as the parameter enumeration.
METHODS = tuple(METHOD_TO_CLI)
# ---------------------------------------------------------------------------
# Solvation
# ---------------------------------------------------------------------------
#: Solvation model labels -> CLI flag.
SOLVATION_MODEL_TO_FLAG = {
"none": None,
"ALPB": "--alpb",
"GBSA": "--gbsa",
"CPCM-X": "--cpcmx",
}
#: Solvation models we expose. ``"none"`` means no implicit solvation.
SOLVATION_MODELS = tuple(SOLVATION_MODEL_TO_FLAG)
# ALPB and GBSA share the same parametrized solvent list. The list below is
# from the xTB documentation as of v6.7. Some solvents are method-specific:
# DMF and n-hexane are GFN2-only; benzene is GFN1-only. We expose the union
# here and let xtb itself reject mismatches with a clear error.
SOLVENTS_GBSA_ALPB = (
"acetone",
"acetonitrile",
"benzene", # GFN1 only
"CH2Cl2",
"CHCl3",
"CS2",
"DMF", # GFN2 only
"DMSO",
"ether",
"H2O",
"methanol",
"n-hexane", # GFN2 only
"THF",
"toluene",
)
# CPCM-X uses the much larger Minnesota Solvation Database. The most common
# entries are listed; users can also pass arbitrary strings via the variable
# mechanism (the parameter enumeration is not strictly enforced).
SOLVENTS_CPCMX = (
"acetone",
"acetonitrile",
"aniline",
"benzaldehyde",
"benzene",
"CH2Cl2",
"CHCl3",
"CS2",
"dioxane",
"DMF",
"DMSO",
"ether",
"ethylacetate",
"furane",
"hexadecane",
"hexane",
"methanol",
"nitromethane",
"octanol",
"phenol",
"thf",
"toluene",
"water",
"octanol(wet)",
)
[docs]
def solvent_choices_for(model):
"""Return the supported solvent enumeration for a solvation model.
Parameters
----------
model : str
The solvation-model label (one of :data:`SOLVATION_MODELS`).
Returns
-------
tuple of str
The list of solvent names we expose to the user. Empty for "none".
"""
if model == "ALPB" or model == "GBSA":
return SOLVENTS_GBSA_ALPB
if model == "CPCM-X":
return SOLVENTS_CPCMX
return ()
# ---------------------------------------------------------------------------
# Substep base class
# ---------------------------------------------------------------------------
[docs]
class Substep(seamm.Node):
"""Common base class for substeps of the xTB plug-in.
Subclasses are expected to:
* Set ``self._calculation`` to a short identifier (e.g. ``"Energy"``).
* Set ``self._model`` once the active xTB Hamiltonian is known
(e.g. ``"GFN2-xTB"``).
* Provide their own ``parameters``, ``description_text``, ``run``,
and ``analyze`` methods.
The helpers below are available to all subclasses:
* :meth:`check_periodicity` -- raise on periodic input.
* :meth:`write_coord_xyz` -- write the molecule to ``coord.xyz``.
* :meth:`base_xtb_args` -- build the common part of the xtb CLI.
* :meth:`run_xtb` -- invoke xtb via the SEAMM executor.
* :meth:`read_xtbout_json` -- parse ``xtbout.json``.
* :meth:`parse_thermo_block` -- pull the thermochemistry table from the
text output.
"""
def __init__(
self,
flowchart=None,
title="no title",
extension=None,
logger=logger,
module=__name__,
):
"""Initialize the Substep base class.
Parameters
----------
flowchart : seamm.Flowchart
The non-graphical flowchart that contains this step.
title : str
The name displayed in the flowchart.
extension : None
Not yet implemented.
logger : Logger
The logger to use and pass to parent classes.
module : str
The module name, used by the SEAMM Node base.
"""
self._input_only = False
self._calculation = None
self._model = None
super().__init__(
flowchart=flowchart,
title=title,
extension=extension,
logger=logger,
)
@property
def header(self):
"""A printable header for this section of output."""
return "Step {}: {}".format(".".join(str(e) for e in self._id), self.title)
@property
def input_only(self):
"""Whether to write the input only, not run xtb."""
return self._input_only
@input_only.setter
def input_only(self, value):
self._input_only = value
@property
def is_runable(self):
"""Indicate whether this substep runs xtb or just adds input."""
return True
@property
def global_options(self):
"""Pass-through to the parent (top-level xTB step)'s global options."""
return self.parent.global_options
@property
def options(self):
"""Pass-through to the parent (top-level xTB step)'s options."""
return self.parent.options
# ------------------------------------------------------------------
# Periodicity refusal
# ------------------------------------------------------------------
[docs]
def check_periodicity(self, configuration):
"""Refuse periodic input with a clear, user-visible message.
xTB's PBC support is limited and is out of scope for v1 of this
plug-in. If the configuration is periodic, this method prints a
message via the step printer and raises ``RuntimeError`` so the
flowchart stops cleanly.
"""
if configuration.periodicity != 0:
msg = (
"The xtb_step plug-in does not currently support periodic "
"systems. The configuration has periodicity "
f"{configuration.periodicity}. Please use a molecular "
"(non-periodic) configuration, or use a different plug-in "
"(e.g. dftbplus_step) for periodic xTB-family calculations."
)
printer.important(msg)
raise RuntimeError(msg)
# ------------------------------------------------------------------
# Coordinate writer
# ------------------------------------------------------------------
[docs]
def write_coord_xyz(self, directory, configuration, filename="coord.xyz"):
"""Write the configuration to a standard XYZ file for xtb."""
directory = Path(directory)
directory.mkdir(parents=True, exist_ok=True)
path = directory / filename
if hasattr(configuration, "to_xyz_text"):
path.write_text(configuration.to_xyz_text())
else:
symbols = configuration.atoms.symbols
coords = configuration.atoms.get_coordinates(fractionals=False)
lines = [str(len(symbols)), ""]
for sym, (x, y, z) in zip(symbols, coords):
lines.append(f"{sym:<3s} {x:18.10f} {y:18.10f} {z:18.10f}")
path.write_text("\n".join(lines) + "\n")
return path
# ------------------------------------------------------------------
# CLI builders
# ------------------------------------------------------------------
[docs]
def base_xtb_args(self, P, configuration):
"""Build the parts of the xtb CLI common to all substeps.
Charge and spin multiplicity are read from the configuration, not
from ``P``. This matches the SEAMM convention (charge and spin are
properties of the chemical system, not of the calculation) and
lets high-throughput flowcharts loop over systems without per-
system parameter editing.
Parameters
----------
P : dict
The current parameter values (from
``parameters.current_values_to_dict()``). Expected keys include
``method``, ``accuracy``, ``solvation model``, ``solvent``.
configuration : molsystem.Configuration
Provides the net charge and spin multiplicity for the system.
Returns
-------
list of str
The argument list to prepend to the substep-specific arguments.
Always begins with the coordinate filename (``coord.xyz``).
"""
args = ["coord.xyz"]
args += METHOD_TO_CLI[P["method"]]
args += ["--json"]
# Charge: from the configuration, always.
charge = int(configuration.charge)
args += ["--chrg", str(charge)]
# Multiplicity / unpaired electrons. xtb takes --uhf with the number
# of UNPAIRED electrons (M - 1 for spin multiplicity M).
mult = int(configuration.spin_multiplicity)
n_unpaired = max(mult - 1, 0)
args += ["--uhf", str(n_unpaired)]
# Accuracy
if "accuracy" in P:
try:
acc = float(P["accuracy"])
except (TypeError, ValueError):
acc = 1.0
if acc != 1.0:
args += ["--acc", f"{acc:g}"]
# Solvation
smodel = P.get("solvation model", "none")
if smodel != "none":
flag = SOLVATION_MODEL_TO_FLAG[smodel]
solvent = P.get("solvent", "H2O")
args += [flag, str(solvent)]
if smodel != "CPCM-X" and P["method"] == "GFN0-xTB":
# ALPB/GBSA are not parametrized for GFN0-xTB.
logger.warning(
"Solvation models ALPB/GBSA are not parametrized for "
"GFN0-xTB. xtb may emit a warning or refuse to run."
)
return args
# ------------------------------------------------------------------
# Executor wrapper
# ------------------------------------------------------------------
[docs]
def run_xtb(self, args, return_files=None, env=None):
"""Run xtb via the SEAMM executor.
Reads the per-plug-in ``xtb.ini`` from the SEAMM root directory,
falling back to the bundled ``data/xtb.ini`` template if the user
does not yet have one. Then dispatches the run through the
flowchart's executor (which handles conda/local/modules/docker
transparently).
"""
if return_files is None:
return_files = []
if env is None:
env = {"OMP_NUM_THREADS": "1"}
executor = self.parent.flowchart.executor
executor_type = executor.name
# Locate / bootstrap the xtb.ini configuration
seamm_options = self.global_options
ini_dir = Path(seamm_options["root"]).expanduser()
ini_path = ini_dir / "xtb.ini"
if not ini_path.exists():
resources = importlib.resources.files("xtb_step") / "data"
ini_text = (resources / "xtb.ini").read_text()
ini_path.write_text(ini_text)
logger.info(f"Bootstrapped default xtb.ini at {ini_path}")
full_config = configparser.ConfigParser()
full_config.read(ini_path)
if executor_type not in full_config:
path = shutil.which("xtb")
if path is None:
raise RuntimeError(
f"No section for '{executor_type}' in the xtb.ini "
f"({ini_path}), no defaults provided, and no 'xtb' "
"executable found on $PATH. Please install xtb (e.g. "
"via 'conda install -c conda-forge xtb') or edit "
f"{ini_path} to point at an xtb installation."
)
full_config.add_section(executor_type)
full_config.set(executor_type, "installation", "local")
full_config.set(executor_type, "code", str(path))
with ini_path.open("w") as fd:
full_config.write(fd)
logger.info(
f"Added a '{executor_type}' section to {ini_path} pointing "
f"at {path}"
)
config = dict(full_config.items(executor_type))
code = config.get("code", "xtb")
cmd = [code, *args]
result = executor.run(
cmd=cmd,
config=config,
directory=self.directory,
files={},
return_files=return_files,
in_situ=True,
shell=True,
env=env,
)
if not result:
self.logger.error("There was an error running xtb")
return None
logger.debug("xtb run result:\n" + pprint.pformat(result))
return result
# ------------------------------------------------------------------
# JSON parser
# ------------------------------------------------------------------
[docs]
def read_xtbout_json(self, directory=None, filename="xtbout.json"):
"""Read the xtb JSON output produced by ``--json``.
xTB writes ``xtbout.json`` in the working directory when invoked with
``--json``. The schema includes (at least, in recent releases):
``"total energy"``, ``"HOMO-LUMO gap / eV"``, ``"electronic
energy"``, ``"dipole / a.u."``, ``"partial charges"``, plus
method-specific fields. The exact set varies between xtb versions;
callers should treat the returned dict as best-effort.
Parameters
----------
directory : pathlib.Path, optional
The directory containing the file. Defaults to ``self.directory``.
filename : str
The JSON filename to read. Defaults to ``xtbout.json``.
Returns
-------
dict
The parsed JSON data, or an empty dict if the file is missing
or unreadable.
"""
if directory is None:
directory = self.directory
path = Path(directory) / filename
if not path.exists():
logger.warning(f"xtb JSON output not found: {path}")
return {}
try:
with path.open() as fd:
return json.load(fd)
except (OSError, json.JSONDecodeError) as exc:
logger.warning(f"Could not parse xtb JSON output {path}: {exc}")
return {}
# ------------------------------------------------------------------
# Text-output parsers
# ------------------------------------------------------------------
[docs]
def parse_thermo_block(self, stdout):
"""Pull thermochemistry quantities from xtb stdout.
xtb's Hessian runs print a ``:: THERMODYNAMIC ::`` summary block
and a per-temperature table. We extract the key scalars at a single
temperature here. The format is::
:: total free energy -41.971849822766 Eh ::
:: total energy -42.153937303642 Eh ::
:: zero point energy 0.182087480876 Eh ::
:: G(RRHO) w/o ZPVE 0.000000000000 Eh ::
:: G(RRHO) contrib. 0.182087480876 Eh ::
and a temperature line like::
T/K H(0)-H(T)+PV H(T)/Eh T*S/Eh G(T)/Eh
298.15 ... ... ... ...
Energies are converted from E_h (xtb's native unit) to kJ/mol so
the values returned here are chemist-friendly. The entropy
contribution is reported both as ``entropy_term`` (T*S in
kJ/mol, the form xtb prints) and as ``entropy`` (S in J/mol/K,
the form that appears in standard tables and chemistry papers).
Parameters
----------
stdout : str
The captured stdout (or contents of an ``xtb.out`` file) from a
Hessian run.
Returns
-------
dict
Possibly containing keys: ``total_free_energy``,
``zero_point_energy``, ``temperature``, ``enthalpy``,
``entropy_term``, ``entropy``, ``gibbs_free_energy``. All
energies in kJ/mol, ``entropy`` in J/mol/K, ``temperature``
in K. Missing entries indicate parsing failed for that
quantity; callers should handle absence gracefully.
"""
out = {}
eh_to_kjmol = (1.0 * ureg.hartree).to("kJ/mol").magnitude
# Scalar lines from the THERMODYNAMIC summary block. Values in
# the file are in E_h; convert to kJ/mol on the fly.
scalar_patterns = {
"total_free_energy": r":: total free energy\s+(-?\d+\.\d+)\s*Eh",
"zero_point_energy": r":: zero point energy\s+(-?\d+\.\d+)\s*Eh",
}
for key, pat in scalar_patterns.items():
m = re.search(pat, stdout)
if m:
try:
out[key] = float(m.group(1)) * eh_to_kjmol
except ValueError:
pass
# The per-temperature table: take the first data row after the
# T/K header line.
# Header looks roughly like:
# T/K H(0)-H(T)+PV H(T)/Eh T*S/Eh G(T)/Eh
thermo_header = re.search(
r"T/K\s+H\(0\)-H\(T\)\+PV\s+H\(T\)/Eh\s+T\*S/Eh\s+G\(T\)/Eh",
stdout,
)
if thermo_header:
tail = stdout[thermo_header.end() :]
# Skip dash-separator lines, find first numeric row of 5 floats.
for line in tail.splitlines():
line = line.strip()
if not line or set(line) <= {"-"}:
continue
# Numbers may be in 0.123E+02 or plain decimal notation.
num = r"-?\d+(?:\.\d+)?(?:[Ee][+-]?\d+)?"
m = re.match(rf"({num})\s+({num})\s+({num})\s+({num})\s+({num})", line)
if m:
try:
T = float(m.group(1))
H_eh = float(m.group(3))
TS_eh = float(m.group(4))
G_eh = float(m.group(5))
except ValueError:
break
out["temperature"] = T # K
out["enthalpy"] = H_eh * eh_to_kjmol # kJ/mol
out["entropy_term"] = TS_eh * eh_to_kjmol # T*S as kJ/mol
out["gibbs_free_energy"] = G_eh * eh_to_kjmol # kJ/mol
# Derive entropy itself in J/mol/K (chemist's preferred form).
if T > 0.0:
out["entropy"] = (TS_eh * eh_to_kjmol * 1000.0) / T
break
return out