# -*- coding: utf-8 -*-
"""Non-graphical part of the Energy step in an xTB flowchart."""
import importlib.resources
import logging
import math
from pathlib import Path
import pprint # noqa: F401
import textwrap
from tabulate import tabulate
import xtb_step
from .substep import Substep
import molsystem
import seamm
from seamm_util import ureg, Q_ # noqa: F401
import seamm_util.printing as printing
from seamm_util.printing import FormattedText as __
logger = logging.getLogger(__name__)
job = printing.getPrinter()
printer = printing.getPrinter("xTB")
# Add this module's properties to the standard properties
path = importlib.resources.files("xtb_step") / "data"
csv_file = path / "properties.csv"
if path.exists():
molsystem.add_properties_from_file(csv_file)
# Conversion factor: xtb reports dipole in atomic units (e * Bohr).
# 1 a.u. (dipole) = 2.541746229 D (CODATA via Pint).
# We compute it from Pint to stay consistent with the rest of SEAMM.
_AU_DIPOLE_TO_DEBYE = (
(1.0 * ureg.elementary_charge * ureg.bohr).to(ureg.debye).magnitude
)
[docs]
class Energy(Substep):
"""Run an xTB single-point energy.
See Also
--------
Substep, EnergyParameters, TkEnergy
"""
def __init__(self, flowchart=None, title="Energy", extension=None, logger=logger):
"""A substep for an xTB single-point energy.
Parameters
----------
flowchart : seamm.Flowchart
The non-graphical flowchart that contains this step.
title : str
The name displayed in the flowchart.
extension : None
Not yet implemented.
logger : Logger
The logger to use and pass to parent classes.
"""
logger.debug(f"Creating Energy {self}")
super().__init__(
flowchart=flowchart,
title=title,
extension=extension,
module=__name__,
logger=logger,
)
self._calculation = "Energy"
self._model = None
self._metadata = xtb_step.metadata
self.parameters = xtb_step.EnergyParameters()
@property
def version(self):
"""The semantic version of this module."""
return xtb_step.__version__
@property
def git_revision(self):
"""The git version of this module."""
return xtb_step.__git_revision__
# ------------------------------------------------------------------
# Description
# ------------------------------------------------------------------
[docs]
def description_text(self, P=None, calculation_type="single-point energy"):
"""Create the text description of what this step will do.
Note: charge and spin multiplicity are NOT mentioned here. They
are properties of the system, not of the calculation, and the
SEAMM convention is to not echo them at the step level.
"""
if not P:
P = self.parameters.values_to_dict()
method = P["method"]
text = (
f"An xTB {calculation_type} calculation using the {method} " "Hamiltonian"
)
# Solvation
smodel = P.get("solvation model", "none")
if smodel != "none":
text += f" with implicit solvation in {P['solvent']} ({smodel})"
text += "."
# Accuracy
try:
acc = float(P.get("accuracy", 1.0))
except (TypeError, ValueError):
acc = 1.0
if acc != 1.0:
text += f" SCC accuracy multiplier set to {acc:g}."
return self.header + "\n" + __(text, **P, indent=4 * " ").__str__()
# ------------------------------------------------------------------
# Run
# ------------------------------------------------------------------
[docs]
def run(self, extra_args=None):
"""Run the Energy step.
Parameters
----------
extra_args : list of str, optional
Additional xtb command-line arguments to append after the base
arguments. Used by Optimization and Frequencies subclasses to
inject ``--opt`` / ``--ohess`` flags. Defaults to none.
Returns
-------
seamm.Node
The next node object in the flowchart.
"""
next_node = super().run(printer)
# Resolve parameter values (dereferences any flowchart variables)
P = self.parameters.current_values_to_dict(
context=seamm.flowchart_variables._data
)
# Print what we are doing
printer.important(__(self.description_text(P), indent=self.indent))
directory = Path(self.directory)
directory.mkdir(parents=True, exist_ok=True)
# Get the current configuration; refuse periodic input.
# Charge and spin multiplicity come from the configuration, not P.
_, configuration = self.get_system_configuration(None)
self.check_periodicity(configuration)
# Lock in the active model so store_results() formats property names
# correctly.
self._model = P["method"]
# Cite the xTB references appropriate for this calculation.
self._cite_references(P["method"], P.get("solvation model", "none"))
# Write coord.xyz
self.write_coord_xyz(directory, configuration)
# Build the command line. base_xtb_args() reads charge and
# multiplicity from the configuration.
args = self.base_xtb_args(P, configuration)
if extra_args:
args += list(extra_args)
# Allow user-specified extra keywords to be appended.
extra_kw = P.get("extra keywords", []) or []
if isinstance(extra_kw, str):
extra_kw = extra_kw.split()
args += [str(k) for k in extra_kw]
# Default returned files. Globs ensure we pick up files xtb writes
# whose names depend on the calculation type:
# *.xyz -- xtbopt.xyz and any other geometry files xtb writes
# *.mol -- xtbtopo.mol (topology with bond orders)
# *.log -- xtbopt.log (optimization trajectory)
# vibspectrum -- IR spectrum (Turbomole format) from Hessian runs
# hessian -- Hessian matrix (Turbomole format)
# g98.out -- Gaussian-98-format output for visualization
# Subclasses may extend this list further.
return_files = [
"xtbout.json",
"xtbrestart",
"*.xyz",
"*.mol",
"*.log",
"vibspectrum",
"hessian",
"g98.out",
]
result = self.run_xtb(args, return_files=return_files)
# Persist stdout/stderr for the user
if result is not None:
(directory / "xtb.out").write_text(result.get("stdout", ""))
stderr = result.get("stderr", "")
if stderr:
(directory / "xtb.err").write_text(stderr)
# Harvest results
data = self._collect_results(directory, configuration, result)
# Store in the SEAMM property database / variables / tables
self.store_results(configuration=configuration, data=data)
# Print a brief summary into step.out as a table
self.analyze(data=data, P=P)
return next_node
# ------------------------------------------------------------------
# Result collection
# ------------------------------------------------------------------
def _collect_results(self, directory, configuration, run_result):
"""Pull values from xtbout.json (and stdout if needed) into a dict.
Keys here must match the keys in ``metadata["results"]``.
"""
data = {}
json_data = self.read_xtbout_json(directory)
if json_data:
self._harvest_json(json_data, data, configuration)
return data
def _harvest_json(self, json_data, data, configuration):
"""Populate ``data`` from the xtbout.json contents.
The schema as documented by xTB uses these top-level keys, with
mild version-to-version drift:
* ``"total energy"`` (E_h)
* ``"electronic energy"`` (E_h)
* ``"HOMO-LUMO gap / eV"``
* ``"dipole / a.u."`` (3-vector)
* ``"partial charges"``
Defensive: a missing key just means that result does not get
stored, not that the run failed.
"""
if "total energy" in json_data:
data["total_energy"] = float(json_data["total energy"])
if "electronic energy" in json_data:
data["electronic_energy"] = float(json_data["electronic energy"])
# Gap
for key in ("HOMO-LUMO gap / eV", "HOMO-LUMO gap/eV"):
if key in json_data:
data["homo_lumo_gap"] = float(json_data[key])
break
# Orbital energies if present (some xtb versions include them)
for key in ("HOMO orbital eigenvalue / eV", "HOMO/eV"):
if key in json_data:
data["homo_energy"] = float(json_data[key])
break
for key in ("LUMO orbital eigenvalue / eV", "LUMO/eV"):
if key in json_data:
data["lumo_energy"] = float(json_data[key])
break
# Dipole. xtb gives the vector in a.u.; we store both vector and
# magnitude, converted to debye.
dipole_au = None
for key in ("dipole / a.u.", "dipole/a.u."):
if key in json_data:
dipole_au = json_data[key]
break
if dipole_au is not None and len(dipole_au) >= 3:
dx, dy, dz = (float(c) for c in dipole_au[:3])
mag_au = math.sqrt(dx * dx + dy * dy + dz * dz)
data["dipole_vector"] = [
dx * _AU_DIPOLE_TO_DEBYE,
dy * _AU_DIPOLE_TO_DEBYE,
dz * _AU_DIPOLE_TO_DEBYE,
]
data["dipole_moment"] = mag_au * _AU_DIPOLE_TO_DEBYE
if "partial charges" in json_data:
data["partial_charges"] = [float(q) for q in json_data["partial charges"]]
# ------------------------------------------------------------------
# Citations
# ------------------------------------------------------------------
def _cite_references(self, method, solvation_model):
"""Add the appropriate citations to the references handler.
SEAMM's convention (per MOPAC) is to put at level 1 every paper
that contributed to producing the result -- the program, the
method, the dispersion correction, the solvation model, etc. --
and let the user cull the list to fit the citation budget of
their target journal. Level 2 is for component-of-component
references that a typical paper would not include.
For an xTB calculation the level-1 citations are:
- General xTB review (Bannwarth2021), always
- The method-specific paper (Bannwarth2019, Grimme2017,
Pracht2019, Spicher2020) for the active Hamiltonian
- The implicit-solvation paper (Ehlert2021 for ALPB/GBSA,
Stahn2023 for CPCM-X) when solvation is on
- The DFT-D4 papers (Caldeweyher 2017, 2019, 2020) when
GFN2-xTB is used, since GFN2 incorporates a D4-style
dispersion correction. Other GFN methods use older or
different dispersion treatments and do not trigger
these.
"""
if self.references is None:
return
# Level 1: The general xTB review is always relevant.
try:
self.references.cite(
raw=self._bibliography["Bannwarth2021"],
alias="xtb",
module="xtb_step",
level=1,
note="The principle xTB program citation.",
)
except KeyError:
logger.debug("Bannwarth2021 missing from bibliography")
# Level 1: Method-specific citation
method_to_bib = {
"GFN2-xTB": "Bannwarth2019",
"GFN1-xTB": "Grimme2017",
"GFN0-xTB": "Pracht2019",
"GFN-FF": "Spicher2020",
}
bib_key = method_to_bib.get(method)
if bib_key:
try:
self.references.cite(
raw=self._bibliography[bib_key],
alias=method,
module="xtb_step",
level=1,
note=f"Citation for the {method} method.",
)
except KeyError:
logger.debug(f"{bib_key} missing from bibliography")
# Level 1: Solvation citation
if solvation_model in ("ALPB", "GBSA"):
try:
self.references.cite(
raw=self._bibliography["Ehlert2021"],
alias="alpb",
module="xtb_step",
level=1,
note=f"Citation for the {solvation_model} solvation model.",
)
except KeyError:
logger.debug("Ehlert2021 missing from bibliography")
elif solvation_model == "CPCM-X":
try:
self.references.cite(
raw=self._bibliography["Stahn2023"],
alias="cpcmx",
module="xtb_step",
level=1,
note="Citation for the CPCM-X solvation model.",
)
except KeyError:
logger.debug("Stahn2023 missing from bibliography")
# DFT-D4 dispersion citations -- GFN2-xTB uses a
# density-dependent dispersion correction in the D4 family. The
# D4 papers are level-1 citations whenever GFN2 is used,
# following the MOPAC convention of citing dispersion
# corrections at level 1. (GFN1, GFN0, GFN-FF use older D3-style
# or other dispersion treatments and are NOT cited here.)
if method == "GFN2-xTB":
for d4_key, d4_note in (
("Caldeweyher2017", "DFT-D4 dispersion correction (original)."),
("Caldeweyher2019", "DFT-D4 dispersion correction (revised)."),
("Caldeweyher2020", "DFT-D4 dispersion correction (extended)."),
):
try:
self.references.cite(
raw=self._bibliography[d4_key],
alias=d4_key,
module="xtb_step",
level=1,
note=d4_note,
)
except KeyError:
logger.debug(f"{d4_key} missing from bibliography")
# ------------------------------------------------------------------
# Analysis / printing -- table-based
# ------------------------------------------------------------------
[docs]
def analyze(self, indent="", data=None, table=None, P=None):
"""Print the results as a tabulated summary in step.out.
Follows the Gaussian step pattern: subclasses build up a single
``table`` dict and pass it to ``super().analyze()``. The base
Energy class adds the basic energy/HOMO/LUMO/dipole rows and
prints the table at the end. This way, an Optimization or
Frequencies substep can prepend its own rows (e.g. converged-
energy, ZPE) in a single, ordered table.
Parameters
----------
indent : str
Extra indentation (currently unused; ``self.indent`` controls
the final wrap indent).
data : dict
Results from ``_collect_results`` and any subclass extensions.
table : dict, optional
A dict with three lists -- ``"Property"``, ``"Value"``,
``"Units"`` -- that subclasses have already populated. If
None, a fresh empty table is created here.
P : dict, optional
The current parameter values. Not directly used in this
base implementation but accepted so subclasses can pass it.
"""
if data is None:
data = {}
if table is None:
table = {"Property": [], "Value": [], "Units": []}
metadata = xtb_step.metadata.get("results", {})
# Standard energy / orbital / dipole rows. Order matters for
# readability.
keys = (
"total_energy",
"electronic_energy",
"homo_energy",
"lumo_energy",
"homo_lumo_gap",
"dipole_moment",
)
for key in keys:
if key not in data:
continue
mdata = metadata.get(key, {})
label = mdata.get("description", key)
fmt = mdata.get("format", ".4f")
units = mdata.get("units", "")
try:
value_str = f"{float(data[key]):{fmt}}"
except (TypeError, ValueError):
value_str = str(data[key])
table["Property"].append(label)
table["Value"].append(value_str)
table["Units"].append(units)
if not table["Property"]:
printer.normal(
__(
"No results were collected for this xTB step. Check "
"xtb.out and xtb.err for errors.",
indent=4 * " ",
wrap=True,
dedent=False,
)
)
return
tmp = tabulate(
table,
headers="keys",
tablefmt="rounded_outline",
colalign=("center", "decimal", "left"),
disable_numparse=True,
)
length = len(tmp.splitlines()[0])
text_lines = []
title = "xTB Results"
if self._model:
title = f"xTB ({self._model}) Results"
text_lines.append(title.center(length))
text_lines.append(tmp)
text = textwrap.indent("\n".join(text_lines), self.indent + 4 * " ")
printer.normal("")
printer.normal(text)
printer.normal("")