Source code for xtb_step.energy

# -*- coding: utf-8 -*-

"""Non-graphical part of the Energy step in an xTB flowchart."""

import importlib.resources
import logging
import math
from pathlib import Path
import pprint  # noqa: F401
import textwrap

from tabulate import tabulate

import xtb_step
from .substep import Substep
import molsystem
import seamm
from seamm_util import ureg, Q_  # noqa: F401
import seamm_util.printing as printing
from seamm_util.printing import FormattedText as __

logger = logging.getLogger(__name__)
job = printing.getPrinter()
printer = printing.getPrinter("xTB")

# Add this module's properties to the standard properties
path = importlib.resources.files("xtb_step") / "data"
csv_file = path / "properties.csv"
if path.exists():
    molsystem.add_properties_from_file(csv_file)

# Conversion factor: xtb reports dipole in atomic units (e * Bohr).
#                   1 a.u. (dipole) = 2.541746229 D (CODATA via Pint).
# We compute it from Pint to stay consistent with the rest of SEAMM.
_AU_DIPOLE_TO_DEBYE = (
    (1.0 * ureg.elementary_charge * ureg.bohr).to(ureg.debye).magnitude
)


[docs] class Energy(Substep): """Run an xTB single-point energy. See Also -------- Substep, EnergyParameters, TkEnergy """ def __init__(self, flowchart=None, title="Energy", extension=None, logger=logger): """A substep for an xTB single-point energy. Parameters ---------- flowchart : seamm.Flowchart The non-graphical flowchart that contains this step. title : str The name displayed in the flowchart. extension : None Not yet implemented. logger : Logger The logger to use and pass to parent classes. """ logger.debug(f"Creating Energy {self}") super().__init__( flowchart=flowchart, title=title, extension=extension, module=__name__, logger=logger, ) self._calculation = "Energy" self._model = None self._metadata = xtb_step.metadata self.parameters = xtb_step.EnergyParameters() @property def version(self): """The semantic version of this module.""" return xtb_step.__version__ @property def git_revision(self): """The git version of this module.""" return xtb_step.__git_revision__ # ------------------------------------------------------------------ # Description # ------------------------------------------------------------------
[docs] def description_text(self, P=None, calculation_type="single-point energy"): """Create the text description of what this step will do. Note: charge and spin multiplicity are NOT mentioned here. They are properties of the system, not of the calculation, and the SEAMM convention is to not echo them at the step level. """ if not P: P = self.parameters.values_to_dict() method = P["method"] text = ( f"An xTB {calculation_type} calculation using the {method} " "Hamiltonian" ) # Solvation smodel = P.get("solvation model", "none") if smodel != "none": text += f" with implicit solvation in {P['solvent']} ({smodel})" text += "." # Accuracy try: acc = float(P.get("accuracy", 1.0)) except (TypeError, ValueError): acc = 1.0 if acc != 1.0: text += f" SCC accuracy multiplier set to {acc:g}." return self.header + "\n" + __(text, **P, indent=4 * " ").__str__()
# ------------------------------------------------------------------ # Run # ------------------------------------------------------------------
[docs] def run(self, extra_args=None): """Run the Energy step. Parameters ---------- extra_args : list of str, optional Additional xtb command-line arguments to append after the base arguments. Used by Optimization and Frequencies subclasses to inject ``--opt`` / ``--ohess`` flags. Defaults to none. Returns ------- seamm.Node The next node object in the flowchart. """ next_node = super().run(printer) # Resolve parameter values (dereferences any flowchart variables) P = self.parameters.current_values_to_dict( context=seamm.flowchart_variables._data ) # Print what we are doing printer.important(__(self.description_text(P), indent=self.indent)) directory = Path(self.directory) directory.mkdir(parents=True, exist_ok=True) # Get the current configuration; refuse periodic input. # Charge and spin multiplicity come from the configuration, not P. _, configuration = self.get_system_configuration(None) self.check_periodicity(configuration) # Lock in the active model so store_results() formats property names # correctly. self._model = P["method"] # Cite the xTB references appropriate for this calculation. self._cite_references(P["method"], P.get("solvation model", "none")) # Write coord.xyz self.write_coord_xyz(directory, configuration) # Build the command line. base_xtb_args() reads charge and # multiplicity from the configuration. args = self.base_xtb_args(P, configuration) if extra_args: args += list(extra_args) # Allow user-specified extra keywords to be appended. extra_kw = P.get("extra keywords", []) or [] if isinstance(extra_kw, str): extra_kw = extra_kw.split() args += [str(k) for k in extra_kw] # Default returned files. Globs ensure we pick up files xtb writes # whose names depend on the calculation type: # *.xyz -- xtbopt.xyz and any other geometry files xtb writes # *.mol -- xtbtopo.mol (topology with bond orders) # *.log -- xtbopt.log (optimization trajectory) # vibspectrum -- IR spectrum (Turbomole format) from Hessian runs # hessian -- Hessian matrix (Turbomole format) # g98.out -- Gaussian-98-format output for visualization # Subclasses may extend this list further. return_files = [ "xtbout.json", "xtbrestart", "*.xyz", "*.mol", "*.log", "vibspectrum", "hessian", "g98.out", ] result = self.run_xtb(args, return_files=return_files) # Persist stdout/stderr for the user if result is not None: (directory / "xtb.out").write_text(result.get("stdout", "")) stderr = result.get("stderr", "") if stderr: (directory / "xtb.err").write_text(stderr) # Harvest results data = self._collect_results(directory, configuration, result) # Store in the SEAMM property database / variables / tables self.store_results(configuration=configuration, data=data) # Print a brief summary into step.out as a table self.analyze(data=data, P=P) return next_node
# ------------------------------------------------------------------ # Result collection # ------------------------------------------------------------------ def _collect_results(self, directory, configuration, run_result): """Pull values from xtbout.json (and stdout if needed) into a dict. Keys here must match the keys in ``metadata["results"]``. """ data = {} json_data = self.read_xtbout_json(directory) if json_data: self._harvest_json(json_data, data, configuration) return data def _harvest_json(self, json_data, data, configuration): """Populate ``data`` from the xtbout.json contents. The schema as documented by xTB uses these top-level keys, with mild version-to-version drift: * ``"total energy"`` (E_h) * ``"electronic energy"`` (E_h) * ``"HOMO-LUMO gap / eV"`` * ``"dipole / a.u."`` (3-vector) * ``"partial charges"`` Defensive: a missing key just means that result does not get stored, not that the run failed. """ if "total energy" in json_data: data["total_energy"] = float(json_data["total energy"]) if "electronic energy" in json_data: data["electronic_energy"] = float(json_data["electronic energy"]) # Gap for key in ("HOMO-LUMO gap / eV", "HOMO-LUMO gap/eV"): if key in json_data: data["homo_lumo_gap"] = float(json_data[key]) break # Orbital energies if present (some xtb versions include them) for key in ("HOMO orbital eigenvalue / eV", "HOMO/eV"): if key in json_data: data["homo_energy"] = float(json_data[key]) break for key in ("LUMO orbital eigenvalue / eV", "LUMO/eV"): if key in json_data: data["lumo_energy"] = float(json_data[key]) break # Dipole. xtb gives the vector in a.u.; we store both vector and # magnitude, converted to debye. dipole_au = None for key in ("dipole / a.u.", "dipole/a.u."): if key in json_data: dipole_au = json_data[key] break if dipole_au is not None and len(dipole_au) >= 3: dx, dy, dz = (float(c) for c in dipole_au[:3]) mag_au = math.sqrt(dx * dx + dy * dy + dz * dz) data["dipole_vector"] = [ dx * _AU_DIPOLE_TO_DEBYE, dy * _AU_DIPOLE_TO_DEBYE, dz * _AU_DIPOLE_TO_DEBYE, ] data["dipole_moment"] = mag_au * _AU_DIPOLE_TO_DEBYE if "partial charges" in json_data: data["partial_charges"] = [float(q) for q in json_data["partial charges"]] # ------------------------------------------------------------------ # Citations # ------------------------------------------------------------------ def _cite_references(self, method, solvation_model): """Add the appropriate citations to the references handler. SEAMM's convention (per MOPAC) is to put at level 1 every paper that contributed to producing the result -- the program, the method, the dispersion correction, the solvation model, etc. -- and let the user cull the list to fit the citation budget of their target journal. Level 2 is for component-of-component references that a typical paper would not include. For an xTB calculation the level-1 citations are: - General xTB review (Bannwarth2021), always - The method-specific paper (Bannwarth2019, Grimme2017, Pracht2019, Spicher2020) for the active Hamiltonian - The implicit-solvation paper (Ehlert2021 for ALPB/GBSA, Stahn2023 for CPCM-X) when solvation is on - The DFT-D4 papers (Caldeweyher 2017, 2019, 2020) when GFN2-xTB is used, since GFN2 incorporates a D4-style dispersion correction. Other GFN methods use older or different dispersion treatments and do not trigger these. """ if self.references is None: return # Level 1: The general xTB review is always relevant. try: self.references.cite( raw=self._bibliography["Bannwarth2021"], alias="xtb", module="xtb_step", level=1, note="The principle xTB program citation.", ) except KeyError: logger.debug("Bannwarth2021 missing from bibliography") # Level 1: Method-specific citation method_to_bib = { "GFN2-xTB": "Bannwarth2019", "GFN1-xTB": "Grimme2017", "GFN0-xTB": "Pracht2019", "GFN-FF": "Spicher2020", } bib_key = method_to_bib.get(method) if bib_key: try: self.references.cite( raw=self._bibliography[bib_key], alias=method, module="xtb_step", level=1, note=f"Citation for the {method} method.", ) except KeyError: logger.debug(f"{bib_key} missing from bibliography") # Level 1: Solvation citation if solvation_model in ("ALPB", "GBSA"): try: self.references.cite( raw=self._bibliography["Ehlert2021"], alias="alpb", module="xtb_step", level=1, note=f"Citation for the {solvation_model} solvation model.", ) except KeyError: logger.debug("Ehlert2021 missing from bibliography") elif solvation_model == "CPCM-X": try: self.references.cite( raw=self._bibliography["Stahn2023"], alias="cpcmx", module="xtb_step", level=1, note="Citation for the CPCM-X solvation model.", ) except KeyError: logger.debug("Stahn2023 missing from bibliography") # DFT-D4 dispersion citations -- GFN2-xTB uses a # density-dependent dispersion correction in the D4 family. The # D4 papers are level-1 citations whenever GFN2 is used, # following the MOPAC convention of citing dispersion # corrections at level 1. (GFN1, GFN0, GFN-FF use older D3-style # or other dispersion treatments and are NOT cited here.) if method == "GFN2-xTB": for d4_key, d4_note in ( ("Caldeweyher2017", "DFT-D4 dispersion correction (original)."), ("Caldeweyher2019", "DFT-D4 dispersion correction (revised)."), ("Caldeweyher2020", "DFT-D4 dispersion correction (extended)."), ): try: self.references.cite( raw=self._bibliography[d4_key], alias=d4_key, module="xtb_step", level=1, note=d4_note, ) except KeyError: logger.debug(f"{d4_key} missing from bibliography") # ------------------------------------------------------------------ # Analysis / printing -- table-based # ------------------------------------------------------------------
[docs] def analyze(self, indent="", data=None, table=None, P=None): """Print the results as a tabulated summary in step.out. Follows the Gaussian step pattern: subclasses build up a single ``table`` dict and pass it to ``super().analyze()``. The base Energy class adds the basic energy/HOMO/LUMO/dipole rows and prints the table at the end. This way, an Optimization or Frequencies substep can prepend its own rows (e.g. converged- energy, ZPE) in a single, ordered table. Parameters ---------- indent : str Extra indentation (currently unused; ``self.indent`` controls the final wrap indent). data : dict Results from ``_collect_results`` and any subclass extensions. table : dict, optional A dict with three lists -- ``"Property"``, ``"Value"``, ``"Units"`` -- that subclasses have already populated. If None, a fresh empty table is created here. P : dict, optional The current parameter values. Not directly used in this base implementation but accepted so subclasses can pass it. """ if data is None: data = {} if table is None: table = {"Property": [], "Value": [], "Units": []} metadata = xtb_step.metadata.get("results", {}) # Standard energy / orbital / dipole rows. Order matters for # readability. keys = ( "total_energy", "electronic_energy", "homo_energy", "lumo_energy", "homo_lumo_gap", "dipole_moment", ) for key in keys: if key not in data: continue mdata = metadata.get(key, {}) label = mdata.get("description", key) fmt = mdata.get("format", ".4f") units = mdata.get("units", "") try: value_str = f"{float(data[key]):{fmt}}" except (TypeError, ValueError): value_str = str(data[key]) table["Property"].append(label) table["Value"].append(value_str) table["Units"].append(units) if not table["Property"]: printer.normal( __( "No results were collected for this xTB step. Check " "xtb.out and xtb.err for errors.", indent=4 * " ", wrap=True, dedent=False, ) ) return tmp = tabulate( table, headers="keys", tablefmt="rounded_outline", colalign=("center", "decimal", "left"), disable_numparse=True, ) length = len(tmp.splitlines()[0]) text_lines = [] title = "xTB Results" if self._model: title = f"xTB ({self._model}) Results" text_lines.append(title.center(length)) text_lines.append(tmp) text = textwrap.indent("\n".join(text_lines), self.indent + 4 * " ") printer.normal("") printer.normal(text) printer.normal("")