Source code for molsystem.system_db

# -*- coding: utf-8 -*-

"""A dictionary-like object for holding a system
"""

import collections.abc
import logging
import sqlite3

import molsystem
from .cif import CIFMixin
from .configuration import _Configuration
from .properties import _Properties
from .system import _System
from .table import _Table
from .templates import _Templates

logger = logging.getLogger(__name__)



[docs]
class SystemDB(CIFMixin, collections.abc.MutableMapping):
    """A database of systems for SEAMM.

    A class based on a SQLite database for describing molecular and
    periodic systems.

    See the documentation at https://molssi-seamm.github.io/molsystem
    for a complete description of the database schema, including a
    diagram.

    The key concepts are:

    System
        The overall container for the configurations, atoms, bonds,
        etc. A system has one or more configurations (conformers),
        each of which details the atoms, bonds, etc.

    Configuration
        The configuration is a single instance of the system, with
        atoms, coordinates, bonds and subsets. It is close to what
        most programs consider the molecule or crystal.

        There may be different set of atoms in different
        configurations of a single system. This supports e.g. grand
        canonical ensembles.

        The set of bonds may also differ between different
        configurations, whether or not the set of atoms changes. This
        supports e.g. reactive forcefields.

        Each configuration also has its own set of subsets, which are
        a way of collecting atoms into groups, and can be thought of
        as a generalization of the chain and residue nomenclature for
        proteins. A configuration may have any number of subsets, and
        atoms may be in more than one subset.

    Atoms
        Each configuration contains a set of atoms, though different
        configurations may contain different atoms. Each atom is
        identified by its atomic number and unique name and has
        coordinates. It may also have other attributes, but that
        depends on the simulation.

    Bonds
        Each configuration may also have information on bonds between
        the atoms. A bond connects two atoms, and has a bond order
        (single, double, triple, aromatic, ...). In periodic systems
        with an infinite network of bonds, each bond also has a cell
        offset to identify the relative cells of the two atoms.

    Templates and Subsets
        Subsets define groups of atoms in a general way. They are
        defined by a template, which may be nothing more than a type
        and name, e.g. 'residue/ala', or the template may be linked
        with a system which has atoms, bonds, etc.

        Each subset is linked to its group atoms. For example, the
        template 'residue/ala' mentioned above would have a subset for
        each alanine in the protein. If the template were linked to a
        system which was the alanine residue, then each atom would
        also be connected with the appropriate atom in the template
        system, so even if the order and names of atoms in the system
        were different, we could still identify each atom with the
        corresponding atom in the template system.

    The tables that implement this are:

    system
        The list of all the systems in the database

    configuration
        The list of all configurations of all systems, labeled by the
        system they belong to.

    atom
        The list of atoms in all systems and configurations. The
        attributes of the atoms do not depend on the configuration,
        i.e. are unchanging.

    bond
        The list of all bonds in all systems and configurations,
        giving the two atoms that are bonded plus the bond order.

    coordinates
        The fractional or Cartesian coordinates of the atoms as well
        as any other atomic properties that vary by configuration.

    subset
        The instances of subsets.

    element
        The periodic table, used as a foreign key to identify atoms.

    template
        The templates -- labels -- for subsets, which may connect the
        subset to a template system.

    configuration_subset
        A joining table used to define which subsets are "in" a
        configuration.

    subset_atom
        A joining table to define with atoms are in a subset. There is
        an optional field for the template atom if the template has an
        associated system. In this case, the template atom identifies
        which atom in the template system is the same as the given
        atom.

    symmetry
        Information about point or space group symmetry.

    cell
        The information about the periodicity of a configuration, if
        needed.

    atomset
        A set of atoms, used with a joining table to connect atoms
        with configurations.

    atomset_atom
        The joining table connecting atomsets with their atoms.

    bondset
        A set of bonds, used with a joining table to connect bonds
        with configurations.

    bondset_bond
        The joining table connecting bondsets with their bonds.
    """

    def __init__(self, parent=None, logger=logger, **kwargs):
        self._parent = parent
        self.logger = logger
        self._attached = {}
        self._current_system_id = None  # The id of the current system
        self._checkpoints = []
        self._filename = None
        self._db = None
        self._cursor = None
        self._items = {}

        if "filename" in kwargs:
            self.filename = kwargs.pop("filename")
        else:
            self.filename = "seamm.db"

    def __del__(self):
        """Destructor: need to close the database if any."""

        # Delete any cached objects
        del self._items

        # And close the database
        if self._db is not None:
            self.db.commit()
            self.db.close()

    def __enter__(self):
        self.db.commit()

        backup = self.parent.copy_system(self, temporary=True)
        self._checkpoints.append(backup)
        return self

    def __exit__(self, etype, value, traceback):
        backup = self._checkpoints.pop()

        if etype is None:
            self.db.commit()

            # Log the changes
            diffs = self.diff(backup)
            if len(diffs) > 0:
                self.version = self.version + 1

            # Not sure why this commit is needed...
            self.db.commit()
        else:
            self.parent.overwrite(self, backup)

        # and delete the copy
        del self.parent[backup.nickname]

    def __getitem__(self, key):
        """Allow [] access to the dictionary!

        Because some of the items, such as template contain state,
        we need to ensure that the same object is used everywhere. Hence
        the self._items array to store the instances.

        Parameters
        ----------
        key : str
            The table name or name of a multi-table item like atoms.

        Returns
        -------
        table : Table
        """
        if key not in self._items:
            self._items[key] = _Table(self, key)
        return self._items[key]

    def __setitem__(self, key, value):
        """Allow x[key] access to the data"""
        raise NotImplementedError(f"Table '{key}' cannot be created yet")

    def __delitem__(self, key):
        """Allow deletion of keys"""
        if key in self:
            self.cursor.execute(f"DROP TABLE '{key}'")

    def __iter__(self):
        """Allow iteration over the object"""
        return iter(self.list())

    def __len__(self):
        """The len() command"""
        self.cursor.execute(
            "SELECT COUNT(*)" "  FROM sqlite_master" " WHERE type = 'table'"
        )
        return self.cursor.fetchone()[0]

    # def __repr__(self):
    #     """The string representation of this object"""
    #     raise NotImplementedError()

    # def __str__(self):
    #     """The pretty string representation of this object"""
    #     raise NotImplementedError()

    def __contains__(self, table):
        """Return a boolean indicating if a key exists."""
        # Normal the tablename is used as an identifier, so is quoted with ".
        # Here we need it as a string literal so strip any quotes from it.
        if "." in table:
            schema, table = table.split(".")
            schema = schema.strip('"')
        else:
            schema = "main"

        table = table.strip('"')
        self.cursor.execute(
            "SELECT COUNT(*)"
            f" FROM {schema}.sqlite_master"
            f" WHERE type = 'table' and name = '{table}'"
        )
        return self.cursor.fetchone()[0] == 1

    def __eq__(self, other):
        """Return a boolean if this object is equal to another"""
        # LGTM doesn't like using 'is', but this is equivalent.
        if self.id() == other.id():
            return True

        tables = set(self.list())
        other_tables = set(other.list())

        added = tables - other_tables
        deleted = other_tables - tables
        in_common = tables & other_tables

        if len(added) > 0:
            return False
        if len(deleted) > 0:
            return False

        # Need the contents of the tables. See if they are in the same
        # database or if we need to attach the other database temporarily.
        name = self.name
        other_name = other.name
        detach = False
        if name != other_name and not self.is_attached(other):
            # Attach the other system in order to do comparisons.
            self.attach(other)
            detach = True

        # Check the tables in both systems
        result = True
        for table in in_common:
            if self[table] != other[table]:
                result = False
                break

        # Detach the other database if needed
        if detach:
            self.detach(other)

        return result

    @property
    def configurations(self):
        """The list of configuration objects."""
        return [_Configuration(_id, self) for _id in self.configuration_ids]

    @property
    def configuration_ids(self):
        """The list of configuration ids."""
        return [row[0] for row in self.db.execute("SELECT id FROM configuration")]

    @property
    def cursor(self):
        """A database cursor."""
        return self._cursor

    @property
    def db(self):
        """The database connection."""
        return self._db

    @property
    def db_version(self):
        """The version string for the database."""
        self.cursor.execute("SELECT value FROM metadata WHERE key = 'version'")
        return self.cursor.fetchone()[0]

    @property
    def filename(self):
        """The name of the file (or URI) for the database."""
        return self._filename

    @filename.setter
    def filename(self, value):
        if value != self._filename:
            if self._db is not None:
                self.cursor.close()
                self._db.commit()
                self._db.close()
                self._db = None
                self._cursor = None
        self._filename = value
        if self._filename is not None:
            if self._filename[0:5] == "file:":
                self._db = sqlite3.connect(self._filename, uri=True)
            else:
                self._db = sqlite3.connect(self._filename)
            self._db.row_factory = sqlite3.Row
            self._db.execute("PRAGMA foreign_keys = ON")
            self._db.execute("PRAGMA synchronous = normal")
            self._db.execute("PRAGMA temp_store = memory")
            self._db.execute("PRAGMA mmap_size = 30000000000")
            if "mode=ro" not in self._filename:
                self._db.execute("PRAGMA journal_mode = WAL")
            self._cursor = self._db.cursor()
            self._initialize()

    @property
    def names(self):
        """The names of the system."""
        return [row[0] for row in self.db.execute("SELECT name FROM system")]

    @property
    def n_configurations(self):
        """The number of configurations in the database."""
        return self["configuration"].n_rows

    @property
    def n_systems(self):
        """The number of systems in the database."""
        return self["system"].n_rows

    @property
    def n_templates(self):
        """The number of templates."""
        return self.templates.n_rows

    @property
    def parent(self):
        """The parent of this, i.e. a Systems object."""
        return self._parent

    @property
    def properties(self):
        """The class to handle the properties."""
        return _Properties(self)

    @property
    def system(self):
        """The current system object."""
        if self.n_systems == 0:
            return None
        if self._current_system_id is None:
            self._current_system_id = self.system_ids[-1]
        return _System(self, self._current_system_id)

    @system.setter
    def system(self, value):
        # Might be a system or an integer
        if isinstance(value, _System):
            value = value.id

        if value not in self.system_ids:
            raise KeyError(f"system '{value}' does not exist.")
        self._current_system_id = value

    @property
    def systems(self):
        """The list of system objects."""
        return [_System(self, _id) for _id in self.system_ids]

    @property
    def system_ids(self):
        """The list of system ids."""
        return [row[0] for row in self.db.execute("SELECT id FROM system")]

    @property
    def templates(self):
        """The defined templates."""
        return _Templates(self)


[docs]
    def attach(self, other):
        """Attach another system to this one's database.

        Parameters
        ----------
        other : SystemDB
            The other SystemDB object containing the database

        Returns
        -------
        name : str
            The attachment name.
        """
        if not self.is_attached(other):
            # Get a unique name for attaching as
            n = len(self._attached)
            attached_name = f"db_{n}"
            while attached_name in self._attached.values():
                n += 1
                attached_name = f"db_{n}"

            # and attach
            self.db.execute(f"ATTACH DATABASE '{other.filename}' AS '{attached_name}'")
            self._attached[other.filename] = attached_name
        return self._attached[other.filename]



[docs]
    def attached_as(self, other):
        """The attachment name for another system.

        Parameters
        ----------
        other : SystemDB
            The other SystemDB object containing the attached database

        Returns
        -------
        name : str
            The attachment name.
        """
        return self._attached[other.filename]



[docs]
    def attributes(self, tablename: str):
        """The attributes -- columns -- of a given table.

        Parameters
        ----------
        tablename : str
            The name of the table, optionally including the schema followed by
            a dot.

        Returns
        -------
        attributes : Dict[str, Any]
            A dictionary of dictionaries for the attributes and their
            descriptors
        """
        if "." in tablename:
            schema, tablename = tablename.split(".")
            sql = f"PRAGMA {schema}.table_info('{tablename}')"
        else:
            sql = f"PRAGMA table_info('{tablename}')"

        result = {}
        for line in self.db.execute(sql):
            result[line["name"]] = {
                "type": line["type"],
                "notnull": bool(line["notnull"]),
                "default": line["dflt_value"],
                "primary key": bool(line["pk"]),
            }
        return result



[docs]
    def close(self):
        """Close the database."""
        self.filename = None



[docs]
    def create_system(self, name="", make_current=True):
        """Add a new system.

        Parameters
        ----------
        name : str = None
            A user-friendly name for the system, defaults to no name.
        make_current : bool = True
            If True, make this the current system.

        Returns
        -------
        _System
            The newly created system.
        """

        _id = self["system"].append(name=name)[0]

        if make_current:
            self._current_system_id = _id

        return _System(self, _id)



[docs]
    def create_table(self, name, cls=_Table, other=None):
        """Create a new table with the given name.

        Parameters
        ----------
        name : str
            The name of the new table.

        cls : Table subclass
            The class of the new table, defaults to Table

        Returns
        -------
        table : class Table
            The new table
        """
        if name in self:
            raise KeyError(f"'{name}' already exists in the system.")

        self._items[name] = cls(self, name, other)
        return self._items[name]



[docs]
    def delete_system(self, system):
        """Delete an existing system.

        Parameters
        ----------
        system : int or _System
            The system to delete.

        Returns
        -------
        None
        """

        if isinstance(system, _System):
            sid = system.id
        else:
            sid = system

        sql = "DELETE FROM system WHERE id = ?"
        self.db.execute(sql, (sid,))

        if self._current_system_id == sid:
            self._current_system_id = None



[docs]
    def detach(self, other):
        """Detach an attached system.

        Parameters
        ----------
        other : SystemDB
            The other SystemDB object containing the database
        """
        if self.is_attached(other):
            attached_name = self.attached_as(other)
            self.cursor.execute(f'DETACH DATABASE "{attached_name}"')
            del self._attached[other.filename]



[docs]
    def diff(self, other):
        """Differences between this system and another."""
        result = {}

        if self.id() == other.id():
            return result

        tables = set(self.list())
        other_tables = set(other.list())

        added = tables - other_tables
        deleted = other_tables - tables
        in_common = tables & other_tables

        if len(added) > 0:
            result["tables added"] = added
        if len(deleted) > 0:
            result["tables deleted"] = deleted

        # Need the contents of the tables. See if they are in the same
        # database or if we need to attach the other database temporarily.
        name = self.filename
        other_name = other.filename
        detach = False
        if name != other_name and not self.is_attached(other):
            # Attach the other system in order to do comparisons.
            self.attach(other)
            detach = True

        # Check the tables in both systems
        for table in in_common:
            tmp = self[table].diff(other[table])
            if len(tmp) > 0:
                result[f"table '{table}' diffs"] = tmp

        # Detach the other database if needed
        if detach:
            self.detach(other)

        return result



[docs]
    def find_configurations(self, atomset=None, bondset=None):
        """Return the configurations that have given atom- or bondsets

        Parameters
        ----------
        atomset : int = None
            The id of the atomset.
        bondset : int = None
            The id of the bondset.

        Returns
        -------
        [_Configuration]
        """

        if atomset is not None:
            if bondset is not None:
                self.cursor.execute(
                    "SELECT id FROM configuration WHERE atomset = ? AND bondset = ?",
                    (atomset, bondset),
                )
            else:
                self.cursor.execute(
                    "SELECT id FROM configuration WHERE atomset = ?", (atomset,)
                )
            return [
                _Configuration(_id=cid, system_db=self)
                for cid in self.cursor.fetchall()
            ]
        elif bondset is not None:
            self.cursor.execute(
                "SELECT id FROM configuration WHERE bondset = ?", (bondset,)
            )
            return [
                _Configuration(_id=cid, system_db=self)
                for cid in self.cursor.fetchall()
            ]
        else:
            raise RuntimeError("Must give atomset or bondset.")



[docs]
    def get_configuration(self, cid):
        """Return the specified configuration.

        Parameters
        ----------
        cid : int
            The id of the configuration.

        Returns
        -------
        _Configuration
            The requested configuration.
        """
        return _Configuration(_id=cid, system_db=self)



[docs]
    def get_configurations(self, pattern="*"):
        """Return the configurations matching the glob pattern.

        Parameters
        ----------
        pattern : list or str = "*"
            The glob-style pattern for matching the configuration names

        Returns
        -------
        [_configuration]
            The requested configuration ids
        """
        return [
            _Configuration(_id, self) for _id in self.get_configuration_ids(pattern)
        ]



[docs]
    def get_configuration_ids(self, pattern="*"):
        """Return the configuration ids matching the glob pattern.

        Parameters
        ----------
        pattern : list or str = "*"
            The glob-style pattern for matching the configuration names

        Returns
        -------
        [int]
            The requested configuration ids
        """
        if pattern == "*":
            return [row[0] for row in self.db.execute("SELECT id FROM configuration")]

        if isinstance(pattern, list):
            sql = "\n UNION \n".join(
                ["SELECT id FROM configuration WHERE name GLOB ?"] * len(pattern)
            )
            params = pattern
        else:
            sql = "SELECT id FROM configuration WHERE name GLOB ?"
            params = (pattern,)

        return [row[0] for row in self.db.execute(sql, params)]



[docs]
    def get_system(self, id_or_name):
        """Get the specified system object.

        Parameters
        ----------
        id_or_name : int or str
            The id (int) or name (str) of the system

        Returns
        -------
        _System
            The requested system.

        Raises
        ------
        ValueError
            If the system does not exist, or more than one have the requested
            name.
        """
        if isinstance(id_or_name, str):
            # The name of the system ... find it.
            sql = "SELECT id FROM SYSTEM WHERE name = ?"
            systems = [x[0] for x in self.db.execute(sql, (id_or_name,))]
            if len(systems) == 0:
                raise ValueError(f"The system '{id_or_name}' does not exist.")
            elif len(systems) > 1:
                raise ValueError(f"There is more than one system named '{id_or_name}'")
            else:
                id_or_name = systems[0]
        return _System(self, id_or_name)



[docs]
    def get_systems(self, pattern="*"):
        """Return the systems matching the glob pattern.

        Parameters
        ----------
        pattern : list or str = "*"
            The glob-style pattern for matching the system names

        Returns
        -------
        [_system]
            The requested system ids
        """
        return [_System(self, _id) for _id in self.get_system_ids(pattern)]



[docs]
    def get_system_ids(self, pattern="*"):
        """Return the system ids matching the glob pattern.

        Parameters
        ----------
        pattern : list or str = "*"
            The glob-style pattern for matching the system names

        Returns
        -------
        [int]
            The requested system ids
        """
        if pattern == "*":
            return [row[0] for row in self.db.execute("SELECT id FROM system")]

        if isinstance(pattern, list):
            sql = "\n UNION \n".join(
                ["SELECT id FROM system WHERE name GLOB ?"] * len(pattern)
            )
            params = pattern
        else:
            sql = "SELECT id FROM system WHERE name GLOB ?"
            params = (pattern,)
        return [row[0] for row in self.db.execute(sql, params)]



[docs]
    def is_attached(self, other):
        """Return whether another system is attached to this one.

        Parameters
        ----------
        other : SystemDB
            The other SystemDB object containing the database

        Returns
        -------
        bool
            Whether the database is attached.
        """
        return other.filename in self._attached



[docs]
    def list(self):
        """Return a list of all the tables in the system."""
        result = []
        for row in self.db.execute(
            "SELECT name" "  FROM sqlite_master" " WHERE type = 'table'"
        ):
            result.append(row["name"])
        return result


    def _initialize(self):
        """Initialize the SQLite database.

        The order is a bit tricky, since many tables reference other tables,
        and hence need to be created after the ones that they reference.

        Start with the standalone tables:
        """

        # If the database is initialized, the metadata table exists.
        # In the future we might need to check the version and upgrade
        # older versions, but now at version 1.0 we are all done!
        if "metadata" not in self:
            # metadata, where we store, get the database version
            table = self["metadata"]
            table.add_attribute("key", coltype="str", pk=True)
            table.add_attribute("value", coltype="str")

            table.append(key="version", value="1.0")
            self.db.commit()

            # The element table
            table = self["element"]
            table.add_attribute("atno", coltype="int", pk=True)
            table.add_attribute("symbol", coltype="str", index="unique")
            table.add_attribute("mass", coltype="float")

            # and fill it from the element data
            for symbol, data in molsystem.elements.data.items():
                table.append(
                    symbol=symbol,
                    atno=data["atomic number"],
                    mass=data["atomic weight"],
                )
            self.db.commit()

            # Symmetry information
            table = self["symmetry"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("group", coltype="str", default="C1")
            table.add_attribute("symops", coltype="str", default="x,y,z")

            # Periodic cell information
            table = self["cell"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("a", coltype="float", default=10.0)
            table.add_attribute("b", coltype="float", default=10.0)
            table.add_attribute("c", coltype="float", default=10.0)
            table.add_attribute("alpha", coltype="float", default=90.0)
            table.add_attribute("beta", coltype="float", default=90.0)
            table.add_attribute("gamma", coltype="float", default=90.0)

            # The systems themselves
            table = self["system"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("name", coltype="str", notnull=True, default="default")

            # The atoms, and the sets of atoms
            table = self["atom"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("atno", coltype="int", references="element")

            table = self["atomset"]
            table.add_attribute("id", coltype="int", pk=True)

            table = self["atomset_atom"]
            table.add_attribute("atomset", coltype="int", references="atomset")
            table.add_attribute("atom", coltype="int", references="atom")
            self.db.execute(
                "CREATE INDEX 'idx_atomset_atom_atomset_atom' "
                'ON atomset_atom ("atomset", "atom")'
            )

            # The bonds, and sets of bonds
            table = self["bond"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("i", coltype="int", references="atom")
            table.add_attribute("j", coltype="int", references="atom")
            table.add_attribute("bondorder", coltype="int", default=1)
            table.add_attribute("symop1", coltype="str", default=".")
            table.add_attribute("symop2", coltype="str", default=".")

            table = self["bondset"]
            table.add_attribute("id", coltype="int", pk=True)

            table = self["bondset_bond"]
            table.add_attribute("bondset", coltype="int", references="bondset")
            table.add_attribute("bond", coltype="int", references="bond")
            self.db.execute(
                "CREATE INDEX 'idx_bondset_bond_bondset_bond' "
                'ON bondset_bond ("bondset", "bond")'
            )

            # Now we can set up the configurations
            table = self["configuration"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("name", coltype="str")
            table.add_attribute("system", coltype="int", references="system")
            table.add_attribute("version", coltype="int", notnull=True, default=0)
            table.add_attribute("periodicity", coltype="int", default=0)
            table.add_attribute("coordinate_system", coltype="str", default="Cartesian")
            table.add_attribute("symmetry", coltype="int", references="symmetry")
            table.add_attribute("cell", coltype="int", references="cell")
            table.add_attribute("atomset", coltype="int", references="atomset")
            table.add_attribute("bondset", coltype="int", references="bondset")
            table.add_attribute("charge", coltype="int", notnull=True, default=0)
            table.add_attribute("spin_multiplicity", coltype="int")
            table.add_attribute("n_active_electrons", coltype="int", default=0)
            table.add_attribute("n_active_orbitals", coltype="int", default=0)
            table.add_attribute("state", coltype="str", default="1")
            self.db.execute(
                "CREATE INDEX 'idx_configuration_system' ON configuration(system)"
            )

            # And coordinates, which depend on configurations
            table = self["coordinates"]
            table.add_attribute(
                "configuration", coltype="int", references="configuration"
            )
            table.add_attribute("atom", coltype="int", references="atom")
            table.add_attribute("x", coltype="float")
            table.add_attribute("y", coltype="float")
            table.add_attribute("z", coltype="float")
            self.db.execute(
                "CREATE INDEX 'idx_coordinates_atom' ON coordinates (\"atom\")"
            )
            self.db.execute(
                "CREATE INDEX idx_coordinates_atom_configuration "
                "    ON coordinates(atom, configuration)"
            )

            # Keep the velocities separate from coordinates to keep the size down
            table = self["velocities"]
            table.add_attribute(
                "configuration", coltype="int", references="configuration"
            )
            table.add_attribute("atom", coltype="int", references="atom")
            table.add_attribute("vx", coltype="float")
            table.add_attribute("vy", coltype="float")
            table.add_attribute("vz", coltype="float")
            self.db.execute(
                "CREATE INDEX 'idx_velocities_atom' ON velocities (\"atom\")"
            )
            self.db.execute(
                "CREATE INDEX idx_velocities_atom_configuration "
                "    ON velocities(atom, configuration)"
            )

            # Keep the gradients separate from coordinates to keep the size down
            table = self["gradients"]
            table.add_attribute(
                "configuration", coltype="int", references="configuration"
            )
            table.add_attribute("atom", coltype="int", references="atom")
            table.add_attribute("gx", coltype="float")
            table.add_attribute("gy", coltype="float")
            table.add_attribute("gz", coltype="float")
            self.db.execute("CREATE INDEX 'idx_gradients_atom' ON gradients (\"atom\")")
            self.db.execute(
                "CREATE INDEX idx_gradients_atom_configuration "
                "    ON gradients(atom, configuration)"
            )

            # The definition of the subsets -- templates
            table = self["template"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("name", coltype="str")
            table.add_attribute("canonical_smiles", coltype="str")
            table.add_attribute("category", coltype="str", default="general")
            table.add_attribute(
                "configuration", coltype="int", references="configuration"
            )
            self.db.execute(
                "CREATE UNIQUE INDEX 'idx_template_name_type'"
                '    ON template ("name", "type")'
            )

            # The subsets
            table = self["subset"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute(
                "configuration", coltype="int", references="configuration"
            )
            table.add_attribute("template", coltype="int", references="template")
            self.db.execute(
                "CREATE INDEX subset_idx_template_configuration "
                "ON subset(template, configuration)"
            )

            # The connection between subsets and the atoms in the system
            table = self["subset_atom"]
            table.add_attribute("atom", coltype="int", references="atom")
            table.add_attribute("subset", coltype="int", references="subset")
            table.add_attribute("templateatom", coltype="int", references="atom")
            self.db.execute(
                "CREATE INDEX 'idx_subset_atom_subset_atom' "
                'ON subset_atom ("subset", "atom")'
            )

            # Collections of systems
            table = self["system_collection"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("name", coltype="str")

            table = self["system_collection_system"]
            table.add_attribute(
                "system_collection", coltype="int", references="system_collection"
            )
            table.add_attribute("system", coltype="int", references="system")
            self.db.execute(
                "CREATE INDEX 'idx_system_collection_system' "
                'ON atomset_atom ("system_collection", "system")'
            )

            # Collections of configurations
            table = self["configuration_collection"]
            table.add_attribute("id", coltype="int", pk=True)
            table.add_attribute("name", coltype="str")

            table = self["configuration_collection_configuration"]
            table.add_attribute(
                "configuration_collection",
                coltype="int",
                references="configuration_collection",
            )
            table.add_attribute(
                "configuration", coltype="int", references="configuration"
            )
            self.db.execute(
                "CREATE INDEX 'idx_configuration_collection_configuration' "
                'ON atomset_atom ("configuration_collection", "configuration")'
            )

            #####################################################
            # The star schema for storing properties and features
            #####################################################

            properties = self.properties
            properties.create_schema()

            self.db.commit()


[docs]
    def system_exists(self, id_or_name):
        """See if the given system exists.

        Parameters
        ----------
        id_or_name : int or str
            The id (int) or name (str) of the system

        Returns
        -------
        bool
            Whether it exists.
        """
        if isinstance(id_or_name, str):
            # The name of the system ... find it.
            sql = "SELECT COUNT(*) FROM SYSTEM WHERE name = ?"
            self.cursor.execute(sql, (id_or_name,))
        else:
            sql = "SELECT COUNT(*) FROM SYSTEM WHERE id = ?"
            self.cursor.execute(sql, (id_or_name,))

        return self.cursor.fetchone()[0] != 0