Source code for molsystem.cif

# -*- coding: utf-8 -*-

"""Functions for handling CIF files

Bond Orders
-----------
1 sing	single bond
2 doub	double bond
3 trip	triple bond
4 quad	quadruple bond
5 arom	aromatic bond
6 delo	delocalized double bond
7 pi	pi bond
8 poly	polymeric bond
"""

import io
import json
import logging
import re

import CifFile

logger = logging.getLogger(__name__)

bond_order = {
    1: "SING",
    2: "DOUB",
    3: "TRIP",
    4: "QUAD",
    5: "AROM",
    6: "DELO",
    7: "PI",
    8: "POLY",
}
to_bond_order = {j: i for i, j in bond_order.items()}



[docs]
class CIFMixin:
    """A mixin for handling CIF files."""


[docs]
    def read_cif_file(self, path):
        """Create new systems from a CIF file.

        Read a CIF file and create a new system from each datablock in
        the file.

        If the datablock has an ensemble, as denoted by a section
        '_pdbx_nmr_ensemble', a configuration will be created for each
        conformer. If there is a representative conformer, the current
        configuration will point to it; otherwise to the last conformer.

        Parameters
        ----------
        path : str or Path
            A string or Path object pointing to the file to be read.

        Returns
        -------
        [_System]
            List of systems created.
        """
        if "SystemDB" in str(type(self)):
            sys_db = self
        else:
            sys_db = self.system_db

        lines = []
        systems = []
        configurations = {}
        in_block = False
        block_name = ""
        with open(path, "r") as fd:
            for line in fd:
                if line[0:5] == "data_":
                    logger.debug(f"Found block {line}")
                    if not in_block:
                        in_block = True
                    else:
                        new_systems, new_configurations = self.from_mmcif_text(
                            "\n".join(lines)
                        )
                        logger.debug(
                            f"   added system {sys_db.n_systems}: {block_name}"
                        )
                        systems.extend(new_systems)
                        configurations.update(new_configurations)
                    block_name = line[5:].strip()
                    lines = []
                lines.append(line)

            if len(lines) > 0:
                # The last block just ends at the end of the file
                new_systems, new_configurations = self.from_mmcif_text("\n".join(lines))
                logger.debug(f"   added system {sys_db.n_systems}: {block_name}")
                systems.extend(new_systems)
                configurations.update(new_configurations)

        return systems, configurations



[docs]
    def to_cif_text(self):
        """Create the text of a CIF file from this configuration.

        Returns
        -------
        text : str
            The text of the file.
        """

        lines = []

        atoms = self.atoms

        # Get the chemical formula
        formula, empirical_formula, Z = self.formula

        system_name = self.system.name
        conf_name = self.name
        if system_name == "" or conf_name == "":
            _id = empirical_formula.replace(" ", "")
        else:
            _id = f"SEAMM:{system_name}/{conf_name}"
            _id = _id.replace(" ", "")

        # And create the file, line-by-line
        lines = []
        lines.append("# Generated by MolSSI SEAMM")
        lines.append(f"data_{_id}")

        # Cell information
        if self.periodicity == 3:
            cell = self.cell
            symmetry = self.symmetry
            a, b, c, alpha, beta, gamma = cell.parameters
            volume = cell.volume
            spgname = symmetry.group
            if spgname == "":
                pass
            elif symmetry.n_symops == 1:
                lines.append("_space_group_name_H-M_full   'P 1'")
            else:
                spgname_system = symmetry.spacegroup_names_to_system()[spgname]
                lines.append(f"_space_group_{spgname_system}       '{spgname}'")
                hall = symmetry.hall_symbol
                lines.append(f"_symmetry_space_group_name_Hall '{hall}'")
                it_number = symmetry.IT_number
                lines.append(f"_space_group_IT_number          {it_number}")
            lines.append(f"_cell_length_a   {a}")
            lines.append(f"_cell_length_b   {b}")
            lines.append(f"_cell_length_c   {c}")
            lines.append(f"_cell_angle_alpha   {alpha}")
            lines.append(f"_cell_angle_beta    {beta}")
            lines.append(f"_cell_angle_gamma   {gamma}")
            lines.append(f"_cell_volume   {volume}")
            lines.append(f"_cell_formula_units_Z   {Z}")
            lines.append("loop_")
            lines.append(" _symmetry_equiv_pos_site_id")
            lines.append(" _symmetry_equiv_pos_as_xyz")
            if symmetry.n_symops == 1:
                lines.append("  1  x,y,z")
            else:
                for i, op in enumerate(symmetry.symops, start=1):
                    lines.append(f" {i:2} {op}")

        lines.append(f"_chemical_formula_structural   '{empirical_formula}'")
        lines.append(f"_chemical_formula_sum   '{formula}'")

        # The atoms
        lines.append("loop_")
        lines.append(" _atom_site_type_symbol")
        lines.append(" _atom_site_label")
        lines.append(" _atom_site_symmetry_multiplicity")
        lines.append(" _atom_site_fract_x")
        lines.append(" _atom_site_fract_y")
        lines.append(" _atom_site_fract_z")
        lines.append(" _atom_site_occupancy")

        # Need unique names
        if "names" in atoms:
            original_names = atoms.get_column("names")
        else:
            original_names = atoms.asymmetric_symbols

        names = []
        tmp = {}
        for name in original_names:
            if name in tmp:
                tmp[name] += 1
                names.append(name + str(tmp[name]))
            else:
                tmp[name] = 1
                names.append(name + "1")

        if symmetry.n_symops == 1:
            UVW = atoms.get_coordinates(
                fractionals=True, in_cell="molecule", asymmetric=True
            )
        else:
            # For the moment can't center molecules in cell with symmetry.
            UVW = atoms.get_coordinates(fractionals=True, asymmetric=True)

        symbols = atoms.asymmetric_symbols
        for element, name, uvw in zip(symbols, names, UVW):
            u, v, w = uvw
            lines.append(f"  {element} {name}  1  {u:.3f} {v:.3f} {w:.3f}  1")

        # Handle bonds.
        if self.n_asymmetric_bonds > 0:
            bonds = self.bonds
            sym_bonds = bonds.bonds_for_asymmetric_bonds
            indx = {j: i for i, j in enumerate(atoms.ids)}
            Is = [indx[i] for i in bonds.get_column_data("i")]
            Js = [indx[j] for j in bonds.get_column_data("j")]
            op1s = bonds.get_column_data("symop1")
            op2s = bonds.get_column_data("symop2")
            Rs = bonds.get_lengths(asymmetric=True)
            lines.append("loop_")
            lines.append("_geom_bond_atom_site_label_1")
            lines.append("_geom_bond_atom_site_label_2")
            lines.append("_geom_bond_distance")
            lines.append("_geom_bond_site_symmetry_1")
            lines.append("_geom_bond_site_symmetry_2")
            for i, j, r, op1, op2, n_sym_bonds in zip(
                Is, Js, Rs, op1s, op2s, [len(x) for x in sym_bonds]
            ):
                if n_sym_bonds > 0:
                    lines.append(f"  {names[i]} {names[j]} {r:.4f} {op1} {op2}")

        # And that is it!
        lines.append("")
        return "\n".join(lines)



[docs]
    def from_cif_text(self, text):
        """Create this configuration from a CIF file..

        Parameters
        ----------
        text : str
            The text from the CIF file

        Returns
        -------
        None
        """

        result = ""
        cif = CifFile.ReadCif(io.StringIO(text))

        data_blocks = [*cif.keys()]

        if len(data_blocks) != 1:
            raise RuntimeError(
                f"There are {len(data_blocks)} data blocks in the cif file."
            )
        data_block = cif[data_blocks[0]]

        logger.debug(json.dumps({**data_block}, indent=4, sort_keys=True))

        # Reset the system
        self.clear()

        # The cell
        dot = "."
        if "_cell_length_a" in data_block:
            dot = "_"
        a = data_block["_cell" + dot + "length_a"].split("(")[0]
        b = data_block["_cell" + dot + "length_b"].split("(")[0]
        c = data_block["_cell" + dot + "length_c"].split("(")[0]
        alpha = data_block["_cell" + dot + "angle_alpha"].split("(")[0]
        beta = data_block["_cell" + dot + "angle_beta"].split("(")[0]
        gamma = data_block["_cell" + dot + "angle_gamma"].split("(")[0]
        if float(a) != 1 and float(b) != 1 and float(c) != 1:
            self.periodicity = 3
            self.coordinate_system = "fractional"
            self.cell.parameters = (a, b, c, alpha, beta, gamma)

            # Where is the symmetry info?
            used_symops = False
            if "_space_group_symop" + dot + "operation_xyz" in data_block:
                symdata = "_space_group_symop" + dot + "operation_xyz"
                self.symmetry.symops = data_block[symdata]
                used_symops = True
            elif "_symmetry_equiv" + dot + "pos_as_xyz" in data_block:
                symdata = "_symmetry_equiv" + dot + "pos_as_xyz"
                self.symmetry.symops = data_block[symdata]
                used_symops = True
            else:
                found = False
                for section in (
                    "_space_group" + dot + "name_Hall",
                    "_space_group" + dot + "name_H-M_full",
                    "_space_group" + dot + "name_H-M_alt",
                    "_symmetry" + dot + "space_group_name_Hall",
                    "_symmetry" + dot + "space_group_name_H-M",
                ):
                    if section in data_block:
                        found = True
                        self.symmetry.group = data_block[section]
                if not found:
                    raise RuntimeError(
                        "CIF file does not contain required symmetry information. "
                        "Neither "
                        "'_symmetry_equiv" + dot + "pos_as_xyz' or "
                        "'_space_group_symop" + dot + "operation_xyz' or "
                        "_symmetry" + dot + "space_group_name_Hall  or "
                        "_symmetry" + dot + "space_group_name_H-M "
                        "is present."
                    )

            x_label = "_atom_site" + dot + "fract_x"
            if x_label in data_block:
                have_fractionals = True
                y_label = "_atom_site" + dot + "fract_y"
                z_label = "_atom_site" + dot + "fract_z"
            else:
                have_fractionals = False
                x_label = "_atom_site" + dot + "Cartn_x"
                y_label = "_atom_site" + dot + "Cartn_y"
                z_label = "_atom_site" + dot + "Cartn_z"
        else:
            x_label = "_atom_site" + dot + "Cartn_x"
            y_label = "_atom_site" + dot + "Cartn_y"
            z_label = "_atom_site" + dot + "Cartn_z"

        n_atoms = len(data_block[x_label])

        # Check for alternate sites, warn and take highest occupation
        use_alt_id = None
        labels = []
        alt_ids = None
        if "_atom_site" + dot + "label_alt_id" in data_block:
            key = "_atom_site" + dot + "label_alt_id"
            if any(x != "." for x in data_block[key]):
                comp_id = "_atom_site" + dot + "label_comp_id"
                asym_id = "_atom_site" + dot + "label_asym_id"
                entity_id = "_atom_site" + dot + "label_entity_id"
                seq_id = "_atom_site" + dot + "label_seq_id"
                if (
                    comp_id in data_block
                    and asym_id in data_block
                    and entity_id in data_block
                    and seq_id in data_block
                ):
                    alt_ids = data_block[key]
                    comp_ids = data_block[comp_id]
                    asym_ids = data_block[asym_id]
                    entity_ids = data_block[entity_id]
                    seq_ids = data_block[seq_id]

                    occupancy = "_atom_site" + dot + "occupancy"
                    if occupancy in data_block:
                        occs = data_block[occupancy]
                    else:
                        result += (
                            "Warning: No occupancy information in CIF file with alt "
                            "ids.\n\n"
                        )
                        occs = [1] * n_atoms

                    label = ""
                    alt_data = {}
                    for alt, comp, asym, entity, seq, occ in zip(
                        alt_ids, comp_ids, asym_ids, entity_ids, seq_ids, occs
                    ):
                        label = (asym, entity, seq)
                        labels.append(label)
                        if alt != ".":
                            if label not in alt_data:
                                alt_data[label] = {}
                            data = alt_data[label]
                            if alt not in data:
                                data[alt] = {"occ": float(occ), "count": 1, "res": comp}
                            else:
                                data[alt]["occ"] += float(occ)
                                data[alt]["count"] += 1

                    if len(alt_data) > 0:
                        key = "_entry" + dot + "id"
                        if key in data_block:
                            result += f"Entry {data_block[key]} "
                        else:
                            result += "An entry "
                        result += (
                            "in the CIF file has multiple alternates. "
                            "Picking those with the largest occupancy:\n\n"
                        )
                    use_alt_id = {}
                    for label, data in alt_data.items():
                        max_occ = -1.0
                        for alt, tmp in data.items():
                            occ = tmp["occ"] / tmp["count"]
                            tmp["occ"] = occ
                            if occ > max_occ:
                                use_alt_id[label] = alt
                                max_occ = occ

                    for label, val in use_alt_id.items():
                        comp = alt_data[label][val]["res"]
                        chain, mol, entity = label
                        line = (
                            f"  {mol} {chain} {comp}{entity} alt_id={val} selected from"
                        )
                        for alt, tmp in alt_data[label].items():
                            line += f" | {alt} {tmp['res']} {tmp['occ']:.2f} "
                        result += line
                        result += "\n"

        if alt_ids is None:
            alt_ids = ["."] * n_atoms
            labels = [""] * n_atoms

        xs = []
        ys = []
        zs = []
        symbols = []
        names = []
        # May have type symbols or labels, or both. Use type symbols by preference
        if "_atom_site" + dot + "type_symbol" in data_block:
            type_section = "_atom_site" + dot + "type_symbol"
        elif "_atom_site" + dot + "label" in data_block:
            type_section = "_atom_site" + dot + "label"
        else:
            raise KeyError(
                f"Neither _atom_site{dot}type_label or _atom_site{dot}label are in file"
            )

        # May need site labels later
        if "_atom_site" + dot + "label" in data_block:
            label_section = "_atom_site" + dot + "label"
            have_names = True
        else:
            label_section = type_section
            have_names = False

        for x, y, z, symbol, label, alt, name in zip(
            data_block[x_label],
            data_block[y_label],
            data_block[z_label],
            data_block[type_section],
            labels,
            alt_ids,
            data_block[label_section],
        ):
            # Atom symbols may be followed by number, etc.
            if alt != "." and label in use_alt_id and alt != use_alt_id[label]:
                logger.debug(f"   Skipping alternate configuration {label} -- {alt}")
                continue

            if len(symbol) > 1:
                if symbol[1].isalpha():
                    symbol = symbol[0:2]
                else:
                    symbol = symbol[0]
            if symbol == "D":
                symbol = "H"

            # May have uncertainties in ()
            x = x.split("(")[0]
            y = y.split("(")[0]
            z = z.split("(")[0]

            # Check for 1/3 and 2/3
            if "." in x:
                xx, dx = x.split(".")
                if re.fullmatch(r"333+", dx):
                    x = float(xx) - 1.0 / 3.0 if "-" in xx else float(xx) + 1.0 / 3.0
                elif re.fullmatch(r"66+(6|7)?", dx):
                    x = float(xx) - 2.0 / 3.0 if "-" in xx else float(xx) + 2.0 / 3.0
                else:
                    x = float(x)
            else:
                x = float(x)
            if "." in y:
                yy, dy = y.split(".")
                if re.fullmatch(r"333+", dy):
                    y = float(yy) - 1.0 / 3.0 if "-" in yy else float(yy) + 1.0 / 3.0
                elif re.fullmatch(r"66+(6|7)?", dy):
                    y = float(yy) - 2.0 / 3.0 if "-" in yy else float(yy) + 2.0 / 3.0
                else:
                    y = float(y)
            else:
                y = float(y)
            if "." in z:
                zz, dz = z.split(".")
                if re.fullmatch(r"333+", dz):
                    z = float(zz) - 1.0 / 3.0 if "-" in zz else float(zz) + 1.0 / 3.0
                elif re.fullmatch(r"66+(6|7)?", dz):
                    z = float(zz) - 2.0 / 3.0 if "-" in zz else float(zz) + 2.0 / 3.0
                else:
                    z = float(z)
            else:
                z = float(z)

            logger.debug(f"xyz = {x:7.3f} {y:7.3f} {z:7.3f}")
            if self.periodicity == 3:
                if not have_fractionals:
                    # Convert Cartesians to fractionals
                    x, y, z = self.cell.to_fractionals([[x, y, z]])[0]
            # Non-periodic
            xs.append(x)
            ys.append(y)
            zs.append(z)
            symbols.append(symbol)
            names.append(name)

        if have_names:
            if "name" not in self.atoms:
                self.atoms.add_attribute("name")
            ids = self.atoms.append(x=xs, y=ys, z=zs, symbol=symbols, name=names)
        else:
            ids = self.atoms.append(x=xs, y=ys, z=zs, symbol=symbols)

        # Are there bonds?
        have_bonds = False
        if "_geom_bond" + dot + "atom_site_label_1" in data_block:
            label_1_section = "_geom_bond" + dot + "atom_site_label_1"
            label_2_section = "_geom_bond" + dot + "atom_site_label_2"
            symmetry_1_section = "_geom_bond" + dot + "site_symmetry_1"
            symmetry_2_section = "_geom_bond" + dot + "site_symmetry_2"
            have_bonds = True
        elif "_geom_bond" + dot + "atom_site_id_1" in data_block:
            label_1_section = "_geom_bond" + dot + "atom_site_id_1"
            label_2_section = "_geom_bond" + dot + "atom_site_id_2"
            symmetry_1_section = "_geom_bond" + dot + "site_symmetry_1"
            symmetry_2_section = "_geom_bond" + dot + "site_symmetry_2"
            have_bonds = True

        if have_bonds:
            if not have_names:
                raise RuntimeError("CIF file has bonds but no atom labels (names)")
            atom_id = {name: i for name, i in zip(names, ids)}
            for key in (symmetry_1_section, symmetry_2_section):
                if key not in data_block:
                    data_block[key] = ["."] * len(data_block[label_1_section])

            Is = []
            Js = []
            symop1s = []
            symop2s = []

            for label1, label2, sym1, sym2 in zip(
                data_block[label_1_section],
                data_block[label_2_section],
                data_block[symmetry_1_section],
                data_block[symmetry_2_section],
            ):
                i = atom_id[label1]
                j = atom_id[label2]

                if i < j:
                    Is.append(i)
                    Js.append(j)
                    symop1s.append(sym1)
                    symop2s.append(sym2)
                else:
                    Is.append(j)
                    Js.append(i)
                    symop1s.append(sym2)
                    symop2s.append(sym1)

            self.bonds.append(
                i=Is,
                j=Js,
                symop1=symop1s,
                symop2=symop2s,
            )

        # If used symops, need to find the spacegroup name(s)
        if used_symops:
            before = self.symmetry.symops
            international_name = self.symmetry.find_spacegroup_from_operators()
            self.symmetry.update_group(international_name)
            after = self.symmetry.symops
            if before != after:
                print("!!!!!!!!!!!!!!!!!!!! SYMOPS CHANGED !!!!!!!!!!!!!!!!!!!!!")

        # import pprint

        # print("Reading CIF file")
        # print(f"{self.symmetry.hall_number=}")
        # print(f"{self.symmetry.hall_symbol=}")
        # print(f"{self.symmetry.group=}")
        # pprint.pprint(self.atoms.get_coordinates(asymmetric=True))
        # print("all atoms")
        # pprint.pprint(self.atoms.get_coordinates(asymmetric=False))

        # print("\n\nSymmetry info")
        # pprint.pprint(self.get_symmetry_data(self.symmetry.hall_number))

        return result



[docs]
    def to_mmcif_text(self):
        """Create the text of a mmCIF file from this configuration.

        Returns
        -------
        text : str
            The text of the file.
        """

        lines = []

        atoms = self.atoms
        bonds = self.bonds

        # Get the chemical formula
        formula, empirical_formula, Z = self.formula

        _id = empirical_formula.replace(" ", "")

        # And created the file, line-by-line
        lines = []
        lines.append("# Generated by MolSSI SEAMM")
        lines.append(f"data_{_id}")
        lines.append(f"_chem_comp.name '{formula}'")
        lines.append(f"_chem_comp.id '{_id}'")
        lines.append(f"_chem_comp.formula   '{formula}'")

        # Cell information
        if self.periodicity == 3:
            cell = self.cell
            a, b, c, alpha, beta, gamma = cell.parameters
            volume = cell.volume
            # lines.append("_symmetry_space_group_name_H-M   'P 1'")
            lines.append(f"_cell.length_a   {a}")
            lines.append(f"_cell.length_b   {b}")
            lines.append(f"_cell.length_c   {c}")
            lines.append(f"_cell.angle_alpha   {alpha}")
            lines.append(f"_cell.angle_beta    {beta}")
            lines.append(f"_cell.angle_gamma   {gamma}")
            # lines.append("_symmetry_Int_Tables_number   1")
            lines.append(f"_cell.volume   {volume}")
            lines.append(f"_cell.formula_units_Z   {Z}")
            # lines.append("loop_")
            # lines.append(" _symmetry_equiv_pos_site_id")
            # lines.append(" _symmetry_equiv_pos_as_xyz")
            # lines.append("  1  'x, y, z'")

        # lines.append(f"_chemical_formula_structural   '{empirical_formula}'")
        lines.append(f"_chemical_formula.sum   '{formula}'")

        # The atoms
        lines.append("loop_")
        lines.append(" _chem_comp_atom.comp_id")
        lines.append(" _chem_comp_atom.atom_id")
        lines.append(" _chem_comp_atom.type_symbol")
        lines.append(" _chem_comp_atom.model_Cartn_x")
        lines.append(" _chem_comp_atom.model_Cartn_y")
        lines.append(" _chem_comp_atom.model_Cartn_z")
        lines.append(" _chem_comp_atom.pdbx_model_Cartn_x_ideal")
        lines.append(" _chem_comp_atom.pdbx_model_Cartn_y_ideal")
        lines.append(" _chem_comp_atom.pdbx_model_Cartn_z_ideal")
        lines.append(" _chem_comp_atom.pdbx_component_comp_id")
        lines.append(" _chem_comp_atom.pdbx_residue_numbering")

        # Need unique names
        if "names" in atoms:
            original_names = atoms.get_column("names")
        else:
            original_names = atoms.symbols

        names = []
        tmp = {}
        for name in original_names:
            if name in tmp:
                tmp[name] += 1
                names.append(name + str(tmp[name]))
            else:
                tmp[name] = 1
                names.append(name)

        XYZ = atoms.get_coordinates(in_cell="molecule", fractionals=False)
        XYZa = atoms.get_coordinates(fractionals=False)
        XYZa = XYZ

        symbols = atoms.symbols
        for element, name, xyza, xyz in zip(symbols, names, XYZa, XYZ):
            xa, ya, za = xyza
            x, y, z = xyz
            lines.append(
                f"MOL1 {name} {element} {xa:.3f} {ya:.3f} {za:.3f} "
                f"{x:.3f} {y:.3f} {z:.3f} HET 1"
            )

        # The bonds
        if bonds.n_bonds > 0:
            lines.append("#")
            lines.append("loop_")
            lines.append(" _chem_comp_bond.comp_id")
            lines.append(" _chem_comp_bond.atom_id_1")
            lines.append(" _chem_comp_bond.atom_id_2")
            lines.append(" _chem_comp_bond.value_order")
            index = {j: i for i, j in enumerate(atoms.ids)}
            for row in bonds.bonds():
                i = index[row["i"]]
                j = index[row["j"]]
                order = bond_order[row["bondorder"]]
                nm1 = names[i]
                nm2 = names[j]
                lines.append(f"MOL1 {nm1} {nm2} {order}")

        # And that is it!
        lines.append("")
        return "\n".join(lines)



[docs]
    def from_mmcif_text(self, text):
        """Create this configuration from an MMCIF file..

        Parameters
        ----------
        text : str
            The text from the MMCIF file

        Returns
        -------
        None

        Notes
        -----
        This can be called from a `SystemDB`, `_System` or `_Configuration`
        object. The behavior and errors differ depending on what type of object
        is calling it:

            `SystemDB`

            When called from a `_System` object, a new `_System` will be
            created for each datablock in the CIF data, and a configuration
            will be created to hold the structure, unless their is an NMR
            ensemble, in which case each structure in the ensemble will be
            placed in a different configuration.

            `_System`

            In this case it is an error if there is more than one datablock.
            A new configuration will be created with the structure in the CIF
            datablock, unless the CIF data contains an NMR ensemble, in which
            case a configuration will be added for each conformer.

            `_Configuration`

            It is an error if there is more than one datablock in the CIF data.
            The configuration will be cleared and the structure from CIF data
            inserted into it. If there is an NMR ensemble in the datablock, and
            the representative conformer is identified, it will be loaded into
            the configuration. Otherwise an error will be raised.
        """
        # What type of object is this?
        if "SystemDB" in str(type(self)):
            my_class = "SystemDB"
        elif "_System" in str(type(self)):
            my_class = "System"
        elif "_Configuration" in str(type(self)):
            my_class = "Configuration"

        cif = CifFile.ReadCif(io.StringIO(text))

        if my_class != "SystemDB" and len(cif) != 1:
            raise RuntimeError(f"There are {len(cif)} data blocks in the mmcif file.")

        # Loop over the datablocks, processing as we go.
        systems = []
        configurations = {}
        for name, data in cif.items():
            # Check the sanity of the input
            coordinate_system = "Cartesian"
            if "_atom_site.id" in data:
                if "_chem_comp_atom.atom_id" in data:
                    raise ValueError("Have both atom_site and chem_comp atoms")

                # Fractional or Cartesian coordinates?
                if "_atom_site.fract_x" in data:
                    coordinate_system = "fractional"
                elif "_atom_site.Cartn_x" not in data:
                    raise KeyError("Couldn't find coordinates in atom_site data")
            elif "_chem_comp_atom.atom_id" in data:
                if "_chem_comp_atom.model_Cartn_x" not in data:
                    raise KeyError("Couldn't find coordinates in chem_comp_atom data")

            # Get the name of the system/configuration
            for key in [
                "_entry.id",
                "_chem_comp.id",
                "_chem_comp.name",
                "_chem_comp.three_letter_code",
            ]:
                if key in data:
                    name = data[key]
                    break

            key = "_pdbx_nmr_ensemble.conformers_submitted_total_number"
            ensemble = key in data
            if ensemble:
                n_ensemble = int(data[key])
                key = "_pdbx_nmr_representative.conformer_id"
                configuration_name = "model_1"
                if key in data:
                    representative = int(data[key])
                    if representative == 1:
                        configuration_name == "representative"
                else:
                    representative = None
            else:
                configuration_name = name
            if my_class == "SystemDB":
                system = self.create_system(name)
                systems.append(system)
                configuration = system.create_configuration(configuration_name)
                configurations[system.id] = [configuration]
            elif my_class == "System":
                system = self
                configuration = system.create_configuration(configuration_name)
                configurations[system.id] = [configuration]
            else:
                configuration = self
                # Reset the configuration
                configuration.clear()
                configuration.periodicity = 0
                configuration.coordinate_system = "Cartesian"
                configuration.name = configuration_name

            # Add the atoms
            atoms = configuration.atoms
            kwargs = {}

            # Get the data from the CIF file, creating columns if needed.
            for cif_key, key, _type, default in [
                ("_chem_comp_atom.atom_id", "name", "str", ""),
                ("_chem_comp_atom.alt_atom_id", None, "str", ""),
                ("_chem_comp_atom.type_symbol", "symbol", "str", ""),
                ("_chem_comp_atom.model_Cartn_x", "x", "float", 0.0),
                ("_chem_comp_atom.model_Cartn_y", "y", "float", 0.0),
                ("_chem_comp_atom.model_Cartn_z", "z", "float", 0.0),
                ("_chem_comp_atom.charge", "formal_charge", "int", 0),
                ("_chem_comp_atom.pdbx_align", None, "int", 0),
                ("_chem_comp_atom.pdbx_aromatic_flag", None, "bool", False),
                ("_chem_comp_atom.pdbx_leaving_atom_flag", None, "bool", False),
                ("_chem_comp_atom.pdbx_stereo_config", None, "bool", False),
                ("_chem_comp_atom.pdbx_component_atom_id", None, "bool", False),
                ("_atom_site.group_PDB", None, "str", "HETATOM"),
                ("_atom_site.id", None, "str", None),
                ("_atom_site.type_symbol", "symbol", "str", None),
                ("_atom_site.label_atom_id", None, "str", None),
                ("_atom_site.label_alt_id", None, "str", ""),
                ("_atom_site.label_comp_id", None, "str", None),
                ("_atom_site.label_asym_id", None, "str", None),
                ("_atom_site.label_entity_id", None, "str", None),
                ("_atom_site.label_seq_id", None, "int", None),
                ("_atom_site.pdbx_PDB_ins_code", None, "str", None),
                ("_atom_site.pdbx_PDB_model_num", "ignore", "int", None),
                ("_atom_site.Cartn_x", "x", "float", 0.0),
                ("_atom_site.Cartn_y", "y", "float", 0.0),
                ("_atom_site.Cartn_z", "z", "float", 0.0),
                ("_atom_site.fract_x", "x", "float", 0.0),
                ("_atom_site.fract_y", "y", "float", 0.0),
                ("_atom_site.fract_z", "z", "float", 0.0),
                ("_atom_site.occupancy", "occupancy", "float", 0.0),
                ("_atom_site.B_iso_or_equiv", None, "float", 0.0),
                ("_atom_site.pdbx_formal_charge", "formal_charge", "int", 0),
            ]:
                if cif_key in data:
                    if key is None:
                        key = cif_key
                    if key == "ignore":
                        key = cif_key
                    elif key not in atoms and key != "symbol":
                        atoms.add_attribute(key, _type, default=default)

                    if _type == "bool":
                        kwargs[key] = [
                            None if x in ".?" else x == "N" for x in data[cif_key]
                        ]
                    elif _type == "int":
                        kwargs[key] = [
                            None if x in ".?" else int(x) for x in data[cif_key]
                        ]
                    elif _type == "float":
                        kwargs[key] = [
                            None if x in ".?" else float(x) for x in data[cif_key]
                        ]
                    else:
                        kwargs[key] = [None if x in ".?" else x for x in data[cif_key]]

            if ensemble:
                if "_atom_site.pdbx_PDB_model_num" not in kwargs:
                    raise KeyError("Is an ensemble but no model numbers.")
                model_num = kwargs.pop("_atom_site.pdbx_PDB_model_num")
                first = 0
                last = first
                model = model_num[0]
                for i in model_num:
                    if i == model:
                        last += 1
                    else:
                        atoms = configuration.atoms
                        model_args = {k: v[first:last] for k, v in kwargs.items()}
                        atom_ids = atoms.append(**model_args)

                        # Make the next configuration and reset pointers
                        model = i
                        first = last
                        last += 1
                        if model == representative:
                            configuration_name = "representative"
                        else:
                            configuration_name = f"model_{model}"
                        configuration = system.create_configuration(configuration_name)
                        configurations[system.id].append(configuration)
                # Put in the last model
                atoms = configuration.atoms
                model_args = {k: v[first:last] for k, v in kwargs.items()}
                atom_ids = atoms.append(**model_args)
                if model != n_ensemble:
                    logger.warning(
                        f"The actual number of models ({model}) does not "
                        f"match the claimed number ({n_ensemble})."
                    )
            else:
                if coordinate_system != "Cartesian":
                    raise NotImplementedError("Can't handle fractional coords")

                atom_ids = atoms.append(**kwargs)

                if "_chem_comp_bond.atom_id_1" in data:
                    # Prepare for the bonds, which are labeled by atom name
                    atom_id = {name: i for i, name in zip(atom_ids, kwargs["name"])}

                    # And the bonds
                    bonds = configuration.bonds
                    kwargs = {}
                    for cif_key, key, _type, default in [
                        ("_chem_comp_bond.atom_id_1", "i", "str", ""),
                        ("_chem_comp_bond.atom_id_2", "j", "str", ""),
                        ("_chem_comp_bond.value_order", "bondorder", "int", 1),
                        ("_chem_comp_bond.comp_id", None, "str", None),
                        ("_chem_comp_bond.aromatic_flag", None, "bool", False),
                        ("_chem_comp_bond.stereo_flag", None, "str", None),
                        ("_chem_comp_bond.value_dist", None, "float", None),
                        ("_chem_comp_bond.value_dist_esd", None, "float", None),
                    ]:
                        if cif_key in data:
                            if key is None:
                                key = cif_key
                            if key == "ignore":
                                key = cif_key
                            elif key not in bonds:
                                bonds.add_attribute(key, _type, default=default)

                            if key == "i" or key == "j":
                                kwargs[key] = [atom_id[x] for x in data[cif_key]]
                            elif key == "bondorder":
                                kwargs[key] = [to_bond_order[x] for x in data[cif_key]]
                            elif _type == "bool":
                                kwargs[key] = [
                                    None if x in ".?" else x == "N"
                                    for x in data[cif_key]
                                ]
                            elif _type == "int":
                                kwargs[key] = [
                                    None if x in ".?" else int(x) for x in data[cif_key]
                                ]
                            elif _type == "float":
                                kwargs[key] = [
                                    None if x in ".?" else float(x)
                                    for x in data[cif_key]
                                ]
                            else:
                                kwargs[key] = [
                                    None if x in ".?" else x for x in data[cif_key]
                                ]

                    bonds.append(**kwargs)
        return systems, configurations