# -*- coding: utf-8 -*-
"""Functions for handling CIF files
Bond Orders
-----------
1 sing single bond
2 doub double bond
3 trip triple bond
4 quad quadruple bond
5 arom aromatic bond
6 delo delocalized double bond
7 pi pi bond
8 poly polymeric bond
"""
import io
import json
import logging
import re
import CifFile
logger = logging.getLogger(__name__)
bond_order = {
1: "SING",
2: "DOUB",
3: "TRIP",
4: "QUAD",
5: "AROM",
6: "DELO",
7: "PI",
8: "POLY",
}
to_bond_order = {j: i for i, j in bond_order.items()}
[docs]
class CIFMixin:
"""A mixin for handling CIF files."""
[docs]
def read_cif_file(self, path):
"""Create new systems from a CIF file.
Read a CIF file and create a new system from each datablock in
the file.
If the datablock has an ensemble, as denoted by a section
'_pdbx_nmr_ensemble', a configuration will be created for each
conformer. If there is a representative conformer, the current
configuration will point to it; otherwise to the last conformer.
Parameters
----------
path : str or Path
A string or Path object pointing to the file to be read.
Returns
-------
[_System]
List of systems created.
"""
if "SystemDB" in str(type(self)):
sys_db = self
else:
sys_db = self.system_db
lines = []
systems = []
configurations = {}
in_block = False
block_name = ""
with open(path, "r") as fd:
for line in fd:
if line[0:5] == "data_":
logger.debug(f"Found block {line}")
if not in_block:
in_block = True
else:
new_systems, new_configurations = self.from_mmcif_text(
"\n".join(lines)
)
logger.debug(
f" added system {sys_db.n_systems}: {block_name}"
)
systems.extend(new_systems)
configurations.update(new_configurations)
block_name = line[5:].strip()
lines = []
lines.append(line)
if len(lines) > 0:
# The last block just ends at the end of the file
new_systems, new_configurations = self.from_mmcif_text("\n".join(lines))
logger.debug(f" added system {sys_db.n_systems}: {block_name}")
systems.extend(new_systems)
configurations.update(new_configurations)
return systems, configurations
[docs]
def to_cif_text(self):
"""Create the text of a CIF file from this configuration.
Returns
-------
text : str
The text of the file.
"""
lines = []
atoms = self.atoms
# Get the chemical formula
formula, empirical_formula, Z = self.formula
system_name = self.system.name
conf_name = self.name
if system_name == "" or conf_name == "":
_id = empirical_formula.replace(" ", "")
else:
_id = f"SEAMM:{system_name}/{conf_name}"
_id = _id.replace(" ", "")
# And create the file, line-by-line
lines = []
lines.append("# Generated by MolSSI SEAMM")
lines.append(f"data_{_id}")
# Cell information
if self.periodicity == 3:
cell = self.cell
symmetry = self.symmetry
a, b, c, alpha, beta, gamma = cell.parameters
volume = cell.volume
spgname = symmetry.group
if spgname == "":
pass
elif symmetry.n_symops == 1:
lines.append("_space_group_name_H-M_full 'P 1'")
else:
spgname_system = symmetry.spacegroup_names_to_system()[spgname]
lines.append(f"_space_group_{spgname_system} '{spgname}'")
hall = symmetry.hall_symbol
lines.append(f"_symmetry_space_group_name_Hall '{hall}'")
it_number = symmetry.IT_number
lines.append(f"_space_group_IT_number {it_number}")
lines.append(f"_cell_length_a {a}")
lines.append(f"_cell_length_b {b}")
lines.append(f"_cell_length_c {c}")
lines.append(f"_cell_angle_alpha {alpha}")
lines.append(f"_cell_angle_beta {beta}")
lines.append(f"_cell_angle_gamma {gamma}")
lines.append(f"_cell_volume {volume}")
lines.append(f"_cell_formula_units_Z {Z}")
lines.append("loop_")
lines.append(" _symmetry_equiv_pos_site_id")
lines.append(" _symmetry_equiv_pos_as_xyz")
if symmetry.n_symops == 1:
lines.append(" 1 x,y,z")
else:
for i, op in enumerate(symmetry.symops, start=1):
lines.append(f" {i:2} {op}")
lines.append(f"_chemical_formula_structural '{empirical_formula}'")
lines.append(f"_chemical_formula_sum '{formula}'")
# The atoms
lines.append("loop_")
lines.append(" _atom_site_type_symbol")
lines.append(" _atom_site_label")
lines.append(" _atom_site_symmetry_multiplicity")
lines.append(" _atom_site_fract_x")
lines.append(" _atom_site_fract_y")
lines.append(" _atom_site_fract_z")
lines.append(" _atom_site_occupancy")
# Need unique names
if "names" in atoms:
original_names = atoms.get_column("names")
else:
original_names = atoms.asymmetric_symbols
names = []
tmp = {}
for name in original_names:
if name in tmp:
tmp[name] += 1
names.append(name + str(tmp[name]))
else:
tmp[name] = 1
names.append(name + "1")
if symmetry.n_symops == 1:
UVW = atoms.get_coordinates(
fractionals=True, in_cell="molecule", asymmetric=True
)
else:
# For the moment can't center molecules in cell with symmetry.
UVW = atoms.get_coordinates(fractionals=True, asymmetric=True)
symbols = atoms.asymmetric_symbols
for element, name, uvw in zip(symbols, names, UVW):
u, v, w = uvw
lines.append(f" {element} {name} 1 {u:.3f} {v:.3f} {w:.3f} 1")
# Handle bonds.
if self.n_asymmetric_bonds > 0:
bonds = self.bonds
sym_bonds = bonds.bonds_for_asymmetric_bonds
indx = {j: i for i, j in enumerate(atoms.ids)}
Is = [indx[i] for i in bonds.get_column_data("i")]
Js = [indx[j] for j in bonds.get_column_data("j")]
op1s = bonds.get_column_data("symop1")
op2s = bonds.get_column_data("symop2")
Rs = bonds.get_lengths(asymmetric=True)
lines.append("loop_")
lines.append("_geom_bond_atom_site_label_1")
lines.append("_geom_bond_atom_site_label_2")
lines.append("_geom_bond_distance")
lines.append("_geom_bond_site_symmetry_1")
lines.append("_geom_bond_site_symmetry_2")
for i, j, r, op1, op2, n_sym_bonds in zip(
Is, Js, Rs, op1s, op2s, [len(x) for x in sym_bonds]
):
if n_sym_bonds > 0:
lines.append(f" {names[i]} {names[j]} {r:.4f} {op1} {op2}")
# And that is it!
lines.append("")
return "\n".join(lines)
[docs]
def from_cif_text(self, text):
"""Create this configuration from a CIF file..
Parameters
----------
text : str
The text from the CIF file
Returns
-------
None
"""
result = ""
cif = CifFile.ReadCif(io.StringIO(text))
data_blocks = [*cif.keys()]
if len(data_blocks) != 1:
raise RuntimeError(
f"There are {len(data_blocks)} data blocks in the cif file."
)
data_block = cif[data_blocks[0]]
logger.debug(json.dumps({**data_block}, indent=4, sort_keys=True))
# Reset the system
self.clear()
# The cell
dot = "."
if "_cell_length_a" in data_block:
dot = "_"
a = data_block["_cell" + dot + "length_a"].split("(")[0]
b = data_block["_cell" + dot + "length_b"].split("(")[0]
c = data_block["_cell" + dot + "length_c"].split("(")[0]
alpha = data_block["_cell" + dot + "angle_alpha"].split("(")[0]
beta = data_block["_cell" + dot + "angle_beta"].split("(")[0]
gamma = data_block["_cell" + dot + "angle_gamma"].split("(")[0]
if float(a) != 1 and float(b) != 1 and float(c) != 1:
self.periodicity = 3
self.coordinate_system = "fractional"
self.cell.parameters = (a, b, c, alpha, beta, gamma)
# Where is the symmetry info?
used_symops = False
if "_space_group_symop" + dot + "operation_xyz" in data_block:
symdata = "_space_group_symop" + dot + "operation_xyz"
self.symmetry.symops = data_block[symdata]
used_symops = True
elif "_symmetry_equiv" + dot + "pos_as_xyz" in data_block:
symdata = "_symmetry_equiv" + dot + "pos_as_xyz"
self.symmetry.symops = data_block[symdata]
used_symops = True
else:
found = False
for section in (
"_space_group" + dot + "name_Hall",
"_space_group" + dot + "name_H-M_full",
"_space_group" + dot + "name_H-M_alt",
"_symmetry" + dot + "space_group_name_Hall",
"_symmetry" + dot + "space_group_name_H-M",
):
if section in data_block:
found = True
self.symmetry.group = data_block[section]
if not found:
raise RuntimeError(
"CIF file does not contain required symmetry information. "
"Neither "
"'_symmetry_equiv" + dot + "pos_as_xyz' or "
"'_space_group_symop" + dot + "operation_xyz' or "
"_symmetry" + dot + "space_group_name_Hall or "
"_symmetry" + dot + "space_group_name_H-M "
"is present."
)
x_label = "_atom_site" + dot + "fract_x"
if x_label in data_block:
have_fractionals = True
y_label = "_atom_site" + dot + "fract_y"
z_label = "_atom_site" + dot + "fract_z"
else:
have_fractionals = False
x_label = "_atom_site" + dot + "Cartn_x"
y_label = "_atom_site" + dot + "Cartn_y"
z_label = "_atom_site" + dot + "Cartn_z"
else:
x_label = "_atom_site" + dot + "Cartn_x"
y_label = "_atom_site" + dot + "Cartn_y"
z_label = "_atom_site" + dot + "Cartn_z"
n_atoms = len(data_block[x_label])
# Check for alternate sites, warn and take highest occupation
use_alt_id = None
labels = []
alt_ids = None
if "_atom_site" + dot + "label_alt_id" in data_block:
key = "_atom_site" + dot + "label_alt_id"
if any(x != "." for x in data_block[key]):
comp_id = "_atom_site" + dot + "label_comp_id"
asym_id = "_atom_site" + dot + "label_asym_id"
entity_id = "_atom_site" + dot + "label_entity_id"
seq_id = "_atom_site" + dot + "label_seq_id"
if (
comp_id in data_block
and asym_id in data_block
and entity_id in data_block
and seq_id in data_block
):
alt_ids = data_block[key]
comp_ids = data_block[comp_id]
asym_ids = data_block[asym_id]
entity_ids = data_block[entity_id]
seq_ids = data_block[seq_id]
occupancy = "_atom_site" + dot + "occupancy"
if occupancy in data_block:
occs = data_block[occupancy]
else:
result += (
"Warning: No occupancy information in CIF file with alt "
"ids.\n\n"
)
occs = [1] * n_atoms
label = ""
alt_data = {}
for alt, comp, asym, entity, seq, occ in zip(
alt_ids, comp_ids, asym_ids, entity_ids, seq_ids, occs
):
label = (asym, entity, seq)
labels.append(label)
if alt != ".":
if label not in alt_data:
alt_data[label] = {}
data = alt_data[label]
if alt not in data:
data[alt] = {"occ": float(occ), "count": 1, "res": comp}
else:
data[alt]["occ"] += float(occ)
data[alt]["count"] += 1
if len(alt_data) > 0:
key = "_entry" + dot + "id"
if key in data_block:
result += f"Entry {data_block[key]} "
else:
result += "An entry "
result += (
"in the CIF file has multiple alternates. "
"Picking those with the largest occupancy:\n\n"
)
use_alt_id = {}
for label, data in alt_data.items():
max_occ = -1.0
for alt, tmp in data.items():
occ = tmp["occ"] / tmp["count"]
tmp["occ"] = occ
if occ > max_occ:
use_alt_id[label] = alt
max_occ = occ
for label, val in use_alt_id.items():
comp = alt_data[label][val]["res"]
chain, mol, entity = label
line = (
f" {mol} {chain} {comp}{entity} alt_id={val} selected from"
)
for alt, tmp in alt_data[label].items():
line += f" | {alt} {tmp['res']} {tmp['occ']:.2f} "
result += line
result += "\n"
if alt_ids is None:
alt_ids = ["."] * n_atoms
labels = [""] * n_atoms
xs = []
ys = []
zs = []
symbols = []
names = []
# May have type symbols or labels, or both. Use type symbols by preference
if "_atom_site" + dot + "type_symbol" in data_block:
type_section = "_atom_site" + dot + "type_symbol"
elif "_atom_site" + dot + "label" in data_block:
type_section = "_atom_site" + dot + "label"
else:
raise KeyError(
f"Neither _atom_site{dot}type_label or _atom_site{dot}label are in file"
)
# May need site labels later
if "_atom_site" + dot + "label" in data_block:
label_section = "_atom_site" + dot + "label"
have_names = True
else:
label_section = type_section
have_names = False
for x, y, z, symbol, label, alt, name in zip(
data_block[x_label],
data_block[y_label],
data_block[z_label],
data_block[type_section],
labels,
alt_ids,
data_block[label_section],
):
# Atom symbols may be followed by number, etc.
if alt != "." and label in use_alt_id and alt != use_alt_id[label]:
logger.debug(f" Skipping alternate configuration {label} -- {alt}")
continue
if len(symbol) > 1:
if symbol[1].isalpha():
symbol = symbol[0:2]
else:
symbol = symbol[0]
if symbol == "D":
symbol = "H"
# May have uncertainties in ()
x = x.split("(")[0]
y = y.split("(")[0]
z = z.split("(")[0]
# Check for 1/3 and 2/3
if "." in x:
xx, dx = x.split(".")
if re.fullmatch(r"333+", dx):
x = float(xx) - 1.0 / 3.0 if "-" in xx else float(xx) + 1.0 / 3.0
elif re.fullmatch(r"66+(6|7)?", dx):
x = float(xx) - 2.0 / 3.0 if "-" in xx else float(xx) + 2.0 / 3.0
else:
x = float(x)
else:
x = float(x)
if "." in y:
yy, dy = y.split(".")
if re.fullmatch(r"333+", dy):
y = float(yy) - 1.0 / 3.0 if "-" in yy else float(yy) + 1.0 / 3.0
elif re.fullmatch(r"66+(6|7)?", dy):
y = float(yy) - 2.0 / 3.0 if "-" in yy else float(yy) + 2.0 / 3.0
else:
y = float(y)
else:
y = float(y)
if "." in z:
zz, dz = z.split(".")
if re.fullmatch(r"333+", dz):
z = float(zz) - 1.0 / 3.0 if "-" in zz else float(zz) + 1.0 / 3.0
elif re.fullmatch(r"66+(6|7)?", dz):
z = float(zz) - 2.0 / 3.0 if "-" in zz else float(zz) + 2.0 / 3.0
else:
z = float(z)
else:
z = float(z)
logger.debug(f"xyz = {x:7.3f} {y:7.3f} {z:7.3f}")
if self.periodicity == 3:
if not have_fractionals:
# Convert Cartesians to fractionals
x, y, z = self.cell.to_fractionals([[x, y, z]])[0]
# Non-periodic
xs.append(x)
ys.append(y)
zs.append(z)
symbols.append(symbol)
names.append(name)
if have_names:
if "name" not in self.atoms:
self.atoms.add_attribute("name")
ids = self.atoms.append(x=xs, y=ys, z=zs, symbol=symbols, name=names)
else:
ids = self.atoms.append(x=xs, y=ys, z=zs, symbol=symbols)
# Are there bonds?
have_bonds = False
if "_geom_bond" + dot + "atom_site_label_1" in data_block:
label_1_section = "_geom_bond" + dot + "atom_site_label_1"
label_2_section = "_geom_bond" + dot + "atom_site_label_2"
symmetry_1_section = "_geom_bond" + dot + "site_symmetry_1"
symmetry_2_section = "_geom_bond" + dot + "site_symmetry_2"
have_bonds = True
elif "_geom_bond" + dot + "atom_site_id_1" in data_block:
label_1_section = "_geom_bond" + dot + "atom_site_id_1"
label_2_section = "_geom_bond" + dot + "atom_site_id_2"
symmetry_1_section = "_geom_bond" + dot + "site_symmetry_1"
symmetry_2_section = "_geom_bond" + dot + "site_symmetry_2"
have_bonds = True
if have_bonds:
if not have_names:
raise RuntimeError("CIF file has bonds but no atom labels (names)")
atom_id = {name: i for name, i in zip(names, ids)}
for key in (symmetry_1_section, symmetry_2_section):
if key not in data_block:
data_block[key] = ["."] * len(data_block[label_1_section])
Is = []
Js = []
symop1s = []
symop2s = []
for label1, label2, sym1, sym2 in zip(
data_block[label_1_section],
data_block[label_2_section],
data_block[symmetry_1_section],
data_block[symmetry_2_section],
):
i = atom_id[label1]
j = atom_id[label2]
if i < j:
Is.append(i)
Js.append(j)
symop1s.append(sym1)
symop2s.append(sym2)
else:
Is.append(j)
Js.append(i)
symop1s.append(sym2)
symop2s.append(sym1)
self.bonds.append(
i=Is,
j=Js,
symop1=symop1s,
symop2=symop2s,
)
# If used symops, need to find the spacegroup name(s)
if used_symops:
before = self.symmetry.symops
international_name = self.symmetry.find_spacegroup_from_operators()
self.symmetry.update_group(international_name)
after = self.symmetry.symops
if before != after:
print("!!!!!!!!!!!!!!!!!!!! SYMOPS CHANGED !!!!!!!!!!!!!!!!!!!!!")
# import pprint
# print("Reading CIF file")
# print(f"{self.symmetry.hall_number=}")
# print(f"{self.symmetry.hall_symbol=}")
# print(f"{self.symmetry.group=}")
# pprint.pprint(self.atoms.get_coordinates(asymmetric=True))
# print("all atoms")
# pprint.pprint(self.atoms.get_coordinates(asymmetric=False))
# print("\n\nSymmetry info")
# pprint.pprint(self.get_symmetry_data(self.symmetry.hall_number))
return result
[docs]
def to_mmcif_text(self):
"""Create the text of a mmCIF file from this configuration.
Returns
-------
text : str
The text of the file.
"""
lines = []
atoms = self.atoms
bonds = self.bonds
# Get the chemical formula
formula, empirical_formula, Z = self.formula
_id = empirical_formula.replace(" ", "")
# And created the file, line-by-line
lines = []
lines.append("# Generated by MolSSI SEAMM")
lines.append(f"data_{_id}")
lines.append(f"_chem_comp.name '{formula}'")
lines.append(f"_chem_comp.id '{_id}'")
lines.append(f"_chem_comp.formula '{formula}'")
# Cell information
if self.periodicity == 3:
cell = self.cell
a, b, c, alpha, beta, gamma = cell.parameters
volume = cell.volume
# lines.append("_symmetry_space_group_name_H-M 'P 1'")
lines.append(f"_cell.length_a {a}")
lines.append(f"_cell.length_b {b}")
lines.append(f"_cell.length_c {c}")
lines.append(f"_cell.angle_alpha {alpha}")
lines.append(f"_cell.angle_beta {beta}")
lines.append(f"_cell.angle_gamma {gamma}")
# lines.append("_symmetry_Int_Tables_number 1")
lines.append(f"_cell.volume {volume}")
lines.append(f"_cell.formula_units_Z {Z}")
# lines.append("loop_")
# lines.append(" _symmetry_equiv_pos_site_id")
# lines.append(" _symmetry_equiv_pos_as_xyz")
# lines.append(" 1 'x, y, z'")
# lines.append(f"_chemical_formula_structural '{empirical_formula}'")
lines.append(f"_chemical_formula.sum '{formula}'")
# The atoms
lines.append("loop_")
lines.append(" _chem_comp_atom.comp_id")
lines.append(" _chem_comp_atom.atom_id")
lines.append(" _chem_comp_atom.type_symbol")
lines.append(" _chem_comp_atom.model_Cartn_x")
lines.append(" _chem_comp_atom.model_Cartn_y")
lines.append(" _chem_comp_atom.model_Cartn_z")
lines.append(" _chem_comp_atom.pdbx_model_Cartn_x_ideal")
lines.append(" _chem_comp_atom.pdbx_model_Cartn_y_ideal")
lines.append(" _chem_comp_atom.pdbx_model_Cartn_z_ideal")
lines.append(" _chem_comp_atom.pdbx_component_comp_id")
lines.append(" _chem_comp_atom.pdbx_residue_numbering")
# Need unique names
if "names" in atoms:
original_names = atoms.get_column("names")
else:
original_names = atoms.symbols
names = []
tmp = {}
for name in original_names:
if name in tmp:
tmp[name] += 1
names.append(name + str(tmp[name]))
else:
tmp[name] = 1
names.append(name)
XYZ = atoms.get_coordinates(in_cell="molecule", fractionals=False)
XYZa = atoms.get_coordinates(fractionals=False)
XYZa = XYZ
symbols = atoms.symbols
for element, name, xyza, xyz in zip(symbols, names, XYZa, XYZ):
xa, ya, za = xyza
x, y, z = xyz
lines.append(
f"MOL1 {name} {element} {xa:.3f} {ya:.3f} {za:.3f} "
f"{x:.3f} {y:.3f} {z:.3f} HET 1"
)
# The bonds
if bonds.n_bonds > 0:
lines.append("#")
lines.append("loop_")
lines.append(" _chem_comp_bond.comp_id")
lines.append(" _chem_comp_bond.atom_id_1")
lines.append(" _chem_comp_bond.atom_id_2")
lines.append(" _chem_comp_bond.value_order")
index = {j: i for i, j in enumerate(atoms.ids)}
for row in bonds.bonds():
i = index[row["i"]]
j = index[row["j"]]
order = bond_order[row["bondorder"]]
nm1 = names[i]
nm2 = names[j]
lines.append(f"MOL1 {nm1} {nm2} {order}")
# And that is it!
lines.append("")
return "\n".join(lines)
[docs]
def from_mmcif_text(self, text):
"""Create this configuration from an MMCIF file..
Parameters
----------
text : str
The text from the MMCIF file
Returns
-------
None
Notes
-----
This can be called from a `SystemDB`, `_System` or `_Configuration`
object. The behavior and errors differ depending on what type of object
is calling it:
`SystemDB`
When called from a `_System` object, a new `_System` will be
created for each datablock in the CIF data, and a configuration
will be created to hold the structure, unless their is an NMR
ensemble, in which case each structure in the ensemble will be
placed in a different configuration.
`_System`
In this case it is an error if there is more than one datablock.
A new configuration will be created with the structure in the CIF
datablock, unless the CIF data contains an NMR ensemble, in which
case a configuration will be added for each conformer.
`_Configuration`
It is an error if there is more than one datablock in the CIF data.
The configuration will be cleared and the structure from CIF data
inserted into it. If there is an NMR ensemble in the datablock, and
the representative conformer is identified, it will be loaded into
the configuration. Otherwise an error will be raised.
"""
# What type of object is this?
if "SystemDB" in str(type(self)):
my_class = "SystemDB"
elif "_System" in str(type(self)):
my_class = "System"
elif "_Configuration" in str(type(self)):
my_class = "Configuration"
cif = CifFile.ReadCif(io.StringIO(text))
if my_class != "SystemDB" and len(cif) != 1:
raise RuntimeError(f"There are {len(cif)} data blocks in the mmcif file.")
# Loop over the datablocks, processing as we go.
systems = []
configurations = {}
for name, data in cif.items():
# Check the sanity of the input
coordinate_system = "Cartesian"
if "_atom_site.id" in data:
if "_chem_comp_atom.atom_id" in data:
raise ValueError("Have both atom_site and chem_comp atoms")
# Fractional or Cartesian coordinates?
if "_atom_site.fract_x" in data:
coordinate_system = "fractional"
elif "_atom_site.Cartn_x" not in data:
raise KeyError("Couldn't find coordinates in atom_site data")
elif "_chem_comp_atom.atom_id" in data:
if "_chem_comp_atom.model_Cartn_x" not in data:
raise KeyError("Couldn't find coordinates in chem_comp_atom data")
# Get the name of the system/configuration
for key in [
"_entry.id",
"_chem_comp.id",
"_chem_comp.name",
"_chem_comp.three_letter_code",
]:
if key in data:
name = data[key]
break
key = "_pdbx_nmr_ensemble.conformers_submitted_total_number"
ensemble = key in data
if ensemble:
n_ensemble = int(data[key])
key = "_pdbx_nmr_representative.conformer_id"
configuration_name = "model_1"
if key in data:
representative = int(data[key])
if representative == 1:
configuration_name == "representative"
else:
representative = None
else:
configuration_name = name
if my_class == "SystemDB":
system = self.create_system(name)
systems.append(system)
configuration = system.create_configuration(configuration_name)
configurations[system.id] = [configuration]
elif my_class == "System":
system = self
configuration = system.create_configuration(configuration_name)
configurations[system.id] = [configuration]
else:
configuration = self
# Reset the configuration
configuration.clear()
configuration.periodicity = 0
configuration.coordinate_system = "Cartesian"
configuration.name = configuration_name
# Add the atoms
atoms = configuration.atoms
kwargs = {}
# Get the data from the CIF file, creating columns if needed.
for cif_key, key, _type, default in [
("_chem_comp_atom.atom_id", "name", "str", ""),
("_chem_comp_atom.alt_atom_id", None, "str", ""),
("_chem_comp_atom.type_symbol", "symbol", "str", ""),
("_chem_comp_atom.model_Cartn_x", "x", "float", 0.0),
("_chem_comp_atom.model_Cartn_y", "y", "float", 0.0),
("_chem_comp_atom.model_Cartn_z", "z", "float", 0.0),
("_chem_comp_atom.charge", "formal_charge", "int", 0),
("_chem_comp_atom.pdbx_align", None, "int", 0),
("_chem_comp_atom.pdbx_aromatic_flag", None, "bool", False),
("_chem_comp_atom.pdbx_leaving_atom_flag", None, "bool", False),
("_chem_comp_atom.pdbx_stereo_config", None, "bool", False),
("_chem_comp_atom.pdbx_component_atom_id", None, "bool", False),
("_atom_site.group_PDB", None, "str", "HETATOM"),
("_atom_site.id", None, "str", None),
("_atom_site.type_symbol", "symbol", "str", None),
("_atom_site.label_atom_id", None, "str", None),
("_atom_site.label_alt_id", None, "str", ""),
("_atom_site.label_comp_id", None, "str", None),
("_atom_site.label_asym_id", None, "str", None),
("_atom_site.label_entity_id", None, "str", None),
("_atom_site.label_seq_id", None, "int", None),
("_atom_site.pdbx_PDB_ins_code", None, "str", None),
("_atom_site.pdbx_PDB_model_num", "ignore", "int", None),
("_atom_site.Cartn_x", "x", "float", 0.0),
("_atom_site.Cartn_y", "y", "float", 0.0),
("_atom_site.Cartn_z", "z", "float", 0.0),
("_atom_site.fract_x", "x", "float", 0.0),
("_atom_site.fract_y", "y", "float", 0.0),
("_atom_site.fract_z", "z", "float", 0.0),
("_atom_site.occupancy", "occupancy", "float", 0.0),
("_atom_site.B_iso_or_equiv", None, "float", 0.0),
("_atom_site.pdbx_formal_charge", "formal_charge", "int", 0),
]:
if cif_key in data:
if key is None:
key = cif_key
if key == "ignore":
key = cif_key
elif key not in atoms and key != "symbol":
atoms.add_attribute(key, _type, default=default)
if _type == "bool":
kwargs[key] = [
None if x in ".?" else x == "N" for x in data[cif_key]
]
elif _type == "int":
kwargs[key] = [
None if x in ".?" else int(x) for x in data[cif_key]
]
elif _type == "float":
kwargs[key] = [
None if x in ".?" else float(x) for x in data[cif_key]
]
else:
kwargs[key] = [None if x in ".?" else x for x in data[cif_key]]
if ensemble:
if "_atom_site.pdbx_PDB_model_num" not in kwargs:
raise KeyError("Is an ensemble but no model numbers.")
model_num = kwargs.pop("_atom_site.pdbx_PDB_model_num")
first = 0
last = first
model = model_num[0]
for i in model_num:
if i == model:
last += 1
else:
atoms = configuration.atoms
model_args = {k: v[first:last] for k, v in kwargs.items()}
atom_ids = atoms.append(**model_args)
# Make the next configuration and reset pointers
model = i
first = last
last += 1
if model == representative:
configuration_name = "representative"
else:
configuration_name = f"model_{model}"
configuration = system.create_configuration(configuration_name)
configurations[system.id].append(configuration)
# Put in the last model
atoms = configuration.atoms
model_args = {k: v[first:last] for k, v in kwargs.items()}
atom_ids = atoms.append(**model_args)
if model != n_ensemble:
logger.warning(
f"The actual number of models ({model}) does not "
f"match the claimed number ({n_ensemble})."
)
else:
if coordinate_system != "Cartesian":
raise NotImplementedError("Can't handle fractional coords")
atom_ids = atoms.append(**kwargs)
if "_chem_comp_bond.atom_id_1" in data:
# Prepare for the bonds, which are labeled by atom name
atom_id = {name: i for i, name in zip(atom_ids, kwargs["name"])}
# And the bonds
bonds = configuration.bonds
kwargs = {}
for cif_key, key, _type, default in [
("_chem_comp_bond.atom_id_1", "i", "str", ""),
("_chem_comp_bond.atom_id_2", "j", "str", ""),
("_chem_comp_bond.value_order", "bondorder", "int", 1),
("_chem_comp_bond.comp_id", None, "str", None),
("_chem_comp_bond.aromatic_flag", None, "bool", False),
("_chem_comp_bond.stereo_flag", None, "str", None),
("_chem_comp_bond.value_dist", None, "float", None),
("_chem_comp_bond.value_dist_esd", None, "float", None),
]:
if cif_key in data:
if key is None:
key = cif_key
if key == "ignore":
key = cif_key
elif key not in bonds:
bonds.add_attribute(key, _type, default=default)
if key == "i" or key == "j":
kwargs[key] = [atom_id[x] for x in data[cif_key]]
elif key == "bondorder":
kwargs[key] = [to_bond_order[x] for x in data[cif_key]]
elif _type == "bool":
kwargs[key] = [
None if x in ".?" else x == "N"
for x in data[cif_key]
]
elif _type == "int":
kwargs[key] = [
None if x in ".?" else int(x) for x in data[cif_key]
]
elif _type == "float":
kwargs[key] = [
None if x in ".?" else float(x)
for x in data[cif_key]
]
else:
kwargs[key] = [
None if x in ".?" else x for x in data[cif_key]
]
bonds.append(**kwargs)
return systems, configurations