Source code for read_structure_step.formats.openbabel_io.checkers

import re

from read_structure_step.formats.registries import last_resort_checker


def check_for_pdb(file_name):
    """Check if a file appears to be a PDB file.

    The PDB files have a number of required keywords; however, some, like
    "AUTHOR" are common words, so this routine checks for the simultaneous
    presence of a number of the strangely spelled keywords.

    The file is scanned line by line and the scan stops as soon as every
    keyword has been seen. Runs of whitespace in each line are collapsed to a
    single space before comparing, because PDB records are column-formatted
    (e.g. ``REMARK   2``) and would otherwise never contain ``"REMARK 2"``.

    Parameters
    ----------
    file_name : str
        The path to the file.

    Returns
    -------
    bool
        True if the file appears to be a PDB file.
    """
    keywords = (
        "COMPND",
        "KEYWDS",
        "EXPDTA",
        "REVDAT",
        "REMARK 2",
        "REMARK 3",
        "SEQRES",
        "CRYST1",
    )

    missing = set(keywords)
    # errors="replace": undecodable bytes become U+FFFD instead of raising
    # UnicodeDecodeError, so probing a non-text file cannot crash the check.
    # The keywords are plain ASCII, so the replacement cannot create a match.
    with open(file_name, "r", errors="replace") as f:
        for line in f:
            # Collapse column padding/tabs so "REMARK   2" reads "REMARK 2".
            normalized = " ".join(line.split())
            missing = {keyword for keyword in missing if keyword not in normalized}
            if not missing:
                return True
    return False
def check_for_xyz(file_name):
    """Check if a file appears to be an XYZ file.

    Only the first three lines are examined: the first must be a bare
    integer (the atom count), the second (the comment line) is ignored, and
    the third must be an element symbol followed by three
    whitespace-separated numbers.

    Parameters
    ----------
    file_name : str
        The path to the file.

    Returns
    -------
    bool
        True if the file appears to be an XYZ file.
    """
    # The pattern is assembled from adjacent raw strings. A backslash-newline
    # inside a raw triple-quoted string is NOT a line continuation: both
    # characters stay in the pattern and corrupt the alternatives around them.
    element = (
        r"(?:A[cglmrstu]|B[aehikr]?|C[adeflmnorsu]?|D[bsy]|E[rsu]|F[elmr]?"
        r"|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airuv]|M[cdgnot]|N[abdehiop]?"
        r"|O[gs]?|P[abdmortu]?|R[abefghnu]|S[bcegimnr]?|T[abcehilms]"
        r"|U(?:u[opst])?|V|W|Xe|Yb?|Z[nr])"
    )
    # Optional sign, integer or decimal mantissa, optional exponent (1.0e-03).
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    # Each coordinate must be preceded by whitespace so that a single token
    # such as "123" cannot be read as three one-digit coordinates.
    atom_line = re.compile(r"^\s*" + element + r"(?:\s+" + number + r"){3}\s*$")
    count_line = re.compile(r"^\s*[0-9]+\s*$")

    # errors="replace": undecodable bytes become U+FFFD instead of raising
    # UnicodeDecodeError, so probing a non-text file simply returns False.
    with open(file_name, "r", errors="replace") as f:
        # readline() returns "" at end of file, which matches neither pattern,
        # so files shorter than three lines fall through to False.
        if count_line.match(f.readline()) is None:
            return False
        f.readline()  # free-form comment line; its content is not checked
        return atom_line.match(f.readline()) is not None
def add_format_checkers():
    """Add any missing format checkers.

    Passes each (extension, checker) pair below to ``last_resort_checker``:
    ``check_for_pdb`` for ".pdb" and ``check_for_xyz`` for ".xyz", in that
    order.
    """
    fallback_checkers = (
        (".pdb", check_for_pdb),
        (".xyz", check_for_xyz),
    )
    for extension, checker in fallback_checkers:
        last_resort_checker(extension, checker)