Source code for molsystem.pubchem
# -*- coding: utf-8 -*-
"""Functions for handling PubChem"""
import logging
from urllib.parse import quote as url_quote
import requests
logger = logging.getLogger(__name__)
pug_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
try:
import pubchempy as pcp
except ModuleNotFoundError:
print(
"To use PubChem, please install pubchempy using conda:\n"
" conda install -c conda-forge pubchempy"
)
logger.warning(
"To use PubChem, please install pubchempy using conda:\n"
" conda install -c conda-forge pubchempy"
)
[docs]
class PubChemMixin:
"""A mixin for handling the PubChem database."""
@property
def PC_cid(self):
"""Return the PubChem CID for this structure, or None."""
inchi = self.to_inchi()
results = pcp.get_compounds(inchi, "inchi")
if len(results) == 0:
return None
if len(results) >= 1:
logger.info(f"PubChem search returned more than one hit of {inchi}")
result = results[0].to_dict()
if "cid" in result:
return result["cid"]
return None
[docs]
def PC_from_cid(self, cid, fallback=None):
"""Create the configuration from the PubChem 3-D structure, if available.
Parameters
----------
cid : int
The PubChem CID.
fallback : str
A fallback SMILES, InChI, etc. to use if PubChem fails
"""
response = requests.get(f"{pug_url}/compound/cid/{cid}/SDF")
if response.status_code == 200:
self.from_sdf_text(response.text)
return
# An error!
if fallback is None:
raise RuntimeError(f"No 3-D structure available for {cid}")
# See what the fallback is
if len(fallback) == 27:
tmp = fallback.split("-")
if (
len(tmp) == 3
and len(tmp[0]) == 14
and len(tmp[1]) == 10
and len(tmp[2]) == 1
):
self.from_inchikey(fallback)
return
if fallback[0:7] == "InChI=":
self.from_inchi(fallback)
return
else:
self.from_smiles(fallback)
return
[docs]
def PC_from_identifier(self, identifier, namespace="detect", fallback=None):
"""Create the configuration from the PubChem 3-D structure, if available.
Parameters
----------
identifier : int or str
The PubChem identifier
namespace : str
The PubChem namespace: cid, name, smiles, inchi, inchikey
fallback : str
A fallback SMILES, InChI, etc. to use if PubChem fails
"""
if namespace == "detect":
# Work through the possibilities
if len(identifier) == 27:
tmp = identifier.split("-")
if (
len(tmp) == 3
and len(tmp[0]) == 14
and len(tmp[1]) == 10
and len(tmp[2]) == 1
):
namespace = ["inchikey"]
elif identifier[0:7] == "InChI=":
self.from_inchi(identifier)
namespaces = ["inchi"]
else:
namespaces = ["name", "smiles"]
else:
namespaces = [namespace]
for namespace in namespaces:
response = requests.get(
f"{pug_url}/compound/{namespace}/{url_quote(identifier)}/SDF"
)
if response.status_code == 200:
self.from_sdf_text(response.text)
return
# An error!
if fallback is None:
raise RuntimeError(f"No 3-D structure available for {identifier}")
# See what the fallback is
if len(fallback) == 27:
tmp = fallback.split("-")
if (
len(tmp) == 3
and len(tmp[0]) == 14
and len(tmp[1]) == 10
and len(tmp[2]) == 1
):
self.from_inchikey(fallback)
return
if fallback[0:7] == "InChI=":
self.from_inchi(fallback)
return
else:
self.from_smiles(fallback)
return
[docs]
def PC_iupac_name(self, fallback=None):
"""Return the IUPAC name for this structure, or None.
Parameters
----------
fallback : str
A name to return if PubChem doesn't have a name
"""
inchi = self.to_inchi()
results = pcp.get_compounds(inchi, "inchi")
if len(results) == 0:
return fallback
if len(results) >= 1:
logger.info(f"PubChem search returned more than one hit of {inchi}")
result = results[0].to_dict()
if "iupac_name" in result:
return result["iupac_name"]
return fallback