Source code for neurosnap.chemicals
"""
Provides functions and classes related to processing chemical data.
"""
from typing import List, Optional
import numpy as np
from rdkit import Chem
from neurosnap.io.pdb import parse_pdb
from neurosnap.log import logger
### FUNCTIONS ###
[docs]
def smiles_to_sdf(smiles: str, output_path: str) -> None:
"""Converts a SMILES string to an sdf file.
Will overwrite existing results.
NOTE: This function does the bare minimum in terms of
generating the SDF molecule. The :obj:`neurosnap.conformers` module
should be used in most cases.
Parameters:
smiles: Smiles string to parse and convert
output_path: Path to output SDF file, should end with .sdf
"""
try:
m = Chem.MolFromSmiles(smiles)
# m = Chem.AddHs(m)
with Chem.SDWriter(output_path) as w:
w.write(m)
except Exception as e:
logger.error(f"Exception {e}")
raise ValueError(f'"{smiles}" is not a valid SMILES string, please follow the input instructions')
[docs]
def sdf_to_smiles(fpath: str) -> List[str]:
"""
Converts molecules in an SDF file to SMILES strings.
Reads an input SDF file and extracts SMILES strings from its molecules.
Invalid or unreadable molecules are skipped, with warnings logged.
Args:
fpath (str): Path to the input SDF file.
Returns:
List[str]: A list of SMILES strings corresponding to valid molecules in the SDF file.
Raises:
FileNotFoundError: If the SDF file cannot be found.
IOError: If the file cannot be read.
"""
output_smiles = []
supplier = Chem.SDMolSupplier(fpath, removeHs=False) # Keep hydrogens if present
for mol in supplier:
if mol is None:
logger.warning("Skipped an invalid molecule.")
continue
try:
smiles = Chem.MolToSmiles(mol)
output_smiles.append(smiles)
except Exception as e:
logger.error(f"Error converting molecule to SMILES: {e}")
return output_smiles
[docs]
def validate_smiles(smiles: str) -> bool:
"""
Validates a SMILES (Simplified Molecular Input Line Entry System) string.
Args:
smiles (str): The SMILES string to validate.
Returns:
bool: True if the SMILES string is valid, False otherwise.
Raises:
Exception: Logs any exception encountered during validation.
"""
if not smiles:
logger.error("Invalid input SMILES provided. SMILES string cannot be empty.")
return False
try:
mol: Optional[Chem.Mol] = Chem.MolFromSmiles(smiles)
return mol is not None
except Exception as e:
logger.error(f"Error validating SMILES: {e}")
return False
[docs]
def get_mol_center(mol, use_mass=False):
"""Computes the geometric center or center of mass of a molecule.
Args:
mol (Mol): An RDKit molecule object with 3D coordinates.
use_mass (bool, optional): If True, computes the center of mass using atomic masses.
If False, computes the simple geometric center. Defaults to False.
Returns:
np.ndarray: A NumPy array of shape (3,) representing the [x, y, z] center coordinates.
Returns None if the molecule has no conformers.
Raises:
ValueError: If no conformer is found in the molecule.
"""
conf = mol.GetConformer()
coords = []
masses = []
for atom in mol.GetAtoms():
pos = conf.GetAtomPosition(atom.GetIdx())
coords.append([pos.x, pos.y, pos.z])
if use_mass:
masses.append(atom.GetMass())
coords = np.array(coords)
if use_mass:
masses = np.array(masses)
return np.average(coords, axis=0, weights=masses)
else:
return np.mean(coords, axis=0)
[docs]
def move_ligand_to_center(ligand_sdf_path, receptor_pdb_path, output_sdf_path, use_mass=False):
"""Moves the center of a ligand in an SDF file to match the center of a receptor in a PDB file.
This function reads a ligand from an SDF file and a receptor from a PDB file, calculates
their respective centers (center of mass or geometric center), and translates the ligand
such that its center aligns with the receptor's center. The modified ligand is then saved
to a new SDF file.
Args:
ligand_sdf_path (str): Path to the input ligand SDF file.
receptor_pdb_path (str): Path to the input receptor PDB file.
output_sdf_path (str): Path where the adjusted ligand SDF will be saved.
use_mass (bool, optional): If True, compute center of mass; otherwise use geometric center. Defaults to False.
Returns:
str: Path to the output SDF file with the translated ligand.
Raises:
ValueError: If the ligand cannot be parsed from the input SDF file.
"""
suppl = Chem.SDMolSupplier(ligand_sdf_path, removeHs=False)
mol = suppl[0] # Assume single ligand
if mol is None:
raise ValueError("Could not parse ligand SDF.")
receptor = parse_pdb(receptor_pdb_path, return_type="ensemble").models()[0]
receptor_center = receptor.calculate_center_of_mass()
ligand_center = get_mol_center(mol, use_mass=use_mass)
shift = receptor_center - ligand_center
conf = mol.GetConformer()
for i in range(conf.GetNumAtoms()):
pos = conf.GetAtomPosition(i)
new_pos = np.array([pos.x, pos.y, pos.z]) + shift
conf.SetAtomPosition(i, new_pos)
writer = Chem.SDWriter(output_sdf_path)
writer.write(mol)
writer.close()
return output_sdf_path