Source code for syndirella.route.Reaction

#!/usr/bin/env python3
"""
syndirella.route.Reaction.py

This module contains the Reaction class. One instance of this object is used to describe a single reaction.
"""

import logging
from typing import (List, Dict, Tuple)

from rdkit import Chem
from rdkit.Chem import rdFMCS

import syndirella.fairy as fairy
from syndirella.SMARTSHandler import SMARTSHandler
from syndirella.error import ReactionError


[docs] class Reaction(): """ This class contains information about the reaction. It is used to find the atoms on the reactants involved in the reaction, check the reaction atoms are the ones that are supposed to be connected. """ def __init__(self, product: Chem.Mol, reactants: List[Chem.Mol], reaction_name: str, smarts_handler: SMARTSHandler, route_uuid: str):
[docs] self.route_uuid: str = route_uuid
[docs] self.logger = logging.getLogger(f"{__name__}")
[docs] self.scaffold: Chem.Mol = product
[docs] self.reactants: List[Chem.Mol] = reactants
[docs] self.reaction_name: str = reaction_name
[docs] self.smarts_handler: SMARTSHandler = smarts_handler
[docs] self.reaction_pattern: Chem.rdChemReactions = self.smarts_handler.reaction_smarts[self.reaction_name]
[docs] self.all_attach_ids: Dict[Chem.Mol, List[int]] | None = self.find_attachment_ids_for_all_reactants()
self.matched_smarts_to_reactant, self.matched_smarts_index_to_reactant = self.find_reaction_atoms_for_all_reactants()
[docs] self.additional_rxn_options: List[Dict[str, str]] = fairy.load_additional_rxn_options()
[docs] self.alt_reactions: List[Dict[str, str]] | None = self.alt_reaction()
[docs] def alt_reaction(self) -> List[Dict[str, str]] | None: """ This function is used to determine if an additional reaction is specified. """ rxns = [] for rxn in self.additional_rxn_options: if rxn['name'] == self.reaction_name: rxns.append(rxn) return rxns
[docs] def get_additional_reactions(self) -> List[Tuple[str, Tuple[str, str]]] | None: """ This function edits the reactants to make an additional reaction. """ new_reactions: List[Tuple[str, Tuple[str, str]]] = [] for alt_rxn in self.alt_reactions: # go through all additional reactions self.logger.info(f"Additional reaction {alt_rxn['replace_with']} found for {self.reaction_name}.") for r_index in self.matched_smarts_index_to_reactant: # go through each reactant if alt_rxn['reactant_id_to_replace'] == r_index: new_reactant: Chem.Mol = self.edit_reactant(self.matched_smarts_index_to_reactant[r_index], alt_rxn['reactant_smarts_to_replace_with'], alt_rxn['reactant_smarts_to_replace'], alt_rxn['replacement_connecting_atom_id']) if new_reactant: other_reactant: Chem.Mol = \ next(value for key, value in self.matched_smarts_index_to_reactant.items() if key != r_index)[0] # check that new reactants can produce a scaffold reaction_name: str = alt_rxn['replace_with'] can_react: bool = self.check_reaction_can_produce_product(new_reactant, other_reactant, reaction_name) if can_react: self.logger.info(f"Additional reaction {reaction_name} for {self.reaction_name} found and " f"validated.") new_reactant: str = Chem.MolToSmiles(new_reactant) other_reactant: str = Chem.MolToSmiles(other_reactant) reactants: Tuple[str, str] = (new_reactant, other_reactant) new_reactions.append((reaction_name, reactants)) else: self.logger.error(f"Cannot edit reactant for additional reaction {alt_rxn['replace_with']}.") return new_reactions
[docs] def check_reaction_can_produce_product(self, new_reactant: Chem.Mol, other_reactant: Chem.Mol, reaction_name: str) -> bool: """ This function is used to check if the new reactants can simply produce a scaffold from the labeled reaction. """ reaction: Chem.rdChemReactions = self.smarts_handler.reaction_smarts[reaction_name] new_reactant_combo: Tuple[Chem.Mol] = (new_reactant, other_reactant) products: Tuple[Chem.Mol] = reaction.RunReactants(new_reactant_combo) if len(products[0]) == 0: self.logger.error(f"Additional reaction {reaction_name} cannot produce a scaffold from the new reactants " f"{Chem.MolToSmiles(new_reactant)} {Chem.MolToSmiles(other_reactant)}.") return False return True
[docs] def edit_reactant(self, reactant: Tuple[Chem.Mol, Tuple[int], str], new_reactant_smarts: str, to_replace_smarts: str, replacement_connecting_atom_id: int) -> Chem.Mol | None: """ Directly edits the reactant to replace SMARTS matched atoms to new SMARTS. """ to_replace: Chem.Mol = Chem.MolFromSmarts(to_replace_smarts) to_replace_with: Chem.Mol = Chem.MolFromSmarts(new_reactant_smarts) self.logger.info(f"to_replace_with_smarts: {new_reactant_smarts}") reactant_mol: Chem.Mol = reactant[0] replaced_reactants: Tuple[Chem.Mol] = Chem.ReplaceSubstructs(mol=reactant_mol, query=to_replace, replacement=to_replace_with, replacementConnectionPoint=replacement_connecting_atom_id, replaceAll=True) self.logger.info(f"Found {len(replaced_reactants)} new reactants.") for replaced_reactant in replaced_reactants: self.logger.info( f"Checking new reactant {Chem.MolToSmiles(replaced_reactant)} for reaction {self.reaction_name}...") # try to do full 360 mol to smiles to mol to check if it is valid try: replaced_reactant = Chem.MolFromSmiles(Chem.MolToSmiles(replaced_reactant)) except ValueError: self.logger.error(f"Cannot convert new reactant to smiles for reaction {self.reaction_name}") return None # check mol can be sanitized if not fairy.check_mol_sanity(replaced_reactant): self.logger.error(f"Cannot sanitize new reactant for reaction {self.reaction_name}") return None # get attach ids attach_ids: List[int] = [] for mol in self.all_attach_ids.keys(): if fairy.calculate_tanimoto(mol, reactant_mol) == 1.0: # get attach ids of the right reactant replaced attach_ids: List[int] = self.all_attach_ids[mol] if len(attach_ids) == 0: self.logger.error(f"No attachment points found for reaction {self.reaction_name}") return None # check if molecule returned was two molecules and only keep the one that has the closest number of atoms to the original reactant replaced_reactant_smiles: str = Chem.MolToSmiles(replaced_reactant) if '.' in replaced_reactant_smiles: self.logger.debug(f"Multiple fragmented molecules found in new reactant.") replaced_reactants: List[Chem.Mol] = [Chem.MolFromSmiles(mol) for mol in replaced_reactant_smiles.split('.')] # just check by which replaced_reactant contains the number of atoms closest to the len(matched_atoms) replaced_reactant = min(replaced_reactants, key=lambda x: abs(x.GetNumAtoms() - reactant_mol.GetNumAtoms())) replaced_reactant_smiles = Chem.MolToSmiles(replaced_reactant) self.logger.debug(f"Replaced reactant smiles: {replaced_reactant_smiles}") assert '.' not in replaced_reactant_smiles, f"Multiple molecules found in new reactant for reaction {self.reaction_name}" # get atom ids that match new smarts pattern matched_atoms: Tuple[int] = replaced_reactant.GetSubstructMatch(to_replace_with) if len(matched_atoms) == 0: self.logger.error( f"No atoms found in new reactant ({Chem.MolToSmiles(replaced_reactant)}) for reaction {self.reaction_name} that match with " f"the new SMARTS pattern, {new_reactant_smarts}.") return None self.logger.debug(f"Matched atoms: {matched_atoms}") # get all atoms that matched atoms are bonded to since SMARTS might not match attachment points for atom in matched_atoms: for bond in replaced_reactant.GetAtomWithIdx(atom).GetBonds(): if bond.GetBeginAtomIdx() in matched_atoms: attach_ids.append(bond.GetEndAtomIdx()) elif bond.GetEndAtomIdx() in matched_atoms: attach_ids.append(bond.GetBeginAtomIdx()) if any(atom in matched_atoms for atom in attach_ids): # If any attach ids are in the SMARTS of the new reactant self.logger.info(f"New reactant for reaction {self.reaction_name} found.") return replaced_reactant return None
[docs] def _replace_carboxylic_acid_hydroxy_with_dummy(self, mol: Chem.Mol) -> Chem.Mol: # Get the oxygen atoms in the molecule for atom in mol.GetAtoms(): if atom.GetAtomicNum() == 8: # Atomic number for Oxygen # Check if the oxygen is in a carboxylic acid group if len(atom.GetNeighbors()) == 1 and atom.GetNeighbors()[0].GetSymbol() == 'C': carboxylic_carbon = atom.GetNeighbors()[0] if len([nbr for nbr in carboxylic_carbon.GetNeighbors() if nbr.GetSymbol() == 'O']) == 2: atom.SetAtomicNum(0) # Set atomic number to 0, which is a dummy atom in RDKit return mol
[docs] def _replace_halide_with_dummy(self, mol: Chem.Mol, atom_to_check_by_symbol=None) -> Chem.Mol: # Convert the symbol to atomic number if atom_to_check_by_symbol is not None: atom_to_check_num = Chem.GetPeriodicTable().GetAtomicNumber(atom_to_check_by_symbol) # Start editing the molecule rw_mol = Chem.RWMol(mol) for atom in rw_mol.GetAtoms(): if atom.GetAtomicNum() in [9, 17, 35, 53]: # For F, Cl, Br, I for neighbor in atom.GetNeighbors(): if atom_to_check_by_symbol is not None: if neighbor.GetNum() == atom_to_check_num: atom.SetAtomicNum(0) else: atom.SetAtomicNum(0) # Set atomic number to 0 for dummy atom break # Convert RWMol back to Mol object return rw_mol.GetMol()
[docs] def _find_attachment_id_from_dummy(self, reactant: Chem.Mol, dummy_symbol="*") -> Tuple[List[int], List[List[int]]]: neig_idx_list = [] dummy_idx_list = [] for idx in range(reactant.GetNumAtoms()): if reactant.GetAtomWithIdx(idx).GetSymbol() == dummy_symbol: dummy_idx = idx neig_idx = [atom.GetIdx() for atom in reactant.GetAtomWithIdx(idx).GetNeighbors()] dummy_idx_list.append(dummy_idx) neig_idx_list.append(neig_idx) return dummy_idx_list, neig_idx_list
[docs] def use_fmcs(self, reactant: Chem.Mol) -> Dict[int, int]: mcs = rdFMCS.FindMCS([reactant, self.scaffold]) mcs_smarts = Chem.MolFromSmarts(mcs.smartsString) product_matches = self.scaffold.GetSubstructMatches(mcs_smarts) # Can return multiple product_matches, only take the first one product_match = product_matches[0] reactant_matches = reactant.GetSubstructMatches(mcs_smarts) reactant_match = reactant_matches[0] product_to_reactant_mapping = dict(zip(product_match, reactant_match)) return product_to_reactant_mapping
[docs] def find_attachment_id_for_reactant(self, reactant: Chem.Mol) -> List[int] | None: """ This function is used to find the attachment indices of a single reactant in the reaction. :param reactant: a single reactant molecule :returns a list of tuples (attachmentIdx_whole, attachmentIdx_subMol) """ # product_to_reactant_mapping = self.scaffold.GetSubstructMatch(reactant) # Trying new get substruct match method product_to_reactant_mapping: Dict[int, int] = self.use_fmcs(reactant) attachment_idxs_list = [] for bond in self.scaffold.GetBonds(): i = bond.GetBeginAtomIdx() j = bond.GetEndAtomIdx() i_inSub = i in product_to_reactant_mapping.keys() j_inSub = j in product_to_reactant_mapping.keys() if int(i_inSub) + int(j_inSub) == 1: if i_inSub: product_idx = i # This is the attachment point within scaffold try: reactant_idx = product_to_reactant_mapping[i] except (ValueError, KeyError): reactant_idx = None else: product_idx = j # This is the attachment point within scaffold try: reactant_idx = product_to_reactant_mapping[j] except (ValueError, KeyError): reactant_idx = None attachment_idxs_list.append((product_idx, reactant_idx)) if len(attachment_idxs_list) == 0 and self.reaction_name == 'Amide_Schotten-Baumann_with_amine': # run if no other attachments found # replace halide with dummy atom reactant = self._replace_halide_with_dummy(reactant) dummy_idx_list, neig_idx_list = self._find_attachment_id_from_dummy(reactant, dummy_symbol="*") print(neig_idx_list) print(dummy_idx_list) return neig_idx_list[0] if len(attachment_idxs_list) == 0 and self.reaction_name == 'Amidation': # run if no other attachments found # replace hydroxyl in carboxylic acid with dummy atom reactant = self._replace_carboxylic_acid_hydroxy_with_dummy(reactant) dummy_idx_list, neig_idx_list = self._find_attachment_id_from_dummy(reactant, dummy_symbol="*") print(neig_idx_list) print(dummy_idx_list) return neig_idx_list[0] if len(attachment_idxs_list) == 0 and self.reaction_name == 'Sulfonamide_Schotten-Baumann_with_amine_(intermolecular)': # it is probably having trouble on the sulfonyl halide group # replace halide with dummy atom reactant = self._replace_halide_with_dummy(reactant, atom_to_check_by_symbol='S') dummy_idx_list, neig_idx_list = self.find_attachment_id_from_dummy(reactant, dummy_symbol="*") print(neig_idx_list) print(dummy_idx_list) return neig_idx_list[0] if len(attachment_idxs_list) == 0 and self.reaction_name == "Buchwald-Hartwig_amination": # replace halide with dummy atom reactant = self._replace_halide_with_dummy(reactant) dummy_idx_list, neig_idx_list = self._find_attachment_id_from_dummy(reactant, dummy_symbol="*") print(neig_idx_list) print(dummy_idx_list) return neig_idx_list[0] if len(attachment_idxs_list) == 0 and self.reaction_name == "Epoxide_+_amine_coupling": # Find epoxide group, then find which carbon it is connected to. That is the attachment point. # This is a hacky solution, but it works for now. epoxide_pattern = Chem.MolFromSmarts("[O]1[C][C]1") matches = reactant.GetSubstructMatches(epoxide_pattern) if matches: for match in matches: atoms_to_remove = list(match) for atom in atoms_to_remove: if reactant.GetAtomWithIdx(atom).GetSymbol() == 'C': if reactant.GetAtomWithIdx( atom).GetTotalNumHs() == 1: # Find carbon with one hydrogen, not on end attach_idx = atom return [attach_idx] exact_attachment = [x[1] for x in attachment_idxs_list] return exact_attachment
[docs] def find_attachment_ids_for_all_reactants(self) -> Dict[Chem.Mol, List[int]] | None: """ This function is used to find the attachment indices of all reactants in the reaction. :returns a list of lists, each containing tuples of attachment indices for each reactant """ all_attachments = {} for reactant in self.reactants: attachments: List[int] | None = self.find_attachment_id_for_reactant(reactant) all_attachments[reactant] = attachments if any(len(attach_ids) == 0 for attach_ids in all_attachments.values()): raise ReactionError(message=f"No attachment points found for reaction {self.reaction_name}", mol=self.scaffold, route_uuid=self.route_uuid) all_attach_ids: Dict[Chem.Mol, List[int]] = all_attachments return all_attach_ids
[docs] def format_matched_smarts_to_index(self, matched_reactants: Dict[str, Tuple[Chem.Mol, List[int], str]]) -> Dict[int, Tuple[Chem.Mol, List[int], str]] | None: """ Formats matched smarts to reactant by using reactant index in smarts as the key. """ matched_smarts_index_to_reactant = {} for smarts, mol_id_rname in matched_reactants.items(): mol, ids, _ = mol_id_rname if '1' in mol_id_rname[-1]: matched_smarts_index_to_reactant[1] = (mol, ids, smarts) elif '2' in mol_id_rname[-1]: matched_smarts_index_to_reactant[2] = (mol, ids, smarts) else: self.logger.error(f"Cannot correctly store matched smarts to reactant index for {self.reaction_name}.") return None return matched_smarts_index_to_reactant
[docs] def find_reaction_atoms_for_all_reactants(self) -> Tuple[ Dict[str, Tuple[Chem.Mol, List[int], str]] | None, Dict[int, Tuple[Chem.Mol, List[int], str]]]: """ This function is used to find the reaction atoms of both reactants. And how those atoms correspond to the SMARTS pattern associated with the reaction. """ # check reactant smarts in both reactants matched_reactants: Dict[str, Tuple[Chem.Mol, List[int], str]] | None = ( self.smarts_handler.assign_reactants_w_rxn_smarts(product=self.scaffold, reactant_attach_ids=self.all_attach_ids, reaction_name=self.reaction_name)) matched_smarts_index_to_reactant: Dict[int, Tuple[Chem.Mol, List[int], str]] = ( self.format_matched_smarts_to_index(matched_reactants)) matched_smarts_to_reactant = matched_reactants if len(matched_smarts_to_reactant) == 0: raise ReactionError(message=f"No reaction atoms found for reaction {self.reaction_name}", mol=self.scaffold, route_uuid=self.route_uuid) return matched_smarts_to_reactant, matched_smarts_index_to_reactant