# Source code for syndirella.SMARTSHandler

#!/usr/bin/env python3
"""
SMARTSHandler.py

This module contains the SMARTSHandler class. This class contains information about the reaction SMARTS.
"""
import json
import logging
from collections import OrderedDict
from typing import (Set, List, Dict, Tuple, Any)

from rdkit import Chem, DataStructs
from rdkit.Chem import Mol
from rdkit.Chem.rdChemReactions import ReactionFromSmarts

import syndirella.fairy as fairy
from .cli_defaults import cli_default_settings
from .error import SMARTSError


class SMARTSHandler:
    """
    Holds the reaction SMARTS of every known reaction and assigns reactants to them.

    Reactions are read from a JSON file that maps a reaction name to a reaction SMIRKS string of the
    form ``reactant1.reactant2>>product`` (or ``reactant>>product``).

    Attributes set by ``__init__``:
        reaction_smarts: reaction name -> reaction object built with ``ReactionFromSmarts``.
        reactant1_dict: reaction name -> SMARTS string of the 1st reactant.
        reactant2_dict: reaction name -> SMARTS string of the 2nd reactant, or None.
        product_smarts: reaction name -> SMARTS string of the product side.
        n_reactants_per_reaction: reaction name -> 1 or 2.
        pattern_products / pattern_reactant1 / pattern_reactant2: tuples of (name, query mol) pairs.
        matched_reactants, found_1, found_2: results of the last call to
            ``assign_reactants_w_rxn_smarts`` (None until then).
        logger: module logger.
    """

    def __init__(self, rxn_smirks_path: str = None):
        """
        Load the reaction SMIRKS and split each one into reactant and product SMARTS.

        :param rxn_smirks_path: path to the JSON file of reaction SMIRKS. When None, the path stored
            under ``cli_default_settings['rxn_smarts_path']`` is used.
        """
        self.logger = logging.getLogger(__name__)

        if rxn_smirks_path is None:
            rxn_smirks_path = cli_default_settings['rxn_smarts_path']
        with open(rxn_smirks_path) as f:
            reaction_smirks = json.load(f)

        self.reaction_smarts = {name: ReactionFromSmarts(val) for name, val in reaction_smirks.items()}
        self.reactant1_dict = OrderedDict()
        self.reactant2_dict = OrderedDict()
        self.product_smarts = OrderedDict()
        self.n_reactants_per_reaction = OrderedDict()

        for name, smirks in reaction_smirks.items():
            reactants, prod = smirks.split(">>")
            try:
                react1, react2 = reactants.split(".")
            except ValueError:
                # The reactant side does not split into exactly two parts: treat it as one reactant.
                react1, react2 = reactants, None
            self.reactant1_dict[name] = react1
            self.reactant2_dict[name] = react2
            self.product_smarts[name] = prod
            # Record the count per reaction; rebinding the whole attribute here would keep only
            # the last reaction of the file.
            self.n_reactants_per_reaction[name] = 1 if react2 is None else 2

        self.pattern_products = self.from_SMARTS_to_patterns(self.product_smarts)
        self.pattern_reactant1 = self.from_SMARTS_to_patterns(self.reactant1_dict)
        self.pattern_reactant2 = self.from_SMARTS_to_patterns(self.reactant2_dict)
        self.matched_reactants = None
        self.found_1 = None
        self.found_2 = None

    def fromReactionFullNameToReactantName(self, reactionName, reactantNum):
        """
        Build the reactant name ``react<N>_<reaction name>`` with spaces replaced by underscores.

        Only the prefix is %-formatted, so a ``%`` inside the reaction name is kept verbatim.
        """
        return ("react%d_" % reactantNum) + reactionName.replace(" ", "_")

    def fromReactantNameToReactionFullName(self, reactantName):
        """
        Remove every ``react1`` and ``react2`` substring from a reactant name.

        NOTE(review): the underscore following the prefix is kept and underscores are not turned
        back into spaces, so this is not the exact inverse of fromReactionFullNameToReactantName.
        Confirm with callers before changing.
        """
        return reactantName.replace("react1", "").replace("react2", "")

    def from_SMARTS_to_patterns(self, smarts_dict):
        """
        Convert a name -> SMARTS mapping into a tuple of (name, query mol) pairs.

        Entries whose SMARTS is None are skipped.
        """
        return tuple((key, Chem.MolFromSmarts(val)) for key, val in smarts_dict.items() if val is not None)

    def assign_reactants_w_rxn_smarts(self,
                                      product: "Chem.Mol",
                                      reactant_attach_ids: "Dict[Chem.Mol, List[Tuple[int, int]]]",
                                      reaction_name: str) -> "Dict[str, Tuple[Chem.Mol, List[int], str]]":
        """
        This function is used to assign the reactant number to input reactants using the reaction SMARTS.
        For now it only supports one or two reactants; with more entries only the first two are used.

        NOTE(review): the attachment ids are intersected with the atom indices of each substructure
        match, so their elements are presumably plain atom indices; confirm against the caller, as
        the annotation suggests pairs.

        :param product: product molecule, only attached to any SMARTSError raised.
        :param reactant_attach_ids: reactant molecule -> attachment ids of that reactant.
        :param reaction_name: key into the reactant SMARTS dictionaries.
        :returns: reactant SMARTS string -> (reactant mol, matched atom indices, 'r1' or 'r2').
            The same object is stored in ``self.matched_reactants``.
        :raises SMARTSError: when no reactant is given, the two reactants are identical, or the
            reactants cannot be matched to the reactant SMARTS.
        """
        if not reactant_attach_ids:
            message = "No reactants were provided to match against the reaction SMARTS."
            self.logger.critical(message)
            raise SMARTSError(mol=product, message=message)

        if len(reactant_attach_ids) == 1:
            # Performing for one reactant
            patt = self.reactant1_dict[reaction_name]
            reactant_mol, only_ids = next(iter(reactant_attach_ids.items()))  # first and only entry
            self.matched_reactants = self.format_matched_reactant_for_one(reactant_mol, set(only_ids), patt)
            if not self.matched_reactants:
                message = "Reactant could not be matched to only reactant SMARTS in reaction."
                self.logger.critical(message)
                raise SMARTSError(mol=product, message=message)
            return self.matched_reactants

        r1, r2 = list(reactant_attach_ids)[:2]
        # Check to make sure they are not the same
        similarity = DataStructs.FingerprintSimilarity(fairy.get_morgan_fingerprint(r1),
                                                       fairy.get_morgan_fingerprint(r2))
        if similarity == 1.0:
            message = "The two reactants are the same."
            self.logger.critical(message)
            raise SMARTSError(mol=product, message=message)

        r1_attach_ids = set(reactant_attach_ids[r1])
        r2_attach_ids = set(reactant_attach_ids[r2])
        patt1 = self.reactant1_dict[reaction_name]
        patt2 = self.reactant2_dict[reaction_name]
        # find_matching_atoms fills this mapping as a side effect.
        self.matched_reactants = {patt1: None, patt2: None}
        found_1 = self.find_matching_atoms(r1, patt1, patt2, r1_attach_ids)
        found_2 = self.find_matching_atoms(r2, patt1, patt2, r2_attach_ids)

        # Raises when a reactant SMARTS has no match; returns False when the assignment is ambiguous
        # (a SMARTS is matched by both reactants, e.g. formation of urea).
        if not self.check_found_reactants(product, found_1, found_2, reaction_name, r1, r2):
            self.seperate_matching_reactants(r1, r2, found_1, found_2, patt1, patt2)

        self.found_1 = found_1
        self.found_2 = found_2
        return self.matched_reactants

    def seperate_matching_reactants(self,
                                    r1: "Chem.Mol",
                                    r2: "Chem.Mol",
                                    found_1: Dict[str, bool],
                                    found_2: Dict[str, bool],
                                    patt1: str,
                                    patt2: str) -> "Dict[str, Tuple[Chem.Mol, List[int], str]]":
        """
        This function is used to fix edge cases:
            - both reactants match both reactant SMARTS.
            - one reactant matches both reactant SMARTS.

        The found_1 / found_2 dictionaries describe reactant r1 / r2 respectively:
            Key: 'r1' or 'r2' (the reactant SMARTS)
            Value: False if no match, otherwise the atom indices within the reactant that match
                   that reactant SMARTS.

        ``self.matched_reactants`` is updated in place and returned. Nothing is changed when neither
        reactant matches both SMARTS.
        """
        r1_matches_both = found_1["r1"] and found_1["r2"]
        r2_matches_both = found_2["r1"] and found_2["r2"]
        if not (r1_matches_both or r2_matches_both):
            return self.matched_reactants

        if found_1["r1"] and found_2["r2"]:
            # Keep the input order: r1 plays reactant 1 and r2 plays reactant 2, each with the atoms
            # it matched for its own role.
            self.matched_reactants[patt1] = (r1, found_1["r1"], 'r1')
            self.matched_reactants[patt2] = (r2, found_2["r2"], 'r2')
        elif found_2["r1"] and found_1["r2"]:
            # Only the swapped assignment is possible.
            self.matched_reactants[patt1] = (r2, found_2["r1"], 'r1')
            self.matched_reactants[patt2] = (r1, found_1["r2"], 'r2')
        return self.matched_reactants

    def check_found_reactants(self,
                              product: "Chem.Mol",
                              found_1: Dict[str, bool],
                              found_2: Dict[str, bool],
                              reaction_name: str,
                              r1: "Chem.Mol",
                              r2: "Chem.Mol") -> bool:
        """
        This function checks that both reactant SMARTS have been matched by at least one reactant.

        It logs a warning when both reactants match the same reactant SMARTS.

        :returns: False when either reactant SMARTS is matched by both reactants (the assignment is
            ambiguous and needs seperate_matching_reactants), True otherwise.
        :raises SMARTSError: when neither reactant matches the 1st or the 2nd reactant SMARTS.
        """
        if not found_1["r1"] and not found_2["r1"]:
            message = (f"The reactants do not match the reaction SMARTS in reaction {reaction_name} in "
                       f"mol {Chem.MolToSmiles(r1)} and {Chem.MolToSmiles(r2)}.")
            self.logger.critical(message)
            raise SMARTSError(message=message, mol=product)
        if not found_1["r2"] and not found_2["r2"]:
            message = (f"No atoms found involved in reaction {reaction_name} in "
                       f"mol {Chem.MolToSmiles(r1)} and {Chem.MolToSmiles(r2)}")
            self.logger.critical(message)
            raise SMARTSError(message=message, mol=product)

        both_match_first = bool(found_1["r1"] and found_2["r1"])
        both_match_second = bool(found_1["r2"] and found_2["r2"])
        if both_match_first:
            self.logger.warning(
                f"Both reactants ({Chem.MolToSmiles(r1)} {Chem.MolToSmiles(r2)}) have atoms found in SMARTS of 1st "
                f"reactant for {reaction_name}. This might cause selectivity issues downstream, but continuing.")
        if both_match_second:
            self.logger.warning(
                f"Both reactants ({Chem.MolToSmiles(r1)} {Chem.MolToSmiles(r2)}) have atoms found in SMARTS of 2nd "
                f"reactant for {reaction_name}. This might cause selectivity issues downstream, but continuing.")
        # Either overlap means find_matching_atoms may have assigned both SMARTS to the same
        # molecule, so the caller has to disambiguate.
        return not (both_match_first or both_match_second)

    def find_matching_atoms(self,
                            reactant: "Chem.Mol",
                            patt1: str,
                            patt2: str,
                            attachment_idx: set) -> "Dict[str, Tuple[int] | bool]":
        """
        This function finds the matched atoms in a reactant against both reactant SMARTS.

        For each SMARTS the first substructure match sharing an index with ``attachment_idx`` is kept.
        As a side effect that match is also written to ``self.matched_reactants`` under the SMARTS
        string, overwriting any earlier entry for it.

        :returns: {'r1': match or False, 'r2': match or False}, where a match is the tuple of atom
            indices in ``reactant``.
        """
        queries = (
            ('r1', patt1, Chem.MolFromSmarts(patt1)),
            ('r2', patt2, Chem.MolFromSmarts(patt2)),
        )
        found = {'r1': False, 'r2': False}
        for label, smarts, query in queries:
            for match in reactant.GetSubstructMatches(query):
                if attachment_idx.intersection(match):
                    self.matched_reactants[smarts] = (reactant, match, label)
                    found[label] = match
                    break
        return found

    def format_matched_reactant_for_one(self,
                                        reactant_mol: "Chem.Mol",
                                        attach_ids: "Set[Tuple[int, int]]",
                                        patt: str) -> "Dict[str, Tuple[Chem.Mol, List[int], str]]":
        """
        Formats matched reactants for one reactant.

        The first substructure match that shares an index with ``attach_ids`` is preferred. When no
        match touches an attachment id, the first match is used as a lax fallback. Testing the match
        size inside the loop is not a usable fallback condition, because every substructure match
        has one index per query atom, so it would accept the first match before any later match
        could be compared with the attachment ids.

        :param attach_ids: attachment ids of the reactant (any iterable).
        :returns: {patt: (reactant_mol, matched atom indices, 'r1')}, or an empty dict when the
            reactant does not match ``patt`` at all.
        """
        mol_patt = Chem.MolFromSmarts(patt)
        matches = reactant_mol.GetSubstructMatches(mol_patt)
        if not matches:
            return {}
        attach_set = set(attach_ids)
        chosen = next((match for match in matches if attach_set.intersection(match)), matches[0])
        return {patt: (reactant_mol, chosen, 'r1')}