#!/usr/bin/env python3
"""
SMARTSHandler.py
This module contains the SMARTSHandler class. This class contains information about the reaction SMARTS.
"""
import json
import logging
from collections import OrderedDict
from typing import (Set, List, Dict, Tuple, Any)
from rdkit import Chem, DataStructs
from rdkit.Chem import Mol
from rdkit.Chem.rdChemReactions import ReactionFromSmarts
import syndirella.fairy as fairy
from .cli_defaults import cli_default_settings
from .error import SMARTSError
[docs]
class SMARTSHandler:
def __init__(self,
rxn_smirks_path: str = None):
if rxn_smirks_path is not None:
with open(rxn_smirks_path) as f:
reaction_smirks = json.load(f)
else:
with open(cli_default_settings['rxn_smarts_path']) as f:
reaction_smirks = json.load(f)
[docs]
self.reaction_smarts = {name: ReactionFromSmarts(val) for name, val in reaction_smirks.items()}
[docs]
self.reactant1_dict = OrderedDict()
[docs]
self.reactant2_dict = OrderedDict()
[docs]
self.product_smarts = OrderedDict()
[docs]
self.n_reactants_per_reaction = OrderedDict()
for name, smart in reaction_smirks.items():
reactants, prod = smart.split(">>")
try:
react1, react2 = reactants.split(".")
except ValueError:
react1, react2 = reactants, None
self.reactant1_dict[name] = react1
self.reactant2_dict[name] = react2
self.product_smarts[name] = prod
self.n_reactants_per_reaction = {name: 1 if react2 is None else 2}
[docs]
self.pattern_products = self.from_SMARTS_to_patterns(self.product_smarts)
[docs]
self.pattern_reactant1 = self.from_SMARTS_to_patterns(self.reactant1_dict)
[docs]
self.pattern_reactant2 = self.from_SMARTS_to_patterns(self.reactant2_dict)
[docs]
self.matched_reactants = None
REACTANT1_PREFIX = self.fromReactionFullNameToReactantName("", 1)
REACTANT2_PREFIX = self.fromReactionFullNameToReactantName("", 2)
REACTIONS_NAMES = list(reaction_smirks.keys())
[docs]
self.logger = logging.getLogger(f"{__name__}")
[docs]
def fromReactionFullNameToReactantName(self, reactionName, reactantNum):
return ("react%d_" + reactionName.replace(" ", "_")) % reactantNum
[docs]
def fromReactantNameToReactionFullName(self, reactantName):
return reactantName.replace("react1", "").replace("react2", "")
[docs]
def from_SMARTS_to_patterns(self, smarts_dict):
return tuple([(key, Chem.MolFromSmarts(val)) for key, val in smarts_dict.items() if val is not None])
[docs]
def assign_reactants_w_rxn_smarts(self,
product: Chem.Mol,
reactant_attach_ids: Dict[Chem.Mol, List[Tuple[int, int]]],
reaction_name: str) -> dict[
str, tuple[Mol, list[int], str]] | None | dict[
Any | None, None]:
"""
This function is used to assign the reactant number to input reactants using the reaction SMARTS. For now it
only supports bimolecular reactions.
"""
if len(reactant_attach_ids) == 1: # Performing for one reactant
patt = self.reactant1_dict[reaction_name]
attach_ids = set(next(iter(reactant_attach_ids.values()))) # get first and only value
reactant_mol = next(iter(reactant_attach_ids.keys())) # get first and only key
self.matched_reactants: Dict[str, Tuple[Chem.Mol, List[int], str]] = (
self.format_matched_reactant_for_one(reactant_mol, attach_ids, patt))
if len(self.matched_reactants) == 0:
message = "Reactant could not be matched to only reactant SMARTS in reaction."
self.logger.critical(message)
raise SMARTSError(mol=product, message=message)
return self.matched_reactants
r1: Chem.Mol = list(reactant_attach_ids.keys())[0]
r2: Chem.Mol = list(reactant_attach_ids.keys())[1]
# Check to make sure they are not the same
similarity = DataStructs.FingerprintSimilarity(fairy.get_morgan_fingerprint(r1),
fairy.get_morgan_fingerprint(r2))
if similarity == 1.0:
message = "The two reactants are the same."
self.logger.critical(message)
raise SMARTSError(mol=product, message=message)
r1_attach_ids: Set[Tuple[int, int]] = set(reactant_attach_ids[r1])
r2_attach_ids: Set[Tuple[int, int]] = set(reactant_attach_ids[r2])
patt1 = self.reactant1_dict[reaction_name]
patt2 = self.reactant2_dict[reaction_name]
self.matched_reactants = {patt1: None, patt2: None}
found_1: Dict[str, Tuple[int] | bool] = self.find_matching_atoms(r1, patt1, patt2, r1_attach_ids)
found_2: Dict[str, Tuple[int] | bool] = self.find_matching_atoms(r2, patt1, patt2, r2_attach_ids)
# Check that both reactants have been found
if not self.check_found_reactants(product, found_1, found_2, reaction_name, r1, r2):
# will return False when both reactants match both reactant SMARTS. i.e. formation of urea.
self.seperate_matching_reactants(r1, r2, found_1, found_2, patt1, patt2)
self.found_1 = found_1
self.found_2 = found_2
return self.matched_reactants
[docs]
def seperate_matching_reactants(self,
r1: Chem.Mol,
r2: Chem.Mol,
found_1: Dict[str, bool],
found_2: Dict[str, bool],
patt1: str,
patt2: str) -> Dict[str, Tuple[Chem.Mol, List[int], str]]:
"""
This function is used to fix edge cases:
- to separate reactants that match both reactant SMARTS.
- one reactant matches both reactant SMARTS.
These found_1 and found_2 dictionaries are a bit confusing:
Key: 'r1' or 'r2'
Value: False if no match, List of atom indices within reactant that match the SMARTS of that reactant.
"""
# Both reactants match both reactant SMARTS
if found_1["r1"] and found_2["r1"] and found_2["r2"] and found_1["r2"]:
self.matched_reactants[patt1] = (r1, found_1["r1"], 'r1')
self.matched_reactants[patt2] = (r2, found_2["r2"], 'r2') # make found_2 r2
# reactant 1 matches both reactant SMARTS
elif found_1["r1"] and found_1["r2"]:
if found_2["r2"]: # change r2 to found_2
self.matched_reactants[patt1] = (r1, found_1["r1"], 'r1')
self.matched_reactants[patt2] = (r2, found_2["r2"], 'r2')
if found_2["r1"]: # change r1 to found_2
self.matched_reactants[patt1] = (r2, found_2["r1"], 'r1')
self.matched_reactants[patt2] = (r1, found_1["r2"], 'r2')
# reactant 2 matches both reactant SMARTS
elif found_2["r1"] and found_2["r2"]:
if found_1["r2"]: # change r2 to found_1
self.matched_reactants[patt1] = (r2, found_2["r1"], 'r1')
self.matched_reactants[patt2] = (r1, found_1["r2"], 'r2')
if found_1["r1"]: # change r1 to found_1
self.matched_reactants[patt1] = (r1, found_1["r1"], 'r1')
self.matched_reactants[patt2] = (r2, found_2["r1"], 'r2')
[docs]
def check_found_reactants(self,
product: Chem.Mol,
found_1: Dict[str, bool],
found_2: Dict[str, bool],
reaction_name: str,
r1: Chem.Mol,
r2: Chem.Mol) -> bool:
"""
This function checks that both reactants have been found. It raises a warning if both reactants are matched
to the same reactant SMARTS.
Returns False when both reactants match both reactant SMARTS.
"""
if not found_1["r1"] and not found_2["r1"]:
self.logger.critical(f"The reactants do not match the reaction SMARTS in reaction {reaction_name} in "
f"mol {Chem.MolToSmiles(r1)} and {Chem.MolToSmiles(r2)}.")
raise SMARTSError(message=f"The reactants do not match the reaction SMARTS in reaction {reaction_name} in "
f"mol {Chem.MolToSmiles(r1)} and {Chem.MolToSmiles(r2)}.", mol=product)
if not found_1["r2"] and not found_2["r2"]:
self.logger.critical(
f"No atoms found involved in reaction {reaction_name} in "
f"mol {Chem.MolToSmiles(r1)} and {Chem.MolToSmiles(r2)}")
raise SMARTSError(message=
f"No atoms found involved in reaction {reaction_name} in "
f"mol {Chem.MolToSmiles(r1)} and {Chem.MolToSmiles(r2)}",
mol=product)
if found_1["r1"] and found_2["r1"]:
self.logger.warning(
f"Both reactants ({Chem.MolToSmiles(r1)} {Chem.MolToSmiles(r2)}) have atoms found in SMARTS of 1st "
f"reactant for {reaction_name}. This might cause selectivity issues downstream, but continuing.")
if found_1["r2"] and found_2["r2"]:
self.logger.warning(
f"Both reactants ({Chem.MolToSmiles(r1)} {Chem.MolToSmiles(r2)}) have atoms found in SMARTS of 2nd "
f"reactant for {reaction_name}. This might cause selectivity issues downstream, but continuing.")
return False
return True
[docs]
def find_matching_atoms(self, reactant: Chem.Mol, patt1: str, patt2: str, attachment_idx: set) -> Dict[
str, Tuple[int] | bool]:
"""
This function finds the matched atoms in a reactant against both reactant SMARTS.
"""
mol_patt1 = Chem.MolFromSmarts(patt1)
mol_patt2 = Chem.MolFromSmarts(patt2)
found = {'r1': False, 'r2': False}
for mol_pattern, str_pattern, patt in zip([mol_patt1, mol_patt2], [patt1, patt2], ['r1', 'r2']):
if found[patt]:
continue
matches = reactant.GetSubstructMatches(mol_pattern)
for match in matches:
if attachment_idx.intersection(match):
self.matched_reactants[str_pattern] = (reactant, match, patt)
found[patt] = match
break
return found