From 45d05453933e573bf550c97d53d8da8bdcb69026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=9C=9F=E7=94=AB?= <1320489816@qq.com> Date: Thu, 25 Jan 2024 15:05:29 +0800 Subject: [PATCH] fix a bug about case like (CH2)5 --- molscribe/chemistry.py | 81 ++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/molscribe/chemistry.py b/molscribe/chemistry.py index f6f7f96..f8b466c 100644 --- a/molscribe/chemistry.py +++ b/molscribe/chemistry.py @@ -5,7 +5,7 @@ import rdkit import rdkit.Chem as Chem - +from rdkit.Chem import AllChem rdkit.RDLogger.DisableLog('rdApp.*') from SmilesPE.pretokenizer import atomwise_tokenizer @@ -360,7 +360,10 @@ def get_smiles_from_symbol(symbol, mol, atom, bonds): total_bonds = int(sum([bond.GetBondTypeAsDouble() for bond in bonds])) formula_list = _expand_carbon(_parse_formula(symbol)) - smiles, bonds_left, num_trails, success = _condensed_formula_list_to_smiles(formula_list, total_bonds, None) + if len(bonds)!=2: + smiles, bonds_left, num_trails, success = _condensed_formula_list_to_smiles(formula_list, total_bonds, None) + else: + smiles, bonds_left, num_trails, success = _condensed_formula_list_to_smiles(formula_list, 1, None) if success: return smiles return None @@ -442,39 +445,47 @@ def _need_expand(mol, mappings): atom.SetIsotope(0) continue - # remove bonds connected to abbreviation/condensed formula - adjacent_indices = [bond.GetOtherAtomIdx(i) for bond in bonds] - for adjacent_idx in adjacent_indices: - mol_w.RemoveBond(i, adjacent_idx) - - adjacent_atoms = [mol_w.GetAtomWithIdx(adjacent_idx) for adjacent_idx in adjacent_indices] - for adjacent_atom, bond in zip(adjacent_atoms, bonds): - adjacent_atom.SetNumRadicalElectrons(int(bond.GetBondTypeAsDouble())) - - # get indices of atoms of main body that connect to substituent - bonding_atoms_w = adjacent_indices - # assume indices are concated after combine mol_w and mol_r - bonding_atoms_r = [mol_w.GetNumAtoms()] - for atm in mol_r.GetAtoms(): - if atm.GetNumRadicalElectrons() and atm.GetIdx() > 0: - bonding_atoms_r.append(mol_w.GetNumAtoms() + atm.GetIdx()) - - # combine main body and substituent into a single molecule object - combo = Chem.CombineMols(mol_w, mol_r) - - # connect substituent to main body with bonds - mol_w = Chem.RWMol(combo) - # if len(bonding_atoms_r) == 1: # substituent uses one atom to bond to main body - for atm in bonding_atoms_w: - bond_order = mol_w.GetAtomWithIdx(atm).GetNumRadicalElectrons() - mol_w.AddBond(atm, bonding_atoms_r[0], order=BOND_TYPES[bond_order]) - - # reset radical electrons - for atm in bonding_atoms_w: - mol_w.GetAtomWithIdx(atm).SetNumRadicalElectrons(0) - for atm in bonding_atoms_r: - mol_w.GetAtomWithIdx(atm).SetNumRadicalElectrons(0) - atoms_to_remove.append(i) + if "(" in symbol and len(bonds)==2:#(CH2)5 + connected_info = [(neighbor.GetIdx(), mol_w.GetBondBetweenAtoms(i, neighbor.GetIdx()).GetBondType()) for neighbor in atom.GetNeighbors()] + combined = Chem.RWMol(AllChem.CombineMols(mol_w, mol_r)) + combined.AddBond(connected_info[0][0], mol_w.GetNumAtoms(), connected_info[0][1]) + combined.AddBond(connected_info[1][0], mol_w.GetNumAtoms()+mol_r.GetNumAtoms()-1, connected_info[1][1]) + mol_w = combined + atoms_to_remove.append(i) + else: + # remove bonds connected to abbreviation/condensed formula + adjacent_indices = [bond.GetOtherAtomIdx(i) for bond in bonds] + for adjacent_idx in adjacent_indices: + mol_w.RemoveBond(i, adjacent_idx) + + adjacent_atoms = [mol_w.GetAtomWithIdx(adjacent_idx) for adjacent_idx in adjacent_indices] + for adjacent_atom, bond in zip(adjacent_atoms, bonds): + adjacent_atom.SetNumRadicalElectrons(int(bond.GetBondTypeAsDouble())) + + # get indices of atoms of main body that connect to substituent + bonding_atoms_w = adjacent_indices + # assume indices are concated after combine mol_w and mol_r + bonding_atoms_r = [mol_w.GetNumAtoms()] + for atm in mol_r.GetAtoms(): + if atm.GetNumRadicalElectrons() and atm.GetIdx() > 0: + bonding_atoms_r.append(mol_w.GetNumAtoms() + atm.GetIdx()) + + # combine main body and substituent into a single molecule object + combo = Chem.CombineMols(mol_w, mol_r) + + # connect substituent to main body with bonds + mol_w = Chem.RWMol(combo) + # if len(bonding_atoms_r) == 1: # substituent uses one atom to bond to main body + for atm in bonding_atoms_w: + bond_order = mol_w.GetAtomWithIdx(atm).GetNumRadicalElectrons() + mol_w.AddBond(atm, bonding_atoms_r[0], order=BOND_TYPES[bond_order]) + + # reset radical electrons + for atm in bonding_atoms_w: + mol_w.GetAtomWithIdx(atm).SetNumRadicalElectrons(0) + for atm in bonding_atoms_r: + mol_w.GetAtomWithIdx(atm).SetNumRadicalElectrons(0) + atoms_to_remove.append(i) # Remove atom in the end, otherwise the id will change # Reverse the order and remove atoms with larger id first