Skip to content

Commit

Permalink
#1337 List of atoms from custom query is converted into SMARTS file without aliphatic attribute
Browse files Browse the repository at this point in the history

 Fix code. Add unit test (UT).
  • Loading branch information
AliaksandrDziarkach committed Oct 24, 2023
1 parent 6965f89 commit 41bb141
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 4 deletions.
2 changes: 2 additions & 0 deletions api/tests/integration/ref/formats/custom_query.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
[#6]1-[#6]=[#6]-[#6]=[#6]-[b;r;3;s&2,X3]=1 OK. Expected string found.
**** #1331 wrong smarts for ring bond count as drawn ****
ket_with_rb_as_drawn.ket OK. Smarts equals expected string '[#6](-[#6])(-[#6;x0])-[#6]'
**** #1337 wrong smarts for ring bond count as drawn ****
ket_with_custom_query_with_list.ket OK. Smarts equals expected string '[#6]1-[#6]=[Cl,Br,I,Na,O]-[#6]=[#6]-[#6]=1'
5 changes: 5 additions & 0 deletions api/tests/integration/tests/formats/custom_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,8 @@ def test_ket_to_smarts(filename, expected_str):
)
print("**** #1331 wrong smarts for ring bond count as drawn ****")
test_ket_to_smarts("ket_with_rb_as_drawn.ket", "[#6](-[#6])(-[#6;x0])-[#6]")

print("**** #1337 wrong smarts for ring bond count as drawn ****")
fname = "ket_with_custom_query_with_list.ket"
expected = "[#6]1-[#6]=[Cl,Br,I,Na,O]-[#6]=[#6]-[#6]=1"
test_ket_to_smarts(fname, expected)
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
{
"root": {
"nodes": [
{
"$ref": "mol0"
}
]
},
"mol0": {
"type": "molecule",
"atoms": [
{
"label": "C",
"location": [
6.3348493576049809,
-5.550074577331543,
0.0
]
},
{
"label": "C",
"location": [
8.06515121459961,
-5.549589157104492,
0.0
]
},
{
"label": "C",
"location": [
7.2016377449035648,
-5.049966812133789,
0.0
]
},
{
"label": "C",
"location": [
8.06515121459961,
-6.55053186416626,
0.0
]
},
{
"label": "C",
"location": [
6.3348493576049809,
-6.555019855499268,
0.0
]
},
{
"label": "",
"location": [
7.203820705413818,
-7.050033092498779,
0.0
],
"queryProperties": {
"customQuery": "Cl,Br,I,Na,O"
}
}
],
"bonds": [
{
"type": 2,
"atoms": [
2,
0
]
},
{
"type": 2,
"atoms": [
3,
1
]
},
{
"type": 1,
"atoms": [
0,
4
]
},
{
"type": 1,
"atoms": [
1,
2
]
},
{
"type": 2,
"atoms": [
4,
5
]
},
{
"type": 1,
"atoms": [
5,
3
]
}
],
"sgroups": [
{
"type": "MUL",
"atoms": [
0,
1,
2,
3,
4,
5
],
"mul": 1
}
]
}
}
27 changes: 23 additions & 4 deletions core/indigo-core/molecule/src/smiles_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2872,7 +2872,10 @@ void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _Ato
if (strchr("esfog", scanner.lookNext()) == NULL)
{
if (first_in_brackets)
{
element = ELEM_H;
aromatic = ATOM_ALIPHATIC;
}
else
{
atom.hydrogens = 1;
Expand All @@ -2883,7 +2886,10 @@ void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _Ato
}
}
else
{
element = Element::fromTwoChars('H', scanner.readChar());
aromatic = ATOM_ALIPHATIC;
}
}
// The 'A' symbol is weird too. It can be the 'aliphatic' atomic primitive,
// and can also be Al, Ar, As, Ag, Au, At, Ac, or Am.
Expand All @@ -2899,7 +2905,10 @@ void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _Ato
subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_AROMATICITY, ATOM_ALIPHATIC);
}
else
{
element = Element::fromTwoChars('A', scanner.readChar());
aromatic = ATOM_ALIPHATIC;
}
}
// Similarly, 'R' can start Rb, Ru, Rh, Re, Rn, Ra, Rf, Rg
else if (next == 'R')
Expand Down Expand Up @@ -2934,7 +2943,10 @@ void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _Ato
}
}
else
{
element = Element::fromTwoChars('R', scanner.readChar());
aromatic = ATOM_ALIPHATIC;
}
}
// Yet 'D' can start Db, Ds, Dy
else if (next == 'D')
Expand All @@ -2954,7 +2966,10 @@ void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _Ato
subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_SUBSTITUENTS, degree);
}
else
{
element = Element::fromTwoChars('D', scanner.readChar());
aromatic = ATOM_ALIPHATIC;
}
}
// ... and 'X' can start Xe
else if (next == 'X')
Expand All @@ -2974,7 +2989,10 @@ void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _Ato
subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_CONNECTIVITY, conn);
}
else
{
element = Element::fromTwoChars('X', scanner.readChar());
aromatic = ATOM_ALIPHATIC;
}
}
else if (next == '*')
{
Expand Down Expand Up @@ -3181,22 +3199,23 @@ void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _Ato
element = Element::fromTwoChars2(next, scanner.lookNext());
scanner.skip(1);
if (smarts_mode)
if (element == ELEM_As || element == ELEM_Se)
aromatic = ATOM_ALIPHATIC;
// if (element == ELEM_As || element == ELEM_Se)
aromatic = ATOM_ALIPHATIC;
}
else if ((next == 'C' && scanner.lookNext() == 'n') && first_in_brackets)
{
scanner.skip(1);
element = ELEM_Cn;
aromatic = ATOM_ALIPHATIC;
}
else
{
// It is a single-char uppercase element identifier then
element = Element::fromChar(next);

if (smarts_mode)
if (element == ELEM_B || element == ELEM_C || element == ELEM_N || element == ELEM_O || element == ELEM_P || element == ELEM_S)
aromatic = ATOM_ALIPHATIC;
// if (element == ELEM_B || element == ELEM_C || element == ELEM_N || element == ELEM_O || element == ELEM_P || element == ELEM_S)
aromatic = ATOM_ALIPHATIC;
}
}
else if (next == '@')
Expand Down

0 comments on commit 41bb141

Please sign in to comment.