Skip to content

Commit

Permalink
Backmerge #1332 Aromatic atom list is not recognized in SMARTS query (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
AliaksandrDziarkach committed Oct 25, 2023
1 parent 8fe5343 commit 4c058e1
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 33 deletions.
11 changes: 11 additions & 0 deletions api/tests/integration/ref/formats/smarts.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,14 @@ Ok expected smarts generated
[#40,#79,#30]-[#6]-[#6] is ok. smarts_in==smarts_out
[#40,#79,#30]-[#6]-[#6] is ok. json_in==json_out
[#40,#79,#30]-[#6]-[#6] is ok. expected string found in json
[#6,#7;a]:[o] is ok. smarts_in==smarts_out
[#6,#7;a]:[o] is ok. json_in==json_out
[#6,#7;a]:[o] is ok. expected string found in json
[c,n,o]:[o] is ok. json_in==json_out
[c,n,o]:[o] is ok. expected string found in json
[c,C,c] is ok. json_in==json_out
[c,C,c] is ok. expected string found in json
[C,c] is ok. json_in==json_out
[C,c] is ok. expected string found in json
[C,c,n,o] is ok. json_in==json_out
[C,c,n,o] is ok. expected string found in json
32 changes: 25 additions & 7 deletions api/tests/integration/tests/formats/smarts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,22 @@ def test_smarts_load_save(smarts_in):
print("smarts_out=%s" % smarts_out)


def test_smarts_load_save_through_ket(smarts_in, expected_str):
def test_smarts_load_save_through_ket(
smarts_in, expected_str, check_smarts_eq=True
):
m1 = indigo.loadSmarts(smarts_in)
json1 = m1.json()
m2 = indigo.loadQueryMolecule(json1)
smarts_out = m2.smarts()
m3 = indigo.loadSmarts(smarts_out)
json2 = m3.json()
if smarts_in == smarts_out:
print("%s is ok. smarts_in==smarts_out" % smarts_in)
else:
print("smarts_in!=smarts_out")
print(" smarts_in=%s" % smarts_in)
print("smarts_out=%s" % smarts_out)
if check_smarts_eq:
if smarts_in == smarts_out:
print("%s is ok. smarts_in==smarts_out" % smarts_in)
else:
print("smarts_in!=smarts_out")
print(" smarts_in=%s" % smarts_in)
print("smarts_out=%s" % smarts_out)
if json1 == json2:
print("%s is ok. json_in==json_out" % smarts_in)
else:
Expand Down Expand Up @@ -204,3 +207,18 @@ def test_smarts_load_save_through_ket(smarts_in, expected_str):
smarts = "[#40,#79,#30]-[#6]-[#6]"
expected = '"atoms":[{"type":"atom-list","elements":["Zr","Au","Zn"],'
test_smarts_load_save_through_ket(smarts, expected)
smarts = "[#6,#7;a]:[o]"
expected = '"atoms":[{"type":"atom-list","elements":["C","N"],"location":[0.0,0.0,0.0],"queryProperties":{"aromaticity":"aromatic"}}'
test_smarts_load_save_through_ket(smarts, expected)
smarts = "[c,n,o]:[o]"
expected = '"atoms":[{"type":"atom-list","elements":["C","N","O"],"location":[0.0,0.0,0.0],"queryProperties":{"aromaticity":"aromatic"}}'
test_smarts_load_save_through_ket(smarts, expected, False)
smarts = "[c,C,c]"
expected = '"atoms":[{"label":"C","location":[0.0,0.0,0.0],"queryProperties":{"customQuery":"c,C,c"}}]'
test_smarts_load_save_through_ket(smarts, expected, False)
smarts = "[C,c]"
expected = '"atoms":[{"label":"C","location":[0.0,0.0,0.0],"queryProperties":{"customQuery":"C,c"}}]'
test_smarts_load_save_through_ket(smarts, expected, False)
smarts = "[C,c,n,o]"
expected = '"atoms":[{"label":"","location":[0.0,0.0,0.0],"queryProperties":{"customQuery":"C,c,n,o"}}]'
test_smarts_load_save_through_ket(smarts, expected, False)
9 changes: 5 additions & 4 deletions core/indigo-core/molecule/query_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ namespace indigo

bool standardize(const StandardizeOptions& options);

static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, const Atom*>& properties);
static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, std::unique_ptr<Atom>>& properties);

protected:
void _getAtomDescription(Atom* atom, Output& out, int depth);
Expand All @@ -428,9 +428,10 @@ namespace indigo
void _removeBonds(const Array<int>& indices) override;

using AtomList = std::pair<bool, std::set<int>>;
static bool _isAtomListOr(const Atom* pqa, std::vector<int>& list);
static bool _isAtomOrListAndProps(const Atom* pqa, std::vector<int>& list, bool& neg, std::map<int, const Atom*>& properties);
static bool _isAtomList(const Atom* qa, AtomList list);
static bool _isAtomListOr(Atom* pqa, std::vector<int>& list);
static bool _isAtomOrListAndProps(Atom* pqa, std::vector<int>& list, bool& neg, std::map<int, std::unique_ptr<Atom>>& properties);
static bool _isAtomList(Atom* qa, AtomList list);
static bool _tryToConvertToList(Atom* p_query_atom, std::vector<int>& atoms, std::map<int, std::unique_ptr<Atom>>& properties);

Array<int> _min_h;

Expand Down
2 changes: 1 addition & 1 deletion core/indigo-core/molecule/src/molecule_json_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ void MoleculeJsonSaver::saveAtoms(BaseMolecule& mol, JsonWriter& writer)
QS_DEF(Array<int>, rg_list);
int radical = 0;
int query_atom_type = QueryMolecule::QUERY_ATOM_UNKNOWN;
std::map<int, const QueryMolecule::Atom*> query_atom_properties;
std::map<int, std::unique_ptr<QueryMolecule::Atom>> query_atom_properties;
bool is_rSite = mol.isRSite(i);
if (is_rSite)
{
Expand Down
137 changes: 116 additions & 21 deletions core/indigo-core/molecule/src/query_molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2333,7 +2333,93 @@ QueryMolecule::Atom* QueryMolecule::stripKnownAttrs(QueryMolecule::Atom& qa)
return qd == NULL ? &qa : qd;
}

bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::vector<int>& list)
// TODO: develop function to convert tree to CNF to simplify checks
bool QueryMolecule::_tryToConvertToList(Atom* p_query_atom, std::vector<int>& atoms, std::map<int, std::unique_ptr<Atom>>& properties)
{
// Try to convert a1p1p2..pn, a2p1p2..pn, .. , akp1p2..pn to (a1, a2, .. an)p1p2..pn
if (!p_query_atom)
return false;
if (p_query_atom->type != OP_OR)
return false;
int size = p_query_atom->children.size();
if (size < 2)
return false;
std::vector<std::unique_ptr<Atom>> atoms_properties;
std::vector<int> atoms_list;
int list_element_child_count = -1;
for (int i = 0; i < size; i++)
{
std::unique_ptr<Atom> child(p_query_atom->child(i)->clone());
if (child->type != OP_AND)
return false;
if (list_element_child_count < 0)
{
list_element_child_count = child->children.size();
if (list_element_child_count < 2)
return false;
}
else
{
if (list_element_child_count != child->children.size())
return false; // All list elements should have same count of properties
}
std::vector<std::unique_ptr<Atom>> props;
bool atom_not_found = true;
for (int j = 0; j < child->children.size(); j++)
{
std::unique_ptr<Atom> child_prop(child->child(j)->clone());
switch (child_prop->type)
{
case OP_AND:
case OP_OR:
case OP_NOT:
case OP_NONE:
return false;
break;
case ATOM_NUMBER:
if (child_prop->value_min != child_prop->value_max)
return false;
atoms_list.emplace_back(child_prop->value_min);
atom_not_found = false;
break;
default:
if (i == 0) // first atom
atoms_properties.emplace_back(std::move(child_prop));
else
props.emplace_back(std::move(child_prop));
break;
}
}
if (atom_not_found)
return false;
auto compare = [](const std::unique_ptr<Atom>& a, const std::unique_ptr<Atom>& b) { return a->type > b->type; };
if (i == 0) // first atom
{
std::sort(atoms_properties.begin(), atoms_properties.end(), compare);
}
else
{
// check that children[i] props == chlidren[0] props
std::sort(props.begin(), props.end(), compare);
if (props.size() != atoms_properties.size())
return false;
for (int j = 0; j < props.size(); j++)
{
if (props[j]->type != atoms_properties[j]->type || props[j]->value_min != atoms_properties[j]->value_min ||
props[j]->value_max != atoms_properties[j]->value_max)
return false;
}
}
}
atoms = atoms_list;
for (auto& prop : atoms_properties)
{
properties[prop->type] = std::move(prop);
}
return true;
}

bool QueryMolecule::_isAtomListOr(Atom* p_query_atom, std::vector<int>& list)
{
// Check if p_query_atom atom list like or(a1,a2,a3, or(a4,a5,a6), a7)
if (!p_query_atom)
Expand All @@ -2343,8 +2429,8 @@ bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::vector<int>& li
std::vector<int> collected;
for (auto i = 0; i < p_query_atom->children.size(); i++)
{
Atom* p_query_atom_child = const_cast<Atom*>(p_query_atom)->child(i);
if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_max == p_query_atom_child->value_max)
Atom* p_query_atom_child = p_query_atom->child(i);
if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max)
{
collected.emplace_back(p_query_atom_child->value_min);
}
Expand All @@ -2362,22 +2448,20 @@ bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::vector<int>& li
return true;
}

bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::vector<int>& list, bool& neg, std::map<int, const Atom*>& properties)
bool QueryMolecule::_isAtomOrListAndProps(Atom* p_query_atom, std::vector<int>& list, bool& neg, std::map<int, std::unique_ptr<Atom>>& properties)
{
// Check if p_query_atom contains only atom or atom list and atom properties connected by "and"
// atom list is positive i.e. or(a1,a2,a3,or(a4,a5),a6) or negative
// negative list like is set of op_not(atom_number) or op_not(positevi list), this set connected by "and"
if (!p_query_atom)
return false;
std::vector<int> collected;
std::map<int, const Atom*> collected_properties;
if (p_query_atom->type != OP_AND)
{
const Atom* p_query_atom_child = p_query_atom;
Atom* p_query_atom_child = p_query_atom;
bool is_neg = false;
if (p_query_atom->type == OP_NOT)
{
p_query_atom_child = const_cast<Atom*>(p_query_atom)->child(0);
p_query_atom_child = p_query_atom->child(0);
is_neg = true;
}
if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max)
Expand All @@ -2388,53 +2472,64 @@ bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::vector<
}
else if (!is_neg && p_query_atom_child->type > ATOM_NUMBER && p_query_atom_child->type <= ATOM_CHIRALITY) // atom property, no negative props here
{
properties.emplace(p_query_atom_child->type, p_query_atom_child);
properties[p_query_atom_child->type] = std::unique_ptr<Atom>(p_query_atom_child->clone());
return true;
}
else if (_isAtomListOr(p_query_atom_child, collected))
else
{
neg = is_neg;
for (auto item : collected)
list.emplace_back(item);
return true;
if (!is_neg && _tryToConvertToList(p_query_atom, list, properties))
{
return true;
}
std::vector<int> collected;
if (_isAtomListOr(p_query_atom_child, collected))
{
neg = is_neg;
for (auto item : collected)
list.emplace_back(item);
return true;
}
}

return false;
}
// OP_AND
for (auto i = 0; i < p_query_atom->children.size(); i++)
{
Atom* p_query_atom_child = const_cast<Atom*>(p_query_atom)->child(i);
bool is_neg = false;
std::vector<int> collected;
std::map<int, std::unique_ptr<Atom>> collected_properties;
if (_isAtomOrListAndProps(p_query_atom_child, collected, is_neg, collected_properties))
{
if (list.size() > 0 && is_neg != neg) // allowed only one list type in set - positive or negative
return false;
neg = is_neg;
for (auto item : collected)
list.emplace_back(item);
collected.clear();
properties.insert(collected_properties.begin(), collected_properties.end());
collected_properties.clear();
for (auto& prop : collected_properties)
properties[prop.first] = std::move(prop.second);
}
else
return false;
}
return true;
}

int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, const Atom*>& properties)
int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, std::unique_ptr<Atom>>& properties)
{
std::vector<int> atom_list;
std::map<int, const Atom*> atom_pros;
std::map<int, std::unique_ptr<Atom>> atom_props;
bool negative = false;
QueryMolecule::Atom& qa = qm.getAtom(aid);
if (qa.type == QueryMolecule::OP_NONE)
return QUERY_ATOM_AH;
if (_isAtomOrListAndProps(&qa, atom_list, negative, atom_pros))
if (_isAtomOrListAndProps(&qa, atom_list, negative, atom_props))
{
list = atom_list;
std::sort(atom_list.begin(), atom_list.end());
properties.insert(atom_pros.begin(), atom_pros.end());
for (auto& prop : atom_props)
properties[prop.first] = std::move(prop.second);

if (negative)
{
Expand Down

0 comments on commit 4c058e1

Please sign in to comment.