Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backmerge #1332 Aromatic atom list is not recognized in SMARTS query (#1370) #1378

Merged
merged 1 commit into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions api/tests/integration/ref/formats/smarts.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,14 @@ Ok expected smarts generated
[#40,#79,#30]-[#6]-[#6] is ok. smarts_in==smarts_out
[#40,#79,#30]-[#6]-[#6] is ok. json_in==json_out
[#40,#79,#30]-[#6]-[#6] is ok. expected string found in json
[#6,#7;a]:[o] is ok. smarts_in==smarts_out
[#6,#7;a]:[o] is ok. json_in==json_out
[#6,#7;a]:[o] is ok. expected string found in json
[c,n,o]:[o] is ok. json_in==json_out
[c,n,o]:[o] is ok. expected string found in json
[c,C,c] is ok. json_in==json_out
[c,C,c] is ok. expected string found in json
[C,c] is ok. json_in==json_out
[C,c] is ok. expected string found in json
[C,c,n,o] is ok. json_in==json_out
[C,c,n,o] is ok. expected string found in json
32 changes: 25 additions & 7 deletions api/tests/integration/tests/formats/smarts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,22 @@ def test_smarts_load_save(smarts_in):
print("smarts_out=%s" % smarts_out)


def test_smarts_load_save_through_ket(smarts_in, expected_str):
def test_smarts_load_save_through_ket(
smarts_in, expected_str, check_smarts_eq=True
):
m1 = indigo.loadSmarts(smarts_in)
json1 = m1.json()
m2 = indigo.loadQueryMolecule(json1)
smarts_out = m2.smarts()
m3 = indigo.loadSmarts(smarts_out)
json2 = m3.json()
if smarts_in == smarts_out:
print("%s is ok. smarts_in==smarts_out" % smarts_in)
else:
print("smarts_in!=smarts_out")
print(" smarts_in=%s" % smarts_in)
print("smarts_out=%s" % smarts_out)
if check_smarts_eq:
if smarts_in == smarts_out:
print("%s is ok. smarts_in==smarts_out" % smarts_in)
else:
print("smarts_in!=smarts_out")
print(" smarts_in=%s" % smarts_in)
print("smarts_out=%s" % smarts_out)
if json1 == json2:
print("%s is ok. json_in==json_out" % smarts_in)
else:
Expand Down Expand Up @@ -204,3 +207,18 @@ def test_smarts_load_save_through_ket(smarts_in, expected_str):
smarts = "[#40,#79,#30]-[#6]-[#6]"
expected = '"atoms":[{"type":"atom-list","elements":["Zr","Au","Zn"],'
test_smarts_load_save_through_ket(smarts, expected)
smarts = "[#6,#7;a]:[o]"
expected = '"atoms":[{"type":"atom-list","elements":["C","N"],"location":[0.0,0.0,0.0],"queryProperties":{"aromaticity":"aromatic"}}'
test_smarts_load_save_through_ket(smarts, expected)
smarts = "[c,n,o]:[o]"
expected = '"atoms":[{"type":"atom-list","elements":["C","N","O"],"location":[0.0,0.0,0.0],"queryProperties":{"aromaticity":"aromatic"}}'
test_smarts_load_save_through_ket(smarts, expected, False)
smarts = "[c,C,c]"
expected = '"atoms":[{"label":"C","location":[0.0,0.0,0.0],"queryProperties":{"customQuery":"c,C,c"}}]'
test_smarts_load_save_through_ket(smarts, expected, False)
smarts = "[C,c]"
expected = '"atoms":[{"label":"C","location":[0.0,0.0,0.0],"queryProperties":{"customQuery":"C,c"}}]'
test_smarts_load_save_through_ket(smarts, expected, False)
smarts = "[C,c,n,o]"
expected = '"atoms":[{"label":"","location":[0.0,0.0,0.0],"queryProperties":{"customQuery":"C,c,n,o"}}]'
test_smarts_load_save_through_ket(smarts, expected, False)
9 changes: 5 additions & 4 deletions core/indigo-core/molecule/query_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ namespace indigo

bool standardize(const StandardizeOptions& options);

static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, const Atom*>& properties);
static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, std::unique_ptr<Atom>>& properties);

protected:
void _getAtomDescription(Atom* atom, Output& out, int depth);
Expand All @@ -428,9 +428,10 @@ namespace indigo
void _removeBonds(const Array<int>& indices) override;

using AtomList = std::pair<bool, std::set<int>>;
static bool _isAtomListOr(const Atom* pqa, std::vector<int>& list);
static bool _isAtomOrListAndProps(const Atom* pqa, std::vector<int>& list, bool& neg, std::map<int, const Atom*>& properties);
static bool _isAtomList(const Atom* qa, AtomList list);
static bool _isAtomListOr(Atom* pqa, std::vector<int>& list);
static bool _isAtomOrListAndProps(Atom* pqa, std::vector<int>& list, bool& neg, std::map<int, std::unique_ptr<Atom>>& properties);
static bool _isAtomList(Atom* qa, AtomList list);
static bool _tryToConvertToList(Atom* p_query_atom, std::vector<int>& atoms, std::map<int, std::unique_ptr<Atom>>& properties);

Array<int> _min_h;

Expand Down
2 changes: 1 addition & 1 deletion core/indigo-core/molecule/src/molecule_json_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ void MoleculeJsonSaver::saveAtoms(BaseMolecule& mol, JsonWriter& writer)
QS_DEF(Array<int>, rg_list);
int radical = 0;
int query_atom_type = QueryMolecule::QUERY_ATOM_UNKNOWN;
std::map<int, const QueryMolecule::Atom*> query_atom_properties;
std::map<int, std::unique_ptr<QueryMolecule::Atom>> query_atom_properties;
bool is_rSite = mol.isRSite(i);
if (is_rSite)
{
Expand Down
137 changes: 116 additions & 21 deletions core/indigo-core/molecule/src/query_molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2333,7 +2333,93 @@ QueryMolecule::Atom* QueryMolecule::stripKnownAttrs(QueryMolecule::Atom& qa)
return qd == NULL ? &qa : qd;
}

bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::vector<int>& list)
// TODO: develop function to convert tree to CNF to simplify checks
bool QueryMolecule::_tryToConvertToList(Atom* p_query_atom, std::vector<int>& atoms, std::map<int, std::unique_ptr<Atom>>& properties)
{
// Try to convert a1p1p2..pn, a2p1p2..pn, .. , akp1p2..pn to (a1, a2, .. an)p1p2..pn
if (!p_query_atom)
return false;
if (p_query_atom->type != OP_OR)
return false;
int size = p_query_atom->children.size();
if (size < 2)
return false;
std::vector<std::unique_ptr<Atom>> atoms_properties;
std::vector<int> atoms_list;
int list_element_child_count = -1;
for (int i = 0; i < size; i++)
{
std::unique_ptr<Atom> child(p_query_atom->child(i)->clone());
if (child->type != OP_AND)
return false;
if (list_element_child_count < 0)
{
list_element_child_count = child->children.size();
if (list_element_child_count < 2)
return false;
}
else
{
if (list_element_child_count != child->children.size())
return false; // All list elements should have same count of properties
}
std::vector<std::unique_ptr<Atom>> props;
bool atom_not_found = true;
for (int j = 0; j < child->children.size(); j++)
{
std::unique_ptr<Atom> child_prop(child->child(j)->clone());
switch (child_prop->type)
{
case OP_AND:
case OP_OR:
case OP_NOT:
case OP_NONE:
return false;
break;
case ATOM_NUMBER:
if (child_prop->value_min != child_prop->value_max)
return false;
atoms_list.emplace_back(child_prop->value_min);
atom_not_found = false;
break;
default:
if (i == 0) // first atom
atoms_properties.emplace_back(std::move(child_prop));
else
props.emplace_back(std::move(child_prop));
break;
}
}
if (atom_not_found)
return false;
auto compare = [](const std::unique_ptr<Atom>& a, const std::unique_ptr<Atom>& b) { return a->type > b->type; };
if (i == 0) // first atom
{
std::sort(atoms_properties.begin(), atoms_properties.end(), compare);
}
else
{
// check that children[i] props == chlidren[0] props
std::sort(props.begin(), props.end(), compare);
if (props.size() != atoms_properties.size())
return false;
for (int j = 0; j < props.size(); j++)
{
if (props[j]->type != atoms_properties[j]->type || props[j]->value_min != atoms_properties[j]->value_min ||
props[j]->value_max != atoms_properties[j]->value_max)
return false;
}
}
}
atoms = atoms_list;
for (auto& prop : atoms_properties)
{
properties[prop->type] = std::move(prop);
}
return true;
}

bool QueryMolecule::_isAtomListOr(Atom* p_query_atom, std::vector<int>& list)
{
// Check if p_query_atom atom list like or(a1,a2,a3, or(a4,a5,a6), a7)
if (!p_query_atom)
Expand All @@ -2343,8 +2429,8 @@ bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::vector<int>& li
std::vector<int> collected;
for (auto i = 0; i < p_query_atom->children.size(); i++)
{
Atom* p_query_atom_child = const_cast<Atom*>(p_query_atom)->child(i);
if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_max == p_query_atom_child->value_max)
Atom* p_query_atom_child = p_query_atom->child(i);
if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max)
{
collected.emplace_back(p_query_atom_child->value_min);
}
Expand All @@ -2362,22 +2448,20 @@ bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::vector<int>& li
return true;
}

bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::vector<int>& list, bool& neg, std::map<int, const Atom*>& properties)
bool QueryMolecule::_isAtomOrListAndProps(Atom* p_query_atom, std::vector<int>& list, bool& neg, std::map<int, std::unique_ptr<Atom>>& properties)
{
// Check if p_query_atom contains only atom or atom list and atom properties connected by "and"
// atom list is positive i.e. or(a1,a2,a3,or(a4,a5),a6) or negative
// negative list like is set of op_not(atom_number) or op_not(positevi list), this set connected by "and"
if (!p_query_atom)
return false;
std::vector<int> collected;
std::map<int, const Atom*> collected_properties;
if (p_query_atom->type != OP_AND)
{
const Atom* p_query_atom_child = p_query_atom;
Atom* p_query_atom_child = p_query_atom;
bool is_neg = false;
if (p_query_atom->type == OP_NOT)
{
p_query_atom_child = const_cast<Atom*>(p_query_atom)->child(0);
p_query_atom_child = p_query_atom->child(0);
is_neg = true;
}
if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max)
Expand All @@ -2388,53 +2472,64 @@ bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::vector<
}
else if (!is_neg && p_query_atom_child->type > ATOM_NUMBER && p_query_atom_child->type <= ATOM_CHIRALITY) // atom property, no negative props here
{
properties.emplace(p_query_atom_child->type, p_query_atom_child);
properties[p_query_atom_child->type] = std::unique_ptr<Atom>(p_query_atom_child->clone());
return true;
}
else if (_isAtomListOr(p_query_atom_child, collected))
else
{
neg = is_neg;
for (auto item : collected)
list.emplace_back(item);
return true;
if (!is_neg && _tryToConvertToList(p_query_atom, list, properties))
{
return true;
}
std::vector<int> collected;
if (_isAtomListOr(p_query_atom_child, collected))
{
neg = is_neg;
for (auto item : collected)
list.emplace_back(item);
return true;
}
}

return false;
}
// OP_AND
for (auto i = 0; i < p_query_atom->children.size(); i++)
{
Atom* p_query_atom_child = const_cast<Atom*>(p_query_atom)->child(i);
bool is_neg = false;
std::vector<int> collected;
std::map<int, std::unique_ptr<Atom>> collected_properties;
if (_isAtomOrListAndProps(p_query_atom_child, collected, is_neg, collected_properties))
{
if (list.size() > 0 && is_neg != neg) // allowed only one list type in set - positive or negative
return false;
neg = is_neg;
for (auto item : collected)
list.emplace_back(item);
collected.clear();
properties.insert(collected_properties.begin(), collected_properties.end());
collected_properties.clear();
for (auto& prop : collected_properties)
properties[prop.first] = std::move(prop.second);
}
else
return false;
}
return true;
}

int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, const Atom*>& properties)
int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, std::unique_ptr<Atom>>& properties)
{
std::vector<int> atom_list;
std::map<int, const Atom*> atom_pros;
std::map<int, std::unique_ptr<Atom>> atom_props;
bool negative = false;
QueryMolecule::Atom& qa = qm.getAtom(aid);
if (qa.type == QueryMolecule::OP_NONE)
return QUERY_ATOM_AH;
if (_isAtomOrListAndProps(&qa, atom_list, negative, atom_pros))
if (_isAtomOrListAndProps(&qa, atom_list, negative, atom_props))
{
list = atom_list;
std::sort(atom_list.begin(), atom_list.end());
properties.insert(atom_pros.begin(), atom_pros.end());
for (auto& prop : atom_props)
properties[prop.first] = std::move(prop.second);

if (negative)
{
Expand Down
Loading