From f191642ea1a258c739051741d7c7c890e40d2a7c Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Fri, 27 Dec 2024 17:08:54 +0300 Subject: [PATCH] #2361 - Wrong error message if SMILES phosphate has lack of attachment point (#2715) --- .../ref/formats/helm_to_ket.py.out | 2 + .../integration/tests/formats/helm_to_ket.py | 2 + .../tests/formats/ref/helm_smiles_no_ap.ket | 103 ++++++++++++++++++ .../molecule/src/sequence_loader.cpp | 19 +++- 4 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 api/tests/integration/tests/formats/ref/helm_smiles_no_ap.ket diff --git a/api/tests/integration/ref/formats/helm_to_ket.py.out b/api/tests/integration/ref/formats/helm_to_ket.py.out index b4d46f1318..fd8e7d2541 100644 --- a/api/tests/integration/ref/formats/helm_to_ket.py.out +++ b/api/tests/integration/ref/formats/helm_to_ket.py.out @@ -13,6 +13,7 @@ helm_peptide.ket:SUCCEED helm_rna_without_base.ket:SUCCEED helm_simple_rna.ket:SUCCEED helm_smiles.ket:SUCCEED +helm_smiles_no_ap.ket:SUCCEED helm_smiles_sugar.ket:SUCCEED helm_unsplit.ket:SUCCEED Test 'CHEM1{[A6OH]}|PEPTIDE1{A}$CHEM1,PEPTIDE1,1:R2-3:R1$$$V2.0': got expected error 'Polymer 'PEPTIDE1' does not contains monomer with number 3.' @@ -27,3 +28,4 @@ Test 'PEPTIDE1{A'2'}$$$$V2.0': got expected error 'Repeating not supported now.' Test 'PEPTIDE1{D-gGlu}$$$$V2.0': got expected error 'Unexpected symbol. Expected '.' or '}' but found '-'.' Test 'RNA1{R(A).R(A)p}$$$$V2.0': got expected error 'Monomer template with class 'Phosphate' and alias 'R' not found in monomer librarys' Test 'RNA1{R(bla-bla-bla)p}$$$$V2.0': got expected error 'Unexpected symbol. Expected ')' but found 'l'.' 
+Test 'RNA1{R[P(O)(O)(=O)O]}$$$$V2.0': got expected error 'Unknown attachment point 'R1' in monomer Mod0' diff --git a/api/tests/integration/tests/formats/helm_to_ket.py b/api/tests/integration/tests/formats/helm_to_ket.py index f07b22f905..4de3bc08df 100644 --- a/api/tests/integration/tests/formats/helm_to_ket.py +++ b/api/tests/integration/tests/formats/helm_to_ket.py @@ -45,6 +45,7 @@ def find_diff(a, b): "helm_fractional_ratio": "PEPTIDE1{(A:1.5+C:0.1+G:3)}$$$$V2.0", "helm_chem_rna_hydro": "CHEM1{[MCC]}|RNA1{R(U)P}$CHEM1,RNA1,1:pair-3:pair$$$V2.0", "helm_unsplit": "RNA1{[5Br-dU]}$$$$V2.0", + "helm_smiles_no_ap": "CHEM1{[P(O)(O)(=O)O]}$$$$V2.0", } lib = indigo.loadMonomerLibraryFromFile( @@ -78,6 +79,7 @@ def find_diff(a, b): "PEPTIDE1{(A:+C:0.1)}$$$$V2.0": "Unexpected symbol. Expected digit but found '+'", "RNA1{R(A).R(A)p}$$$$V2.0": "Monomer template with class 'Phosphate' and alias 'R' not found in monomer librarys", "PEPTIDE1{(A:1.5.+C:0.1)}$$$$V2.0": "Enexpected symbol. Second dot in number", + "RNA1{R[P(O)(O)(=O)O]}$$$$V2.0": "Unknown attachment point 'R1' in monomer Mod0", } for helm_seq in sorted(helm_errors.keys()): error = helm_errors[helm_seq] diff --git a/api/tests/integration/tests/formats/ref/helm_smiles_no_ap.ket b/api/tests/integration/tests/formats/ref/helm_smiles_no_ap.ket new file mode 100644 index 0000000000..f2fc5c58e1 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/helm_smiles_no_ap.ket @@ -0,0 +1,103 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + } + ], + "templates": [ + { + "$ref": "monomerTemplate-Mod0" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 1, + "position": { + "x": 0.000000, + "y": -0.000000 + }, + "alias": "Mod0", + "templateId": "Mod0" + }, + "monomerTemplate-Mod0": { + "type": "monomerTemplate", + "id": "Mod0", + "class": "CHEM", + "alias": "Mod0", + "atoms": [ + { + "label": "P", + "location": [ + 0.000000, + 0.000000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ 
+ -0.500000, + -0.866025, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.866025, + -0.500000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + -0.866025, + 0.500000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.500000, + 0.866025, + 0.000000 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + } + ] + } +} \ No newline at end of file diff --git a/core/indigo-core/molecule/src/sequence_loader.cpp b/core/indigo-core/molecule/src/sequence_loader.cpp index c37279a4a2..a5c8fcba64 100644 --- a/core/indigo-core/molecule/src/sequence_loader.cpp +++ b/core/indigo-core/molecule/src/sequence_loader.cpp @@ -1330,7 +1330,24 @@ std::string SequenceLoader::readHelmMonomerAlias(KetDocument& document, MonomerC throw Error(unexpected_eod); if (ch != ']') throw Error("Unexpected symbol. Expected ']' but found '%c'.", ch); - if (smiles) + bool found = false; + if (_library.getMonomerTemplateIdByAlias(monomer_class, monomer_alias).size() > 0) + { + found = true; + } + else if (monomer_class == MonomerClass::Sugar) // In place of sugar can be phosphate or unsplit rna + { + if (_library.getMonomerTemplateIdByAlias(MonomerClass::Phosphate, monomer_alias).size() > 0) + { + found = true; + } + else + { + if (_library.getMonomerTemplateIdByAlias(MonomerClass::RNA, monomer_alias).size() > 0) + found = true; + } + } + if (smiles || !found) // Monomer alias not found in library - try read as smiles { // Convert smiles to molecule BufferScanner scanner(monomer_alias.c_str());