From b7a80d65ac4a6d42df550bb6668bc89c778ebbfa Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Thu, 1 Sep 2022 16:44:19 +0100 Subject: [PATCH 1/7] test search_engine vs mapr --- Search_Engine_test.ipynb | 280 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 Search_Engine_test.ipynb diff --git a/Search_Engine_test.ipynb b/Search_Engine_test.ipynb new file mode 100644 index 00000000..330d598a --- /dev/null +++ b/Search_Engine_test.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "39a371b9", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d3b6c7f8", + "metadata": {}, + "outputs": [], + "source": [ + "INDEX_PAGE = \"https://idr-testing.openmicroscopy.org/webclient/?experimenter=-1\"\n", + "SEARCH_ENGINE_URL = \"https://idr-testing.openmicroscopy.org/searchengine/api/v1/resources/{type}/search/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "12d45593", + "metadata": {}, + "outputs": [], + "source": [ + "# URL to use mapr\n", + "MAPR_URL = \"https://idr-testing.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true\"\n", + "SCREENS_PROJECTS_URL = \"https://idr-testing.openmicroscopy.org/mapr/api/{key}/?value={value}\"\n", + "PLATES_URL = \"https://idr-testing.openmicroscopy.org/mapr/api/{key}/plates/?value={value}&id={screen_id}\"\n", + "DATASETS_URL = \"https://idr-testing.openmicroscopy.org/mapr/api/{key}/datasets/?value={value}&id={project_id}\"\n", + "IMAGES_URL = \"https://idr-testing.openmicroscopy.org/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a92a373d", + "metadata": {}, + "outputs": [], + "source": [ + "# create http session\n", + "with requests.Session() as session:\n", + " request = requests.Request('GET', INDEX_PAGE)\n", + " prepped = session.prepare_request(request)\n", + " response = session.send(prepped)\n", + " if response.status_code != 200:\n", + " response.raise_for_status()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4ae90355", + "metadata": {}, + "outputs": [], + "source": [ + "KEY_VALUE_SEARCH = SEARCH_ENGINE_URL + \"?key={key}&value={value}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ec871b3e", + "metadata": {}, + "outputs": [], + "source": [ + "KEY = \"Gene Symbol\"\n", + "KEY_MAPR = \"gene\"\n", + "GENES = [\"pax1\", \"pep\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "5c606dd9", + "metadata": {}, + "outputs": [], + "source": [ + "# Helper method retrieving the result using directly the search api\n", + "def load_using_search_api():\n", + " results = {}\n", + " for gene in GENES:\n", + " ids = []\n", + " qs1 = {'type': 'image', 'key': KEY, 'value': gene}\n", + " url = KEY_VALUE_SEARCH.format(**qs1) \n", + " json = session.get(url).json()\n", + " images = json['results']['results']\n", + " for image in images:\n", + " if image['id'] not in ids:\n", + " ids.append(image['id'])\n", + " results[gene.lower()] = ids\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "7dcf6ab8", + "metadata": {}, + "outputs": [], + "source": [ + "def get_genes():\n", + " genes = []\n", + " for gene in GENES:\n", + " qs1 = {'key': KEY_MAPR, 'value': gene}\n", + " url = MAPR_URL.format(**qs1)\n", + " json = session.get(url).json()\n", + " for m in json['maps']: \n", + " genes.append(m['id'])\n", + " return genes\n", + "\n", + "def parse_annotation(images, json_data, gene, name, data_type):\n", + " screen_name = \"-\"\n", + " plate_name = \"-\"\n", + " project_name = \"-\"\n", + " dataset_name = \"-\"\n", + " if data_type == 'datasets':\n", + " project_name = name\n", + " else:\n", + " screen_name = name\n", + " \n", + " for p in json_data[data_type]:\n", + " parent_id = p['id']\n", + " if data_type == 'datasets':\n", + " dataset_name = p['name']\n", + " else:\n", + " plate_name = p['name']\n", + " qs3 = {'key': KEY_MAPR, 'value': gene,\n", + " 'parent_type': data_type[:-1], 'parent_id': parent_id}\n", + " url3 = IMAGES_URL.format(**qs3)\n", + " json = session.get(url3).json()\n", + " for i in json['images']:\n", + " if i['id'] not in images:\n", + " images.append(i['id'])\n", + " \n", + "def load_using_mapr():\n", + " results = {}\n", + " genes = get_genes()\n", + " images = []\n", + " for gene in genes:\n", + " qs1 = {'key': KEY_MAPR, 'value': gene}\n", + " url1 = MAPR_URL.format(**qs1)\n", + " json = session.get(url1).json()\n", + " for m in json['maps']:\n", + " qs2 = {'key': KEY_MAPR, 'value': gene}\n", + " url2 = SCREENS_PROJECTS_URL.format(**qs2)\n", + " json = session.get(url2).json()\n", + " for s in json['screens']:\n", + " gene = s['extra']['value']\n", + " qs3 = {'key': KEY_MAPR, 'value': gene, 'screen_id': s['id']}\n", + " url3 = PLATES_URL.format(**qs3)\n", + " parse_annotation(images, session.get(url3).json(), gene, s['name'], 'plates')\n", + " for p in json['projects']:\n", + " gene = s['extra']['value']\n", + " qs3 = {'key': KEY_MAPR, 'value': gene, 'project_id': p['id']}\n", + " url3 = DATASETS_URL.format(**qs3)\n", + " parse_annotation(images, session.get(url3).json(), gene, p['name'], 'datasets')\n", + " results[gene.lower()] = images\n", + " return results\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "0f968d98", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 62.7 ms, sys: 19.9 ms, total: 82.6 ms\n", + "Wall time: 906 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "results = load_using_search_api()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "b2e40507", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 292 ms, sys: 50.4 ms, total: 342 ms\n", + "Wall time: 3.83 s\n" + ] + } + ], + "source": [ + "%%time\n", + "results_mapr = load_using_mapr()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "ee10d1f5", + "metadata": {}, + "outputs": [], + "source": [ + "def dict_compare(d1, d2):\n", + " d1_keys = set(d1.keys())\n", + " d2_keys = set(d2.keys())\n", + " shared_keys = d1_keys.intersection(d2_keys)\n", + " added = d1_keys - d2_keys\n", + " removed = d2_keys - d1_keys \n", + " modified = {o : (d1[o], d2[o]) for o in shared_keys if d1[o].sort() != d2[o].sort()}\n", + " same = set(o for o in shared_keys if d1[o].sort() == d2[o].sort())\n", + " return added, removed, modified, same" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "16f24578", + "metadata": {}, + "outputs": [], + "source": [ + "added, removed, modified, same = dict_compare(results, results_mapr)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "4adb7c3d", + "metadata": {}, + "outputs": [], + "source": [ + "assert len(added) == 0\n", + "assert len(removed) == 0\n", + "assert len(modified) == 0\n", + "assert len(same) == len(GENES)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2ac2158", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:stardist]", + "language": "python", + "name": "conda-env-stardist-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From a0bd05534b30948c9dd4877202b9a53ea68fc149 Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Tue, 6 Sep 2022 11:35:15 +0100 Subject: [PATCH 2/7] add doc --- Search_Engine_test.ipynb | 158 +++++++++++++++++++++++++-------------- 1 file changed, 101 insertions(+), 57 deletions(-) diff --git a/Search_Engine_test.ipynb b/Search_Engine_test.ipynb index 330d598a..315656b2 100644 --- a/Search_Engine_test.ipynb +++ b/Search_Engine_test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "39a371b9", "metadata": {}, "outputs": [], @@ -11,20 +11,37 @@ "import json" ] }, + { + "cell_type": "markdown", + "id": "a351b7e9", + "metadata": {}, + "source": [ + "### URL to use to search via search engine" + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "d3b6c7f8", "metadata": {}, "outputs": [], "source": [ "INDEX_PAGE = \"https://idr-testing.openmicroscopy.org/webclient/?experimenter=-1\"\n", - "SEARCH_ENGINE_URL = \"https://idr-testing.openmicroscopy.org/searchengine/api/v1/resources/{type}/search/\"" + "SEARCH_ENGINE_URL = \"https://idr-testing.openmicroscopy.org/searchengine/api/v1/resources/{type}/search/\"\n", + "KEY_VALUE_SEARCH = SEARCH_ENGINE_URL + \"?key={key}&value={value}\"" + ] + }, + { + "cell_type": "markdown", + "id": "a9bd9222", + "metadata": {}, + "source": [ + "### URLs to use to search via ``mapr``" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "12d45593", "metadata": {}, "outputs": [], @@ -39,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "a92a373d", "metadata": {}, "outputs": [], @@ -55,29 +72,30 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "4ae90355", + "execution_count": 6, + "id": "ec871b3e", "metadata": {}, "outputs": [], "source": [ - "KEY_VALUE_SEARCH = SEARCH_ENGINE_URL + \"?key={key}&value={value}\"" + "# Key used by search engine\n", + "KEY = \"Gene Symbol\"\n", + "# Mapr equivalent key\n", + "KEY_MAPR = \"gene\"\n", + "# List of items to search for\n", + "ITEMS = [\"pax1\", \"pep\"]" ] }, { - "cell_type": "code", - "execution_count": 25, - "id": "ec871b3e", + "cell_type": "markdown", + "id": "372baec2", "metadata": {}, - "outputs": [], "source": [ - "KEY = \"Gene Symbol\"\n", - "KEY_MAPR = \"gene\"\n", - "GENES = [\"pax1\", \"pep\"]" + "### Helper method to retrieve images using the search engine" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 13, "id": "5c606dd9", "metadata": {}, "outputs": [], @@ -85,37 +103,45 @@ "# Helper method retrieving the result using directly the search api\n", "def load_using_search_api():\n", " results = {}\n", - " for gene in GENES:\n", + " for item in ITEMS:\n", " ids = []\n", - " qs1 = {'type': 'image', 'key': KEY, 'value': gene}\n", + " qs1 = {'type': 'image', 'key': KEY, 'value': item}\n", " url = KEY_VALUE_SEARCH.format(**qs1) \n", " json = session.get(url).json()\n", " images = json['results']['results']\n", " for image in images:\n", " if image['id'] not in ids:\n", " ids.append(image['id'])\n", - " results[gene.lower()] = ids\n", + " results[item.lower()] = ids\n", " return results" ] }, + { + "cell_type": "markdown", + "id": "fda3a37c", + "metadata": {}, + "source": [ + "### Helper method to retrieve images using ``mapr``" + ] + }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 20, "id": "7dcf6ab8", "metadata": {}, "outputs": [], "source": [ - "def get_genes():\n", - " genes = []\n", - " for gene in GENES:\n", - " qs1 = {'key': KEY_MAPR, 'value': gene}\n", + "def get_items():\n", + " items = []\n", + " for item in ITEMS:\n", + " qs1 = {'key': KEY_MAPR, 'value': item}\n", " url = MAPR_URL.format(**qs1)\n", " json = session.get(url).json()\n", " for m in json['maps']: \n", - " genes.append(m['id'])\n", - " return genes\n", + " items.append(m['id'])\n", + " return items\n", "\n", - "def parse_annotation(images, json_data, gene, name, data_type):\n", + "def parse_annotation(images, json_data, item, name, data_type):\n", " screen_name = \"-\"\n", " plate_name = \"-\"\n", " project_name = \"-\"\n", @@ -131,7 +157,7 @@ " dataset_name = p['name']\n", " else:\n", " plate_name = p['name']\n", - " qs3 = {'key': KEY_MAPR, 'value': gene,\n", + " qs3 = {'key': KEY_MAPR, 'value': item,\n", " 'parent_type': data_type[:-1], 'parent_id': parent_id}\n", " url3 = IMAGES_URL.format(**qs3)\n", " json = session.get(url3).json()\n", @@ -141,34 +167,42 @@ " \n", "def load_using_mapr():\n", " results = {}\n", - " genes = get_genes()\n", + " items = get_items()\n", " images = []\n", - " for gene in genes:\n", - " qs1 = {'key': KEY_MAPR, 'value': gene}\n", + " for item in items:\n", + " qs1 = {'key': KEY_MAPR, 'value': item}\n", " url1 = MAPR_URL.format(**qs1)\n", " json = session.get(url1).json()\n", " for m in json['maps']:\n", - " qs2 = {'key': KEY_MAPR, 'value': gene}\n", + " qs2 = {'key': KEY_MAPR, 'value': item}\n", " url2 = SCREENS_PROJECTS_URL.format(**qs2)\n", " json = session.get(url2).json()\n", " for s in json['screens']:\n", - " gene = s['extra']['value']\n", - " qs3 = {'key': KEY_MAPR, 'value': gene, 'screen_id': s['id']}\n", + " item = s['extra']['value']\n", + " qs3 = {'key': KEY_MAPR, 'value': item, 'screen_id': s['id']}\n", " url3 = PLATES_URL.format(**qs3)\n", - " parse_annotation(images, session.get(url3).json(), gene, s['name'], 'plates')\n", + " parse_annotation(images, session.get(url3).json(), item, s['name'], 'plates')\n", " for p in json['projects']:\n", - " gene = s['extra']['value']\n", - " qs3 = {'key': KEY_MAPR, 'value': gene, 'project_id': p['id']}\n", + " item = s['extra']['value']\n", + " qs3 = {'key': KEY_MAPR, 'value': item, 'project_id': p['id']}\n", " url3 = DATASETS_URL.format(**qs3)\n", - " parse_annotation(images, session.get(url3).json(), gene, p['name'], 'datasets')\n", - " results[gene.lower()] = images\n", + " parse_annotation(images, session.get(url3).json(), item, p['name'], 'datasets')\n", + " results[item.lower()] = images\n", " return results\n", " " ] }, + { + "cell_type": "markdown", + "id": "e7cb2ea8", + "metadata": {}, + "source": [ + "### Search using search engine " + ] + }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 21, "id": "0f968d98", "metadata": {}, "outputs": [ @@ -176,8 +210,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 62.7 ms, sys: 19.9 ms, total: 82.6 ms\n", - "Wall time: 906 ms\n" + "CPU times: user 65.9 ms, sys: 35.9 ms, total: 102 ms\n", + "Wall time: 1.19 s\n" ] } ], @@ -186,9 +220,17 @@ "results = load_using_search_api()" ] }, + { + "cell_type": "markdown", + "id": "77bb8ccf", + "metadata": {}, + "source": [ + "### Search using ``mapr`` " + ] + }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 22, "id": "b2e40507", "metadata": {}, "outputs": [ @@ -196,8 +238,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 292 ms, sys: 50.4 ms, total: 342 ms\n", - "Wall time: 3.83 s\n" + "CPU times: user 364 ms, sys: 73.2 ms, total: 437 ms\n", + "Wall time: 4.52 s\n" ] } ], @@ -206,9 +248,19 @@ "results_mapr = load_using_mapr()" ] }, + { + "cell_type": "markdown", + "id": "f8e5f8b8", + "metadata": {}, + "source": [ + "### Compare the outputs of the search\n", + "\n", + "The checks below compare the keys e.g. gene list and the values i.e. image ids" + ] + }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 23, "id": "ee10d1f5", "metadata": {}, "outputs": [], @@ -226,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 24, "id": "16f24578", "metadata": {}, "outputs": [], @@ -236,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 26, "id": "4adb7c3d", "metadata": {}, "outputs": [], @@ -244,16 +296,8 @@ "assert len(added) == 0\n", "assert len(removed) == 0\n", "assert len(modified) == 0\n", - "assert len(same) == len(GENES)" + "assert len(same) == len(ITEMS)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c2ac2158", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 0b9c4d1cd4c820e8b6d5e54831b65d8263f92752 Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Tue, 6 Sep 2022 13:52:04 +0100 Subject: [PATCH 3/7] handle case without results --- Search_Engine_test.ipynb | 49 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/Search_Engine_test.ipynb b/Search_Engine_test.ipynb index 315656b2..b2cbab89 100644 --- a/Search_Engine_test.ipynb +++ b/Search_Engine_test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 27, "id": "39a371b9", "metadata": {}, "outputs": [], @@ -13,7 +13,7 @@ }, { "cell_type": "markdown", - "id": "a351b7e9", + "id": "dc77ab1e", "metadata": {}, "source": [ "### URL to use to search via search engine" @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 28, "id": "d3b6c7f8", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "markdown", - "id": "a9bd9222", + "id": "a1834176", "metadata": {}, "source": [ "### URLs to use to search via ``mapr``" @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 29, "id": "12d45593", "metadata": {}, "outputs": [], @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 30, "id": "a92a373d", "metadata": {}, "outputs": [], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 35, "id": "ec871b3e", "metadata": {}, "outputs": [], @@ -87,7 +87,7 @@ }, { "cell_type": "markdown", - "id": "372baec2", + "id": "2c61954a", "metadata": {}, "source": [ "### Helper method to retrieve images using the search engine" @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 42, "id": "5c606dd9", "metadata": {}, "outputs": [], @@ -108,17 +108,18 @@ " qs1 = {'type': 'image', 'key': KEY, 'value': item}\n", " url = KEY_VALUE_SEARCH.format(**qs1) \n", " json = session.get(url).json()\n", - " images = json['results']['results']\n", - " for image in images:\n", - " if image['id'] not in ids:\n", - " ids.append(image['id'])\n", + " if 'results' in json['results']:\n", + " images = json['results']['results']\n", + " for image in images:\n", + " if image['id'] not in ids:\n", + " ids.append(image['id'])\n", " results[item.lower()] = ids\n", " return results" ] }, { "cell_type": "markdown", - "id": "fda3a37c", + "id": "7686ac38", "metadata": {}, "source": [ "### Helper method to retrieve images using ``mapr``" @@ -126,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 43, "id": "7dcf6ab8", "metadata": {}, "outputs": [], @@ -194,7 +195,7 @@ }, { "cell_type": "markdown", - "id": "e7cb2ea8", + "id": "7454f092", "metadata": {}, "source": [ "### Search using search engine " @@ -202,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 44, "id": "0f968d98", "metadata": {}, "outputs": [ @@ -210,8 +211,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 65.9 ms, sys: 35.9 ms, total: 102 ms\n", - "Wall time: 1.19 s\n" + "CPU times: user 37.8 ms, sys: 11.5 ms, total: 49.3 ms\n", + "Wall time: 451 ms\n" ] } ], @@ -222,7 +223,7 @@ }, { "cell_type": "markdown", - "id": "77bb8ccf", + "id": "80181efd", "metadata": {}, "source": [ "### Search using ``mapr`` " @@ -230,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 45, "id": "b2e40507", "metadata": {}, "outputs": [ @@ -238,8 +239,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 364 ms, sys: 73.2 ms, total: 437 ms\n", - "Wall time: 4.52 s\n" + "CPU times: user 6.65 ms, sys: 2.73 ms, total: 9.38 ms\n", + "Wall time: 70 ms\n" ] } ], @@ -250,7 +251,7 @@ }, { "cell_type": "markdown", - "id": "f8e5f8b8", + "id": "a14df6c2", "metadata": {}, "source": [ "### Compare the outputs of the search\n", From 5a3cf6cdde0b35cde7a43edb4a73b90583825d1b Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Wed, 7 Sep 2022 10:30:21 +0100 Subject: [PATCH 4/7] handle no key found for mapr --- Search_Engine_test.ipynb | 74 +++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 20 deletions(-) diff --git a/Search_Engine_test.ipynb b/Search_Engine_test.ipynb index b2cbab89..e524e323 100644 --- a/Search_Engine_test.ipynb +++ b/Search_Engine_test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 27, + "execution_count": 1, "id": "39a371b9", "metadata": {}, "outputs": [], @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 2, "id": "d3b6c7f8", "metadata": {}, "outputs": [], @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 3, "id": "12d45593", "metadata": {}, "outputs": [], @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 4, "id": "a92a373d", "metadata": {}, "outputs": [], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 5, "id": "ec871b3e", "metadata": {}, "outputs": [], @@ -82,7 +82,8 @@ "# Mapr equivalent key\n", "KEY_MAPR = \"gene\"\n", "# List of items to search for\n", - "ITEMS = [\"pax1\", \"pep\"]" + "ITEMS = [\"pax1\", \"pep\"]\n", + "ITEMS = [\"blah\"]" ] }, { @@ -95,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 6, "id": "5c606dd9", "metadata": {}, "outputs": [], @@ -127,20 +128,23 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 23, "id": "7dcf6ab8", "metadata": {}, "outputs": [], "source": [ "def get_items():\n", " items = []\n", + " not_found = []\n", " for item in ITEMS:\n", " qs1 = {'key': KEY_MAPR, 'value': item}\n", " url = MAPR_URL.format(**qs1)\n", " json = session.get(url).json()\n", + " if len(json['maps']) == 0:\n", + " not_found.append(item)\n", " for m in json['maps']: \n", " items.append(m['id'])\n", - " return items\n", + " return items, not_found\n", "\n", "def parse_annotation(images, json_data, item, name, data_type):\n", " screen_name = \"-\"\n", @@ -168,7 +172,7 @@ " \n", "def load_using_mapr():\n", " results = {}\n", - " items = get_items()\n", + " items, not_found = get_items()\n", " images = []\n", " for item in items:\n", " qs1 = {'key': KEY_MAPR, 'value': item}\n", @@ -189,6 +193,8 @@ " url3 = DATASETS_URL.format(**qs3)\n", " parse_annotation(images, session.get(url3).json(), item, p['name'], 'datasets')\n", " results[item.lower()] = images\n", + " for n in not_found:\n", + " results[n.lower()] = []\n", " return results\n", " " ] @@ -203,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 24, "id": "0f968d98", "metadata": {}, "outputs": [ @@ -211,8 +217,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 37.8 ms, sys: 11.5 ms, total: 49.3 ms\n", - "Wall time: 451 ms\n" + "CPU times: user 38.1 ms, sys: 8 ms, total: 46.1 ms\n", + "Wall time: 412 ms\n" ] } ], @@ -231,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 25, "id": "b2e40507", "metadata": {}, "outputs": [ @@ -239,8 +245,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 6.65 ms, sys: 2.73 ms, total: 9.38 ms\n", - "Wall time: 70 ms\n" + "CPU times: user 9.86 ms, sys: 3.55 ms, total: 13.4 ms\n", + "Wall time: 87.5 ms\n" ] } ], @@ -261,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "id": "ee10d1f5", "metadata": {}, "outputs": [], @@ -279,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "id": "16f24578", "metadata": {}, "outputs": [], @@ -289,16 +295,44 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 31, "id": "4adb7c3d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{}\n" + ] + }, + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [31]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(results_mapr)\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(added) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(removed) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(modified) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], "source": [ + "print(results_mapr)\n", "assert len(added) == 0\n", "assert len(removed) == 0\n", "assert len(modified) == 0\n", "assert len(same) == len(ITEMS)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e26543b", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 6fa6e7e756aad9ad2de3de546a2753a3e30d4b8f Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Wed, 7 Sep 2022 21:15:54 +0100 Subject: [PATCH 5/7] remove print --- Search_Engine_test.ipynb | 57 ++++++++++------------------------------ 1 file changed, 14 insertions(+), 43 deletions(-) diff --git a/Search_Engine_test.ipynb b/Search_Engine_test.ipynb index e524e323..e4daf211 100644 --- a/Search_Engine_test.ipynb +++ b/Search_Engine_test.ipynb @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "id": "ec871b3e", "metadata": {}, "outputs": [], @@ -82,8 +82,7 @@ "# Mapr equivalent key\n", "KEY_MAPR = \"gene\"\n", "# List of items to search for\n", - "ITEMS = [\"pax1\", \"pep\"]\n", - "ITEMS = [\"blah\"]" + "ITEMS = [\"pax1\", \"pep\", \"blah\"]" ] }, { @@ -96,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "id": "5c606dd9", "metadata": {}, "outputs": [], @@ -128,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 15, "id": "7dcf6ab8", "metadata": {}, "outputs": [], @@ -209,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 16, "id": "0f968d98", "metadata": {}, "outputs": [ @@ -217,8 +216,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 38.1 ms, sys: 8 ms, total: 46.1 ms\n", - "Wall time: 412 ms\n" + "CPU times: user 79.9 ms, sys: 28.3 ms, total: 108 ms\n", + "Wall time: 1.12 s\n" ] } ], @@ -237,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 17, "id": "b2e40507", "metadata": {}, "outputs": [ @@ -245,8 +244,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 9.86 ms, sys: 3.55 ms, total: 13.4 ms\n", - "Wall time: 87.5 ms\n" + "CPU times: user 367 ms, sys: 75.4 ms, total: 443 ms\n", + "Wall time: 3.8 s\n" ] } ], @@ -267,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 18, "id": "ee10d1f5", "metadata": {}, "outputs": [], @@ -285,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 19, "id": "16f24578", "metadata": {}, "outputs": [], @@ -295,44 +294,16 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 20, "id": "4adb7c3d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{}\n" - ] - }, - { - "ename": "AssertionError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [31]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(results_mapr)\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(added) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(removed) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(modified) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n", - "\u001b[0;31mAssertionError\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ - "print(results_mapr)\n", "assert len(added) == 0\n", "assert len(removed) == 0\n", "assert len(modified) == 0\n", "assert len(same) == len(ITEMS)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e26543b", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 4fc4518e9a79348309581b1e8d12c4eda25d8253 Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Thu, 8 Sep 2022 13:43:53 +0100 Subject: [PATCH 6/7] load possible values associated to a given key --- Search_Engine_test.ipynb | 116 +++++++++++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 23 deletions(-) diff --git a/Search_Engine_test.ipynb b/Search_Engine_test.ipynb index e4daf211..a2f559e3 100644 --- a/Search_Engine_test.ipynb +++ b/Search_Engine_test.ipynb @@ -27,8 +27,9 @@ "outputs": [], "source": [ "INDEX_PAGE = \"https://idr-testing.openmicroscopy.org/webclient/?experimenter=-1\"\n", - "SEARCH_ENGINE_URL = \"https://idr-testing.openmicroscopy.org/searchengine/api/v1/resources/{type}/search/\"\n", - "KEY_VALUE_SEARCH = SEARCH_ENGINE_URL + \"?key={key}&value={value}\"" + "SEARCH_ENGINE_URL = \"https://idr-testing.openmicroscopy.org/searchengine/api/v1/resources/{type}/\"\n", + "KEY_VALUE_SEARCH = SEARCH_ENGINE_URL + \"search/?key={key}&value={value}\"\n", + "KEYS_SEARCH = SEARCH_ENGINE_URL + \"searchvaluesusingkey/?key={key}\"" ] }, { @@ -72,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "id": "ec871b3e", "metadata": {}, "outputs": [], @@ -85,6 +86,63 @@ "ITEMS = [\"pax1\", \"pep\", \"blah\"]" ] }, + { + "cell_type": "markdown", + "id": "4db404f2", + "metadata": {}, + "source": [ + "### Load all the values for a specific key.\n", + "Only non empty value will be considered." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "dc18b7e1", + "metadata": {}, + "outputs": [], + "source": [ + "# Helper method to load the possible values for a given key\n", + "def load_values_for_given_key():\n", + " values = []\n", + " qs1 = {'type': 'image', 'key': KEY}\n", + " url = KEYS_SEARCH.format(**qs1) \n", + " json = session.get(url).json()\n", + " for d in json['data']:\n", + " if d['Value']:\n", + " values.append(d['Value'])\n", + " return values" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9a1034a5", + "metadata": {}, + "outputs": [], + "source": [ + "values = load_values_for_given_key()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ceb180d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "49753\n" + ] + } + ], + "source": [ + "values.sort()\n", + "print(len(values))" + ] + }, { "cell_type": "markdown", "id": "2c61954a", @@ -95,15 +153,15 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "id": "5c606dd9", "metadata": {}, "outputs": [], "source": [ "# Helper method retrieving the result using directly the search api\n", - "def load_using_search_api():\n", + "def load_using_search_api(values):\n", " results = {}\n", - " for item in ITEMS:\n", + " for item in values:\n", " ids = []\n", " qs1 = {'type': 'image', 'key': KEY, 'value': item}\n", " url = KEY_VALUE_SEARCH.format(**qs1) \n", @@ -127,15 +185,15 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "id": "7dcf6ab8", "metadata": {}, "outputs": [], "source": [ - "def get_items():\n", + "def get_items(values):\n", " items = []\n", " not_found = []\n", - " for item in ITEMS:\n", + " for item in values:\n", " qs1 = {'key': KEY_MAPR, 'value': item}\n", " url = MAPR_URL.format(**qs1)\n", " json = session.get(url).json()\n", @@ -169,9 +227,9 @@ " if i['id'] not in images:\n", " images.append(i['id'])\n", " \n", - "def load_using_mapr():\n", + "def load_using_mapr(values):\n", " results = {}\n", - " items, not_found = get_items()\n", + " items, not_found = get_items(values)\n", " images = []\n", " for item in items:\n", " qs1 = {'key': KEY_MAPR, 'value': item}\n", @@ -208,7 +266,19 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 11, + "id": "abbc2677", + "metadata": {}, + "outputs": [], + "source": [ + "# number of values to search for\n", + "s = 0\n", + "e = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "id": "0f968d98", "metadata": {}, "outputs": [ @@ -216,14 +286,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 79.9 ms, sys: 28.3 ms, total: 108 ms\n", - "Wall time: 1.12 s\n" + "CPU times: user 15.7 ms, sys: 5.02 ms, total: 20.8 ms\n", + "Wall time: 123 ms\n" ] } ], "source": [ "%%time\n", - "results = load_using_search_api()" + "results = load_using_search_api(values[s:e])" ] }, { @@ -236,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 13, "id": "b2e40507", "metadata": {}, "outputs": [ @@ -244,14 +314,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 367 ms, sys: 75.4 ms, total: 443 ms\n", - "Wall time: 3.8 s\n" + "CPU times: user 9.54 ms, sys: 3.47 ms, total: 13 ms\n", + "Wall time: 37.3 ms\n" ] } ], "source": [ "%%time\n", - "results_mapr = load_using_mapr()" + "results_mapr = load_using_mapr(values[s:e])" ] }, { @@ -266,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "id": "ee10d1f5", "metadata": {}, "outputs": [], @@ -284,7 +354,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "id": "16f24578", "metadata": {}, "outputs": [], @@ -294,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "id": "4adb7c3d", "metadata": {}, "outputs": [], @@ -302,7 +372,7 @@ "assert len(added) == 0\n", "assert len(removed) == 0\n", "assert len(modified) == 0\n", - "assert len(same) == len(ITEMS)" + "assert len(same) == e - s" ] } ], From 054c64ccf8ef64f3c90629d37ae858ac66f2f0e9 Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Wed, 14 Sep 2022 11:23:15 +0100 Subject: [PATCH 7/7] fix loading of data from projects --- Search_Engine_test.ipynb | 90 ++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/Search_Engine_test.ipynb b/Search_Engine_test.ipynb index a2f559e3..8bffa7c6 100644 --- a/Search_Engine_test.ipynb +++ b/Search_Engine_test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 17, "id": "39a371b9", "metadata": {}, "outputs": [], @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 18, "id": "d3b6c7f8", "metadata": {}, "outputs": [], @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 19, "id": "12d45593", "metadata": {}, "outputs": [], @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 20, "id": "a92a373d", "metadata": {}, "outputs": [], @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 21, "id": "ec871b3e", "metadata": {}, "outputs": [], @@ -81,14 +81,12 @@ "# Key used by search engine\n", "KEY = \"Gene Symbol\"\n", "# Mapr equivalent key\n", - "KEY_MAPR = \"gene\"\n", - "# List of items to search for\n", - "ITEMS = [\"pax1\", \"pep\", \"blah\"]" + "KEY_MAPR = \"gene\"" ] }, { "cell_type": "markdown", - "id": "4db404f2", + "id": "f15c2932", "metadata": {}, "source": [ "### Load all the values for a specific key.\n", @@ -97,8 +95,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "dc18b7e1", + "execution_count": 22, + "id": "c6dfea10", "metadata": {}, "outputs": [], "source": [ @@ -116,8 +114,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "9a1034a5", + "execution_count": 47, + "id": "10898ac8", "metadata": {}, "outputs": [], "source": [ @@ -126,21 +124,22 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "ceb180d5", + "execution_count": 48, + "id": "1c759765", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "49753\n" - ] - } - ], + "outputs": [], + "source": [ + "values.sort()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "599dbada", + "metadata": {}, + "outputs": [], "source": [ - "values.sort()\n", - "print(len(values))" + "#values = ['agap5']" ] }, { @@ -153,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 31, "id": "5c606dd9", "metadata": {}, "outputs": [], @@ -185,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 37, "id": "7dcf6ab8", "metadata": {}, "outputs": [], @@ -245,7 +244,7 @@ " url3 = PLATES_URL.format(**qs3)\n", " parse_annotation(images, session.get(url3).json(), item, s['name'], 'plates')\n", " for p in json['projects']:\n", - " item = s['extra']['value']\n", + " item = p['extra']['value']\n", " qs3 = {'key': KEY_MAPR, 'value': item, 'project_id': p['id']}\n", " url3 = DATASETS_URL.format(**qs3)\n", " parse_annotation(images, session.get(url3).json(), item, p['name'], 'datasets')\n", @@ -266,19 +265,19 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "abbc2677", + "execution_count": 50, + "id": "3b603ab7", "metadata": {}, "outputs": [], "source": [ "# number of values to search for\n", "s = 0\n", - "e = 1" + "e = 500" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 51, "id": "0f968d98", "metadata": {}, "outputs": [ @@ -286,8 +285,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 15.7 ms, sys: 5.02 ms, total: 20.8 ms\n", - "Wall time: 123 ms\n" + "CPU times: user 11.6 s, sys: 3.31 s, total: 14.9 s\n", + "Wall time: 3min 6s\n" ] } ], @@ -306,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 52, "id": "b2e40507", "metadata": {}, "outputs": [ @@ -314,8 +313,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 9.54 ms, sys: 3.47 ms, total: 13 ms\n", - "Wall time: 37.3 ms\n" + "[' ciz1', ' spen', '128up', '14-3-3epsilon', '14-3-3zeta', '140up', '15e2_human', '18w', '26-29-p', '2mit', '312', '4ehp', '5-ht1a', '5-ht1b', '5-ht2', '5-ht2a', '5-ht2b', '5-ht7', '5ptasei', '76p', '7a5', '7b2', '7h3', '825-oak', 'a', 'a10', 'a16', 'a1bg', 'a1bg-as1', 'a1cf', 'a1l167_human', 'a2agn1_human', 'a2bp1', 'a2bp1_human', 'a2ld1', 'a2m', 'a2m-as1', 'a2ml1', 'a2rum1_human', 'a3-3', 'a3galt2', 'a4galt', 'a4gnt', 'a5', 'a6', 'aaas', 'aac1', 'aac11', 'aac3', 'aacs', 'aad10', 'aad14', 'aad15', 'aad16', 'aad3', 'aad4', 'aad6', 'aadac', 'aadacl1', 'aadacl2', 'aadacl3', 'aadacl4', 'aadat', 'aaed1', 'aagab', 'aah1', 'aah3', 'aak1', 'aamdc', 'aamp', 'aanat', 'aap1', 'aar2', 'aars', 'aars2', 'aarsd1', 'aasdh', 'aasdhppt', 'aass', 'aat', 'aat1', 'aat2', 'aatf', 'aatk', 'aatk-as1', 'aats-ala', 'aats-ala-m', 'aats-arg', 'aats-asn', 'aats-asp', 'aats-cys', 'aats-gln', 'aats-glupro', 'aats-gly', 'aats-his', 'aats-ile', 'aats-leu', 'aats-lys', 'aats-met', 'aats-phe', 'aats-pro', 'aats-ser', 'aats-thr', 'aats-trp', 'aats-tyr', 'aats-tyr-m', 'aats-val', 'aay', 'ab', 'abat', 'abba', 'abba-1', 'abc1', 'abc2', 'abc3', 'abc4', 'abca1', 'abca10', 'abca11p', 'abca12', 'abca13', 'abca17p', 'abca2', 'abca3', 'abca4', 'abca5', 'abca6', 'abca7', 'abca8', 'abca9', 'abcb1', 'abcb10', 'abcb11', 'abcb4', 'abcb5', 'abcb6', 'abcb7', 'abcb8', 'abcb9', 'abcc1', 'abcc10', 'abcc11', 'abcc12', 'abcc13', 'abcc2', 'abcc3', 'abcc4', 'abcc5', 'abcc5-as1', 'abcc6', 'abcc6p1', 'abcc8', 'abcc9', 'abcd1', 'abcd2', 'abcd3', 'abcd4', 'abce1', 'abcf1', 'abcf2', 'abcf3', 'abcg1', 'abcg2', 'abcg4', 'abcg5', 'abcg8', 'abd-a', 'abd-b', 'abd1', 'abf1', 'abf2', 'abh1', 'abhd1', 'abhd10', 'abhd11', 'abhd11-as1', 'abhd12', 'abhd12b', 'abhd13', 'abhd14a', 'abhd14a-acy1', 'abhd14b', 'abhd15', 'abhd16a', 'abhd16b', 'abhd17a', 'abhd17b', 'abhd17c', 'abhd18', 'abhd2', 'abhd3', 'abhd4', 'abhd5', 'abhd6', 'abhd7', 'abhd8', 'abhd9', 'abi', 'abi1', 'abi2', 'abi3', 'abi3bp', 'abl', 'abl1', 'abl2', 'ablim1', 'ablim2', 'ablim3', 'abm1', 'abo', 'abo1', 'abo2', 'abp1', 'abp140', 'abp2', 'abr', 'abra', 'abracl', 'abraxas1', 'abraxas2', 'abs', 'abt1', 'abtb1', 'abtb2', 'abz1', 'abz2', 'ac', 'ac002398.9', 'ac004381.6', 'ac004556.1', 'ac004754.3', 'ac004801.3', 'ac004832.3', 'ac004876.2', 'ac005020.5', 'ac005075.2', 'ac005255.6', 'ac005838.2', 'ac006207.1', 'ac006372.8', 'ac006465.3', 'ac006477.5', 'ac006538.4', 'ac007390.5', 'ac007461.8', 'ac007785.3', 'ac007881.4', 'ac008132.1', 'ac008687.1', 'ac008687.5', 'ac008695.1', 'ac009086.6', 'ac009163.4', 'ac009171.3', 'ac009237.8', 'ac009336.13', 'ac009365.3', 'ac010282.1', 'ac010336.8', 'ac010422.7', 'ac010442.7', 'ac010531.1', 'ac010654.8', 'ac010872.2', 'ac011322.3', 'ac011462.1', 'ac011477.3', 'ac011491.7', 'ac012044.15', 'ac012476.1', 'ac013269.1', 'ac013469.8', 'ac015724.2', 'ac016757.3', 'ac017104.8', 'ac018413.10', 'ac018648.5', 'ac020907.6', 'ac020931.6', 'ac022414.1', 'ac023055.1', 'ac023055.2', 'ac025260.29', 'ac027369.8', 'ac027612.6', 'ac044860.6', 'ac064843.5', 'ac068338.14', 'ac073263.5', 'ac073896.1', 'ac074378.4', 'ac074397.7', 'ac079612.1', 'ac079753.7', 'ac079776.5', 'ac084219.4', 'ac091047.10', 'ac091801.1', 'ac091842.3', 'ac091959.3', 'ac092165.4', 'ac092653.3', 'ac092718.3', 'ac092718.8', 'ac092865.1', 'ac093662.4', 'ac093726.4', 'ac097381.1', 'ac098976.2', 'ac099548.4', 'ac103819.3', 'ac104127.2', 'ac104534.3', 'ac104563.14', 'ac109446.2', 'ac109587.2', 'ac109599.4', 'ac109825.4', 'ac110781.3', 'ac113385.3', 'ac113420.1', 'ac113612.3', 'ac116655.7', 'ac117460.7', 'ac117494.2', 'ac126283.2', 'ac129492.6', 'ac130364.5', 'ac131280.9', 'ac133644.1', 'ac135776.3', 'ac135983.4', 'ac136352.1', 'ac136632.3', 'ac136759.4', 'ac138035.2', 'ac138969.4', 'ac139337.5', 'ac139425.3', 'ac13e', 'ac145212.1', 'ac145676.2', 'ac171558.1', 'ac171558.2', 'ac233263.1', 'ac240274.1', 'ac3', 'ac76e', 'ac78c', 'aca1', 'acaa1', 'acaa2', 'acaca', 'acacb', 'acad10', 'acad11', 'acad8', 'acad9', 'acadl', 'acadm', 'acads', 'acadsb', 'acadvl', 'acam', 'acan', 'acap1', 'acap2', 'acap3', 'acat1', 'acat2', 'acb1', 'acbd3', 'acbd4', 'acbd5', 'acbd6', 'acbd7', 'acc', 'acc1', 'accn1', 'accn2', 'accn3', 'accn4', 'accn5', 'accoas', 'accs', 'accsl', 'acd', 'ace', 'ace2', 'acer', 'acer1', 'acer2', 'acer3', 'acf1', 'acf2', 'acf4', 'ach1', 'ache', 'achi', 'acin1', 'acj6', 'ack', 'ack-like', 'ack1', 'ackr1', 'ackr2', 'ackr3', 'acly', 'acm1', 'acmsd', 'acn', 'acn9', 'aco1', 'aco2', 'acod1', 'acon', 'acot1', 'acot11', 'acot12', 'acot13', 'acot2', 'acot4', 'acot6', 'acot7', 'acot8', 'acot9', 'acox1', 'acox2', 'acox3', 'acox57d-d', 'acox57d-p', 'acoxl', 'acp1', 'acp2', 'acp26aa', 'acp26ab', 'acp29ab', 'acp32cd', 'acp36de', 'acp5', 'acp53c14a', 'acp53c14b', 'acp53c14c', 'acp53ea', 'acp6', 'acp62f', 'acp63f', 'acp65aa', 'acp7', 'acp70a', 'acp76a', 'acp95ef', 'acph-1', 'acpl2', 'acpp', 'acpt', 'acr', 'acrbp', 'acrc', 'acrol_human', 'acrv1', 'acs1', 'acsbg1', 'acsbg2', 'acsf2', 'acsf3', 'acsl', 'acsl1', 'acsl3', 'acsl4', 'acsl5', 'acsl6', 'acsm1', 'acsm2a', 'acsm2b', 'acsm3', 'acsm4', 'acsm5', 'acsm6', 'acss1', 'acss2', 'acss3', 'act1', 'act42a', 'act57b', 'act5c', 'act79b', 'act87e', 'act88f', 'acta1', 'acta2', 'actb', 'actbeta', 'actbl2', 'actbl3', 'actc', 'actc1', 'actg1', 'actg2', 'actl10']\n", + "CPU times: user 3min 1s, sys: 15.4 s, total: 3min 16s\n", + "Wall time: 18min 18s\n" ] } ], @@ -336,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 41, "id": "ee10d1f5", "metadata": {}, "outputs": [], @@ -354,7 +354,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 42, "id": "16f24578", "metadata": {}, "outputs": [], @@ -364,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 43, "id": "4adb7c3d", "metadata": {}, "outputs": [], @@ -374,6 +374,14 @@ "assert len(modified) == 0\n", "assert len(same) == e - s" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8100d189", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {