Skip to content

Commit

Permalink
Merge pull request #26 from 4dn-dcic/0.4.6
Browse files Browse the repository at this point in the history
0.4.6
  • Loading branch information
Carl Vitzthum authored Aug 20, 2018
2 parents 7017b17 + 1fa0c85 commit e9519fb
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 19 deletions.
2 changes: 1 addition & 1 deletion dcicutils/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Version information."""

# The following line *must* be the last in the module, exactly as formatted:
__version__ = "0.4.5"
__version__ = "0.4.6"
4 changes: 4 additions & 0 deletions dcicutils/beanstalk_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ def delete_db(db_identifier, take_snapshot=True):


def get_health_page_info(bs_url):
"""
Different use cases than ff_utils.get_health_page (that one is oriented
towards external API usage and this one is more internal)
"""
if not bs_url.endswith('/'):
bs_url += "/"
if not bs_url.startswith('http'):
Expand Down
37 changes: 27 additions & 10 deletions dcicutils/ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,27 +351,44 @@ def get_es_search_generator(es_client, index, body, page_size=50):
def get_es_metadata(uuids, es_client=None, key=None, ff_env=None):
"""
Given a list of string item uuids, will return a
dictionary response of the full ES ecord for that item (or an empty
dictionary if the item doesn't exist/ is not indexed)
list of the full ES records for those items (or an empty list if the
items don't exist / are not indexed)
You can pass in an Elasticsearch client (initialized by create_es_client)
through the es_client param to save init time.
Same auth mechanism as the other metadata functions
"""
if es_client is None:
# need to know ES server location and item type
auth = get_authentication_with_server(key, ff_env)
health_res = authorized_request(auth['server'] + '/health', auth=auth, verb='GET')
es_url = get_response_json(health_res)['elasticsearch']
es_url = get_health_page(key, ff_env)['elasticsearch']
es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
# match all given uuids to _id fields
es_query = {'query': {'terms': {'_id': uuids}}, 'sort': [{'_uid': {'order': 'desc'}}]}
# sending in too many uuids in the terms query can crash es; break them up
# into groups of max size 100
es_res = []
for es_page in get_es_search_generator(es_client, '_all', es_query):
# return the document source only; eliminate es metadata
es_res.extend([hit['_source'] for hit in es_page])
for i in range(0, len(uuids), 100):
query_uuids = uuids[i:i + 100]
es_query = {'query': {'terms': {'_id': query_uuids}},
'sort': [{'_uid': {'order': 'desc'}}]}
for es_page in get_es_search_generator(es_client, '_all', es_query):
# return the document source only; eliminate es metadata
es_res.extend([hit['_source'] for hit in es_page])
return es_res


def get_health_page(key=None, ff_env=None):
    """
    Fetch the FF health page and return its parsed json, authenticating
    with either the given key or ff_env.

    This function is meant to tolerate failure: any exception raised while
    authenticating, making the request, or reading the response is caught,
    and a dictionary of the form {'error': <exception message>} is returned
    in place of the health json.
    """
    try:
        server_auth = get_authentication_with_server(key, ff_env)
        health_url = server_auth['server'] + '/health'
        response = authorized_request(health_url, auth=server_auth, verb='GET')
        return get_response_json(response)
    except Exception as err:
        # deliberately broad: hand back an error payload rather than raising
        return {'error': str(err)}


#####################
# Utility functions #
#####################
Expand Down
34 changes: 26 additions & 8 deletions test/test_ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,11 +350,8 @@ def test_get_es_metadata(integrated_ff):
assert biosample_res['uuid'] == test_biosample
assert biosample_res['item_type'] == 'biosample'

# you can also pass in your own elasticsearch client
# ugly here because we need to get it from health page
health_res = ff_utils.authorized_request(integrated_ff['ff_key']['server'] + '/health',
auth=integrated_ff['ff_key'])
es_url = ff_utils.get_response_json(health_res)['elasticsearch']
# you can pass in your own elasticsearch client or build it here
es_url = ff_utils.get_health_page(key=integrated_ff['ff_key'])['elasticsearch']
es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
res2 = ff_utils.get_es_metadata([test_biosource], es_client=es_client,
key=integrated_ff['ff_key'])
Expand All @@ -377,13 +374,19 @@ def test_get_es_metadata(integrated_ff):
res = ff_utils.get_es_metadata(['blahblah'], key=integrated_ff['ff_key'])
assert res == []

# make sure searches work with pagination set at 100 (default)
all_items = ff_utils.search_metadata('/search/?type=Item&frame=object', key=integrated_ff['ff_key'])
all_uuids = [item['uuid'] for item in all_items]
all_es = ff_utils.get_es_metadata(all_uuids, key=integrated_ff['ff_key'])
assert len(all_es) == len(all_uuids)
all_es_uuids = [item['uuid'] for item in all_es]
assert set(all_es_uuids) == set(all_uuids)


def test_get_es_search_generator(integrated_ff):
from dcicutils import es_utils
# get es_client info from the health page
health_res = ff_utils.authorized_request(integrated_ff['ff_key']['server'] + '/health',
auth=integrated_ff['ff_key'])
es_url = ff_utils.get_response_json(health_res)['elasticsearch']
es_url = ff_utils.get_health_page(key=integrated_ff['ff_key'])['elasticsearch']
es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
es_query = {'query': {'match_all': {}}, 'sort': [{'_uid': {'order': 'desc'}}]}
# search for all ontology terms with a low pagination size
Expand All @@ -402,3 +405,18 @@ def test_get_es_search_generator(integrated_ff):
key=integrated_ff['ff_key'])
search_uuids = set(hit['uuid'] for hit in search_res)
assert all_es_uuids == search_uuids


def test_get_health_page(integrated_ff):
    """
    Exercise ff_utils.get_health_page with a key, with an ff_env, and with
    an ff_env that does not exist.
    """
    # fetch using the access key
    key_health = ff_utils.get_health_page(key=integrated_ff['ff_key'])
    assert key_health and 'error' not in key_health
    for expected_field in ('elasticsearch', 'database'):
        assert expected_field in key_health
    assert key_health['beanstalk_env'] == integrated_ff['ff_env']
    # fetch using ff_env rather than the key; should agree with the above
    env_health = ff_utils.get_health_page(ff_env=integrated_ff['ff_env'])
    assert env_health and 'error' not in env_health
    assert env_health['elasticsearch'] == key_health['elasticsearch']
    # a bad environment must give back an error payload, not raise
    failed_health = ff_utils.get_health_page(ff_env='not_an_env')
    assert failed_health and 'error' in failed_health

0 comments on commit e9519fb

Please sign in to comment.