Skip to content

Commit

Permalink
Merge pull request #74 from 4dn-dcic/indexer_env
Browse files Browse the repository at this point in the history
C4-137 Provision Indexer Deployment
  • Loading branch information
willronchetti authored Apr 28, 2020
2 parents 4f5ab46 + 5906fba commit 2532841
Show file tree
Hide file tree
Showing 20 changed files with 603 additions and 72 deletions.
31 changes: 25 additions & 6 deletions dcicutils/deployment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def main():
import argparse

from dcicutils.env_utils import (
is_stg_or_prd_env, prod_bucket_env, get_standard_mirror_env, data_set_for_env,
is_stg_or_prd_env, prod_bucket_env, get_standard_mirror_env, data_set_for_env, INDEXER_ENVS
)
from dcicutils.misc_utils import PRINT

Expand All @@ -44,7 +44,7 @@ class Deployer:
@classmethod
def build_ini_file_from_template(cls, template_file_name, init_file_name,
bs_env=None, bs_mirror_env=None, s3_bucket_env=None,
data_set=None, es_server=None, es_namespace=None):
data_set=None, es_server=None, es_namespace=None, indexer=False):
"""
Builds a .ini file from a given template file.
Expand All @@ -57,6 +57,7 @@ def build_ini_file_from_template(cls, template_file_name, init_file_name,
data_set (str): An identifier for data to load (either 'prod' for prd/stg envs, or 'test' for others)
es_server (str): The server name (or server:port) for the ElasticSearch server.
es_namespace (str): The ElasticSearch namespace to use (probably but not necessarily same as bs_env).
indexer (bool): Whether or not we are building an ini file for an indexer.
"""
with io.open(init_file_name, 'w') as init_file_fp:
cls.build_ini_stream_from_template(template_file_name=template_file_name,
Expand All @@ -66,7 +67,8 @@ def build_ini_file_from_template(cls, template_file_name, init_file_name,
s3_bucket_env=s3_bucket_env,
data_set=data_set,
es_server=es_server,
es_namespace=es_namespace)
es_namespace=es_namespace,
indexer=indexer)

# Ref: https://stackoverflow.com/questions/19911123/how-can-you-get-the-elastic-beanstalk-application-version-in-your-application # noqa: E501
EB_MANIFEST_FILENAME = "/opt/elasticbeanstalk/deploy/manifest"
Expand Down Expand Up @@ -104,7 +106,7 @@ def get_app_version(cls): # This logic (perhaps most or all of this file) shoul
@classmethod
def build_ini_stream_from_template(cls, template_file_name, init_file_stream,
bs_env=None, bs_mirror_env=None, s3_bucket_env=None, data_set=None,
es_server=None, es_namespace=None):
es_server=None, es_namespace=None, indexer=False):
"""
Sends output to init_file_stream corresponding to the data one would want in an ini file
for the given template_file_name and available environment variables.
Expand All @@ -118,13 +120,13 @@ def build_ini_stream_from_template(cls, template_file_name, init_file_stream,
data_set: 'test' or 'prod'. Default is 'test' unless bs_env is a staging or production environment.
es_server: The name of an es server to use.
es_namespace: The namespace to use on the es server. If None, this uses the bs_env.
indexer: Whether or not we are building an ini file for an indexer.
Returns: None
"""

# print("data_set given = ", data_set)

es_server = es_server or os.environ.get('ENCODED_ES_SERVER', "MISSING_ENCODED_ES_SERVER")
bs_env = bs_env or os.environ.get("ENCODED_BS_ENV", "MISSING_ENCODED_BS_ENV")
bs_mirror_env = bs_mirror_env or os.environ.get("ENCODED_BS_MIRROR_ENV", get_standard_mirror_env(bs_env)) or ""
Expand All @@ -135,6 +137,11 @@ def build_ini_stream_from_template(cls, template_file_name, init_file_stream,
data_set = data_set or os.environ.get("ENCODED_DATA_SET",
data_set_for_env(bs_env) or "MISSING_ENCODED_DATA_SET")
es_namespace = es_namespace or os.environ.get("ENCODED_ES_NAMESPACE", bs_env)
# Set ENCODED_INDEXER to 'true' to deploy an indexer.
# If the value is missing, the empty string, or any other thing besides 'true' (in any case),
# this value will default to the empty string, causing the line not to appear in the output file
# because there is a special case that suppresses output of empty values. -kmp 27-Apr-2020
indexer = "true" if indexer or os.environ.get('ENCODED_INDEXER', "false").upper() == "TRUE" else ""

# print("data_set computed = ", data_set)

Expand All @@ -147,8 +154,15 @@ def build_ini_stream_from_template(cls, template_file_name, init_file_stream,
'S3_BUCKET_ENV': s3_bucket_env,
'DATA_SET': data_set,
'ES_NAMESPACE': es_namespace,
'INDEXER': indexer,
}

# if we specify an indexer name for bs_env, we did the deployment wrong and should bail
if bs_env in INDEXER_ENVS:
raise RuntimeError("Deployed with bs_env %s, which is an indexer env."
"Re-deploy with the env you want to index and set the 'ENCODED.INDEXER'"
"environment variable." % bs_env)

# We assume these variables are not set, but best to check first. Confusion might result otherwise.
for extra_var in extra_vars:
if extra_var in os.environ:
Expand Down Expand Up @@ -239,6 +253,10 @@ def main(cls):
parser.add_argument("--es_namespace",
help="an ElasticSearch namespace",
default=None)
parser.add_argument("--indexer",
help="whether or not to deploy an indexer",
action='store_true',
default=False)
args = parser.parse_args()
template_file_name = cls.environment_template_filename(args.env)
ini_file_name = args.target
Expand All @@ -247,7 +265,8 @@ def main(cls):
cls.build_ini_file_from_template(template_file_name, ini_file_name,
bs_env=args.bs_env, bs_mirror_env=args.bs_mirror_env,
s3_bucket_env=args.s3_bucket_env, data_set=args.data_set,
es_server=args.es_server, es_namespace=args.es_namespace)
es_server=args.es_server, es_namespace=args.es_namespace,
indexer=args.indexer)
except Exception as e:
PRINT("Error (%s): %s" % (e.__class__.__name__, e))
sys.exit(1)
Expand Down
5 changes: 5 additions & 0 deletions dcicutils/env_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
FF_ENV_WEBPROD = 'fourfront-webprod'
FF_ENV_WEBPROD2 = 'fourfront-webprod2'
FF_ENV_WOLF = 'fourfront-wolf'
FF_ENV_INDEXER = 'fourfront-indexer' # to be used by ELB Indexer

CGAP_ENV_DEV = 'fourfront-cgapdev'
CGAP_ENV_HOTSEAT = 'fourfront-cgaphotseat' # Maybe not used
Expand All @@ -22,6 +23,7 @@
CGAP_ENV_WEBPROD = 'fourfront-cgap'
# CGAP_ENV_WEBPROD2 is meaningless here. See CGAP_ENV_STAGING.
CGAP_ENV_WOLF = 'fourfront-cgapwolf' # Maybe not used
CGAP_ENV_INDEXER = 'cgap-indexer' # to be used by ELB Indexer

CGAP_ENV_DEV_NEW = 'cgap-dev'
CGAP_ENV_HOTSEAT_NEW = 'cgap-hotseat'
Expand All @@ -43,6 +45,9 @@
FOURFRONT_STG_OR_PRD_TOKENS = ['webprod', 'blue', 'green']
FOURFRONT_STG_OR_PRD_NAMES = ['staging', 'stagging', 'data']

# We should know which BS Envs are indexing envs
INDEXER_ENVS = [FF_ENV_INDEXER, CGAP_ENV_INDEXER]

# Done this way because it's safer going forward.
CGAP_STG_OR_PRD_TOKENS = []
CGAP_STG_OR_PRD_NAMES = [CGAP_ENV_WEBPROD, CGAP_ENV_PRODUCTION_GREEN, CGAP_ENV_PRODUCTION_BLUE,
Expand Down
23 changes: 19 additions & 4 deletions dcicutils/ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
from .misc_utils import PRINT
import requests
from elasticsearch.exceptions import AuthorizationException
# urlparse import differs between py2 and 3
if sys.version_info[0] < 3:
import urlparse
Expand Down Expand Up @@ -697,7 +698,7 @@ def delete_field(obj_id, del_field, key=None, ff_env=None):

def get_es_search_generator(es_client, index, body, page_size=200):
"""
Simple generator behind get_es_metada which takes an es_client (from
Simple generator behind get_es_metadata which takes an es_client (from
es_utils create_es_client), a string index, and a dict query body.
Also takes an optional integer page_size, which controls pagination size
NOTE: 'index' must be namespaced
Expand Down Expand Up @@ -883,7 +884,8 @@ def expand_es_metadata(uuid_list, key=None, ff_env=None, store_frame='raw', add_
add_pc_wfr (bool): Include workflow_runs and linked items (processed/ref files, wf, software...)
ignore_field(list): Remove keys from items, so any linking through these fields, ie relations
use_generator (bool): Use a generator when getting es. Less memory used but takes longer
es_client: optional result from es_utils.create_es_client
es_client: optional result from es_utils.create_es_client - note this could be regenerated
in this method if the signature expires
Returns:
dict: contains all item types as keys, and with values of list of dictionaries
i.e.
Expand Down Expand Up @@ -930,8 +932,21 @@ def remove_keys(my_dict, remove_list):

while uuid_list:
uuids_to_check = [] # uuids to add to uuid_list if not in item_uuids
for es_item in get_es_metadata(uuid_list, es_client=es_client, chunk_size=chunk,
is_generator=use_generator, key=auth):

# get the next page of data, recreating the es_client if need be
try:
current_page = get_es_metadata(uuid_list, es_client=es_client, chunk_size=chunk,
is_generator=use_generator, key=auth)
except AuthorizationException: # our signature expired, recreate the es_client with a fresh signature
if es_url:
es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
else: # recreate client and try again - if we fail here, exception should propagate
es_url = get_health_page(key=auth)['elasticsearch']
es_client = es_utils.create_es_client(es_url, use_aws_auth=True)

current_page = get_es_metadata(uuid_list, es_client=es_client, chunk_size=chunk,
is_generator=use_generator, key=auth)
for es_item in current_page:
# get object type via es result and schema for storing
obj_type = es_item['object']['@type'][0]
obj_key = schema_name[obj_type]
Expand Down
100 changes: 100 additions & 0 deletions dcicutils/misc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
"""

import os
import logging
import webtest # importing the library makes it easier to mock testing


# Is this the right place for this? I feel like this should be done in an application, not a library.
# -kmp 27-Apr-2020
logging.basicConfig()


# Using PRINT(...) for debugging, rather than its more familiar lowercase form) for intended programmatic output,
Expand All @@ -11,6 +18,99 @@
PRINT = print


class VirtualAppError(Exception):
    """ Exception raised by VirtualApp, carrying extra context about the failed request.

    Attributes:
        msg: short description of what failed
        query_url: the URL the request was sent to
        query_body: the body that was sent with the request
        raw_exception: the underlying error, as supplied by the raiser
    """

    def __init__(self, msg, url, body, e):
        super(VirtualAppError, self).__init__(msg)
        self.msg = msg
        self.query_url = url
        self.query_body = body
        self.raw_exception = e

    def __repr__(self):
        # One entry per output line; every line (including the last) ends with a newline.
        report_lines = [
            "Exception encountered on VirtualApp",
            "URL: %s" % (self.query_url,),
            "BODY: %s" % (self.query_body,),
            "MSG: %s" % (self.msg,),
            "Raw Exception: %s" % (self.raw_exception,),
        ]
        return "\n".join(report_lines) + "\n"

    def __str__(self):
        # The printable form is deliberately the same as the repr.
        return repr(self)


class _VirtualAppHelper(webtest.TestApp):
    """ Subclass of webtest.TestApp that adds nothing; it effectively disguises the 'TestApp' name. """


class VirtualApp:
    """
    Wrapper class for TestApp, to allow custom control over submitting Encoded requests,
    simulating a number of conditions, including permissions.
    IMPORTANT: webtest.TestApp is used as the substrate technology here, but use of this class
               occurs in the main application, not just in testing. Among other things, we have
               renamed the app here in order to avoid confusions created by the name when it is used
               in production settings.
    """
    HELPER_CLASS = _VirtualAppHelper

    def __init__(self, app, environ):
        """
        Builds an encoded application, allowing you to submit requests to an encoded application
        :param app: return value of get_app(config_uri, app_name)
        :param environ: options to pass to the application. Usually permissions.
        """
        # NOTE: The TestApp class that we're wrapping takes a richer set of initialization parameters
        #       (including relative_to, use_unicode, cookiejar, parser_features, json_encoder, and lint),
        #       but we'll add them conservatively here. If there is a need for any of them, we should add
        #       them explicitly here one-by-one as the need is shown so we have tight control of what
        #       we're depending on and what we're not. -kmp 27-Apr-2020
        self.wrapped_app = self.HELPER_CLASS(app, environ)

    def get(self, url, **kwargs):
        """ Wrapper for TestApp.get that logs the outgoing GET
        :param url: url to GET
        :param kwargs: args to pass to the GET
        :return: result of GET
        :raises VirtualAppError: if the wrapped app raises webtest.AppError
        """
        # Pass the values as logging arguments so the message is only formatted
        # when a handler will actually emit an INFO record.
        logging.info('OUTGOING HTTP GET: %s', url)
        try:
            return self.wrapped_app.get(url, **kwargs)
        except webtest.AppError as e:
            raise VirtualAppError(msg='HTTP GET failed.', url=url, body='<empty>', e=str(e))

    def post_json(self, url, obj, **kwargs):
        """ Wrapper for TestApp.post_json that logs the outgoing POST
        :param url: url to POST to
        :param obj: object body to POST
        :param kwargs: args to pass to the POST
        :return: result of POST
        :raises VirtualAppError: if the wrapped app raises webtest.AppError
        """
        # Lazy formatting avoids stringifying a potentially large body when INFO is disabled.
        logging.info('OUTGOING HTTP POST on url: %s with object: %s', url, obj)
        try:
            return self.wrapped_app.post_json(url, obj, **kwargs)
        except webtest.AppError as e:
            raise VirtualAppError(msg='HTTP POST failed.', url=url, body=obj, e=str(e))

    def patch_json(self, url, fields, **kwargs):
        """ Wrapper for TestApp.patch_json that logs the outgoing PATCH
        :param url: url to PATCH to, should contain an object uuid
        :param fields: fields to PATCH on uuid in URL
        :param kwargs: args to pass to the PATCH
        :return: result of PATCH
        :raises VirtualAppError: if the wrapped app raises webtest.AppError
        """
        # Lazy formatting avoids stringifying a potentially large body when INFO is disabled.
        logging.info('OUTGOING HTTP PATCH on url: %s with changes: %s', url, fields)
        try:
            return self.wrapped_app.patch_json(url, fields, **kwargs)
        except webtest.AppError as e:
            raise VirtualAppError(msg='HTTP PATCH failed.', url=url, body=fields, e=str(e))


def ignored(*args, **kwargs):
"""
This is useful for defeating flake warnings.
Expand Down
Loading

0 comments on commit 2532841

Please sign in to comment.