From 5bf02dcdeaf7f0ab20d03684dd95121d6b0ce493 Mon Sep 17 00:00:00 2001 From: Tom Reitz Date: Fri, 22 Nov 2024 12:03:37 -0600 Subject: [PATCH] implementing validate references selector, behavior and remote switch, plus update docs --- README.md | 8 +++++++ lightbeam/validate.py | 50 ++++++++++++++++++++++++++++--------------- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 7c12a0f..d631641 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,12 @@ validate: - references # checks that references resolve, either locally or in the remote API # or # methods: "*" + references: + selector: + - studentAssessments.studentReference + - studentSchoolAssociations.schoolReference + behavior: exclude # or `include` + remote: False # default=True ``` Default `validate`.`methods` are `["schema", "descriptors", "uniqueness"]` (not `references`; see below). In addition to the above methods, `lighteam validate` will also (first) check that each payload is valid JSON. @@ -167,6 +173,8 @@ This is optional; if absent, references in every payload are checked, no matter * `fetch`ed data becoming stale over time * needing to track which data is your own vs. was `fetch`ed (all the data must coexist in the `config.data_dir` to be discoverable by `lightbeam validate`) +You may specify a `selector` list of the form `someEndpoint.path.to.someReference` to include or exclude (according to `behavior`) specific references from reference validation. You may also specify `remote: False` to only validate references against local data in your JSONL files. 
+ ## `send` ```bash diff --git a/lightbeam/validate.py b/lightbeam/validate.py index 3e4619e..f153853 100644 --- a/lightbeam/validate.py +++ b/lightbeam/validate.py @@ -39,7 +39,17 @@ def validate(self): if type(self.validation_methods)==str and (self.validation_methods=="*" or self.validation_methods.lower()=='all'): self.validation_methods = self.DEFAULT_VALIDATION_METHODS self.validation_methods.append("references") - + self.validation_references_selector = self.lightbeam.config.get("validate",{}).get("references",{}).get("selector", []) + for selector in self.validation_references_selector: + if "." not in selector: + self.logger.error(f"`config.validate.references.selector` {selector} is incorrectly formatted (should be `someEndpoint.someReference`, such as `studentSchoolAssociation.schoolReference`)") + self.validation_references_behavior = self.lightbeam.config.get("validate",{}).get("references",{}).get("behavior", "exclude") + if self.validation_references_behavior not in ["exclude", "include"]: + self.logger.error(f"`config.validate.references.behavior` must be either `exclude` (default) or `include`)") + self.validation_references_remote = self.lightbeam.config.get("validate",{}).get("references",{}).get("remote", True) + if "references" in self.validation_methods and not self.validation_references_remote: + self.logger.info(f"(references will only be validated against local data, since `config.validate.references.remote: False`)") + self.lightbeam.api.load_swagger_docs() self.logger.info(f"validating by methods {self.validation_methods}...") if "descriptors" in self.validation_methods: @@ -290,7 +300,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter): # check references values are valid if "references" in self.validation_methods and "Descriptor" not in endpoint: # Descriptors have no references self.lightbeam.api.do_oauth() - error_message = self.has_invalid_references(payload, path="") + error_message = 
self.has_invalid_references(endpoint, payload, path="") if error_message != "": self.log_validation_error(endpoint, file_name, line_counter, "references", error_message) @@ -360,40 +370,46 @@ def has_invalid_descriptor_values(self, payload, path=""): return "" # Validates descriptor values for a single payload (returns an error message or empty string) - def has_invalid_references(self, payload, path=""): + def has_invalid_references(self, endpoint, payload, path=""): for k in payload.keys(): if isinstance(payload[k], dict) and not k.endswith("Reference"): - value = self.has_invalid_references(payload[k], path+("." if path!="" else "")+k) + value = self.has_invalid_references(endpoint, payload[k], path+("." if path!="" else "")+k) if value!="": return value elif isinstance(payload[k], list): for i in range(0, len(payload[k])): - value = self.has_invalid_references(payload[k][i], path+("." if path!="" else "")+k+"["+str(i)+"]") + value = self.has_invalid_references(endpoint, payload[k][i], path+("." 
if path!="" else "")+k+"["+str(i)+"]") if value!="": return value elif isinstance(payload[k], dict) and k.endswith("Reference"): + check_this_reference = ( + (f"{endpoint}.{path}{k}" in self.validation_references_selector and self.validation_references_behavior=="include") + or (f"{endpoint}.{path}{k}" not in self.validation_references_selector and self.validation_references_behavior=="exclude") + ) + if not check_this_reference: continue is_valid_reference = False original_endpoint = util.pluralize_endpoint(k.replace("Reference","")) + params = payload[k].copy() + if "link" in params.keys(): del params["link"] + # this deals with the fact that an educationOrganizationReference may be to a school, LEA, etc.: endpoints_to_check = self.EDFI_GENERICS_TO_RESOURCES_MAPPING.get(original_endpoint, [original_endpoint]) - for endpoint in endpoints_to_check: + for endpt in endpoints_to_check: # check if it's a local reference: - if endpoint not in self.local_reference_cache.keys(): break + if endpt not in self.local_reference_cache.keys(): break # construct cache_key for reference - cache_key = self.get_cache_key(payload[k]) - if cache_key in self.local_reference_cache[endpoint]: + cache_key = self.get_cache_key(params) + if cache_key in self.local_reference_cache[endpt]: is_valid_reference = True break - if not is_valid_reference: # not found in local data... - for endpoint in endpoints_to_check: + if not is_valid_reference and self.validation_references_remote: # not found in local data... 
+ for endpt in endpoints_to_check: # check if it's a remote reference: - params = payload[k].copy() - if "link" in params.keys(): del params["link"] - value = self.remote_reference_exists(endpoint, params) + value = self.remote_reference_exists(endpt, params) if value: is_valid_reference = True break - if not is_valid_reference: - return f"payload contains an invalid {k} " + (" (at "+path+"): " if path!="" else ": ") + json.dumps(params) + if not is_valid_reference: + return f"payload contains an invalid {k} " + (" (at "+path+"): " if path!="" else ": ") + json.dumps(params) return "" # Tells you if a specified descriptor value is valid or not @@ -443,7 +459,7 @@ def remote_reference_exists(self, endpoint, params): else: pass # await asyncio.sleep(1) curr_token_version = int(str(self.lightbeam.token_version)) - elif status=='404': + elif status=='404' or status=='400': return False elif status in ['200', '201']: # 200 response might still return zero matching records...