Skip to content

implementing validate references selector, behavior and remote switch, plus update docs #61

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ validate:
- references # checks that references resolve, either locally or in the remote API
# or
# methods: "*"
references:
selector:
- studentAssessments.studentReference
- studentSchoolAssociations.schoolReference
behavior: exclude # or `include`
remote: False # default=True
```
Default `validate`.`methods` are `["schema", "descriptors", "uniqueness"]` (not `references`; see below). In addition to the above methods, `lightbeam validate` will also (first) check that each payload is valid JSON.

Expand All @@ -167,6 +173,8 @@ This is optional; if absent, references in every payload are checked, no matter
* `fetch`ed data becoming stale over time
* needing to track which data is your own vs. was `fetch`ed (all the data must coexist in the `config.data_dir` to be discoverable by `lightbeam validate`)

You may specify a `selector` list of the form `someEndpoint.path.to.someReference` to include or exclude (according to `behavior`) specific references from reference validation. You may also specify `remote: False` to only validate references against local data in your JSONL files.


## `send`
```bash
Expand Down
50 changes: 33 additions & 17 deletions lightbeam/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,17 @@ def validate(self):
if type(self.validation_methods)==str and (self.validation_methods=="*" or self.validation_methods.lower()=='all'):
self.validation_methods = self.DEFAULT_VALIDATION_METHODS
self.validation_methods.append("references")

self.validation_references_selector = self.lightbeam.config.get("validate",{}).get("references",{}).get("selector", [])
for selector in self.validation_references_selector:
if "." not in selector:
self.logger.error(f"`config.validate.references.selector` {selector} is incorrectly formatted (should be `someEndpoint.someReference`, such as `studentSchoolAssociation.schoolReference`)")
self.validation_references_behavior = self.lightbeam.config.get("validate",{}).get("references",{}).get("behavior", "exclude")
if self.validation_references_behavior not in ["exclude", "include"]:
self.logger.error(f"`config.validate.references.behavior` must be either `exclude` (default) or `include`)")
self.validation_references_remote = self.lightbeam.config.get("validate",{}).get("references",{}).get("remote", True)
if "references" in self.validation_methods and not self.validation_references_remote:
self.logger.info(f"(references will only be validated against local data, since `config.validate.references.remote: False`)")

self.lightbeam.api.load_swagger_docs()
self.logger.info(f"validating by methods {self.validation_methods}...")
if "descriptors" in self.validation_methods:
Expand Down Expand Up @@ -290,7 +300,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
# check references values are valid
if "references" in self.validation_methods and "Descriptor" not in endpoint: # Descriptors have no references
self.lightbeam.api.do_oauth()
error_message = self.has_invalid_references(payload, path="")
error_message = self.has_invalid_references(endpoint, payload, path="")
if error_message != "":
self.log_validation_error(endpoint, file_name, line_counter, "references", error_message)

Expand Down Expand Up @@ -360,40 +370,46 @@ def has_invalid_descriptor_values(self, payload, path=""):
return ""

# Validates references for a single payload (returns an error message or empty string)
def has_invalid_references(self, payload, path=""):
def has_invalid_references(self, endpoint, payload, path=""):
for k in payload.keys():
if isinstance(payload[k], dict) and not k.endswith("Reference"):
value = self.has_invalid_references(payload[k], path+("." if path!="" else "")+k)
value = self.has_invalid_references(endpoint, payload[k], path+("." if path!="" else "")+k)
if value!="": return value
elif isinstance(payload[k], list):
for i in range(0, len(payload[k])):
value = self.has_invalid_references(payload[k][i], path+("." if path!="" else "")+k+"["+str(i)+"]")
value = self.has_invalid_references(endpoint, payload[k][i], path+("." if path!="" else "")+k+"["+str(i)+"]")
if value!="": return value
elif isinstance(payload[k], dict) and k.endswith("Reference"):
check_this_reference = (
(f"{endpoint}.{path}{k}" in self.validation_references_selector and self.validation_references_behavior=="include")
or (f"{endpoint}.{path}{k}" not in self.validation_references_selector and self.validation_references_behavior=="exclude")
)
if not check_this_reference: continue
is_valid_reference = False
original_endpoint = util.pluralize_endpoint(k.replace("Reference",""))

params = payload[k].copy()
if "link" in params.keys(): del params["link"]

# this deals with the fact that an educationOrganizationReference may be to a school, LEA, etc.:
endpoints_to_check = self.EDFI_GENERICS_TO_RESOURCES_MAPPING.get(original_endpoint, [original_endpoint])
for endpoint in endpoints_to_check:
for endpt in endpoints_to_check:
# check if it's a local reference:
if endpoint not in self.local_reference_cache.keys(): break
if endpt not in self.local_reference_cache.keys(): break
# construct cache_key for reference
cache_key = self.get_cache_key(payload[k])
if cache_key in self.local_reference_cache[endpoint]:
cache_key = self.get_cache_key(params)
if cache_key in self.local_reference_cache[endpt]:
is_valid_reference = True
break
if not is_valid_reference: # not found in local data...
for endpoint in endpoints_to_check:
if not is_valid_reference and self.validation_references_remote: # not found in local data...
for endpt in endpoints_to_check:
# check if it's a remote reference:
params = payload[k].copy()
if "link" in params.keys(): del params["link"]
value = self.remote_reference_exists(endpoint, params)
value = self.remote_reference_exists(endpt, params)
if value:
is_valid_reference = True
break
if not is_valid_reference:
return f"payload contains an invalid {k} " + (" (at "+path+"): " if path!="" else ": ") + json.dumps(params)
if not is_valid_reference:
return f"payload contains an invalid {k} " + (" (at "+path+"): " if path!="" else ": ") + json.dumps(params)
return ""

# Tells you if a specified descriptor value is valid or not
Expand Down Expand Up @@ -443,7 +459,7 @@ def remote_reference_exists(self, endpoint, params):
else:
pass # await asyncio.sleep(1)
curr_token_version = int(str(self.lightbeam.token_version))
elif status=='404':
elif status=='404' or status=='400':
return False
elif status in ['200', '201']:
# 200 response might still return zero matching records...
Expand Down