-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CERTTF-388] [CERTTF-443] Implement masking on the agent #404
base: main
Are you sure you want to change the base?
Changes from all commits
98039bf
c6b086c
bd589ba
907068e
27d1da4
cc30046
9447fd8
e304954
b1cf651
a18568c
fe87f6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,8 @@ | |
import time | ||
|
||
from testflinger_agent.errors import TFServerError | ||
from .runner import CommandRunner, RunnerEvents | ||
from testflinger_agent.masking import Masker | ||
from .runner import CommandRunner, MaskingCommandRunner, RunnerEvents | ||
from .handlers import LiveOutputHandler, LogUpdateHandler | ||
from .stop_condition_checkers import ( | ||
JobCancelledChecker, | ||
|
@@ -43,6 +44,41 @@ def __init__(self, job_data, client): | |
self.job_id = job_data.get("job_id") | ||
self.phase = "unknown" | ||
|
||
def get_runner(self, rundir): | ||
hash_length = 6 | ||
# retrieve sensitive patterns from the agent configuration | ||
sensitive_patterns = self.client.config.get("sensitive_patterns", []) | ||
# retrieve secrets from the job | ||
secrets_dict = self.job_data.get("secrets") | ||
|
||
if secrets_dict: | ||
# inject secrets into the runner environment | ||
environment = { | ||
**self.client.config, | ||
**secrets_dict, | ||
} | ||
# add secrets to the sensitive variables | ||
sensitive_patterns.extend(secrets_dict.values()) | ||
# secrets and (possibly) sensitive information | ||
Comment on lines
+55
to
+62
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
masker = Masker( | ||
patterns=sensitive_patterns, hash_length=hash_length | ||
) | ||
return MaskingCommandRunner( | ||
cwd=rundir, env=environment, masker=masker | ||
) | ||
|
||
if sensitive_patterns: | ||
# sensitive patterns but no sensitive variables and no secrets | ||
masker = Masker( | ||
patterns=sensitive_patterns, hash_length=hash_length | ||
) | ||
return MaskingCommandRunner( | ||
cwd=rundir, env=self.client.config, masker=masker | ||
) | ||
|
||
# no secrets, no sensitive information: use regular runner | ||
return CommandRunner(cwd=rundir, env=self.client.config) | ||
|
||
def run_test_phase(self, phase, rundir): | ||
"""Run the specified test phase in rundir | ||
|
||
|
@@ -82,7 +118,7 @@ def run_test_phase(self, phase, rundir): | |
serial_log = os.path.join(rundir, phase + "-serial.log") | ||
|
||
logger.info("Running %s_command: %s", phase, cmd) | ||
runner = CommandRunner(cwd=rundir, env=self.client.config) | ||
runner = self.get_runner(rundir) | ||
output_log_handler = LogUpdateHandler(output_log) | ||
live_output_handler = LiveOutputHandler(self.client, self.job_id) | ||
runner.register_output_handler(output_log_handler) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
""" | ||
Text masking for sensitive information. | ||
|
||
This module provides functionality to mask sensitive information in text. | ||
Each matched pattern is replaced with a deterministic hash, ensuring that | ||
the same sensitive information is always replaced by the same mask. | ||
""" | ||
|
||
import hashlib | ||
import re | ||
from typing import List, Optional | ||
|
||
|
||
class Masker: | ||
""" | ||
A class for masking sensitive information in text. | ||
|
||
Example: | ||
>>> masker = Masker(['\\d{3}-\\d{2}-\\d{4}'], hash_length=6) | ||
>>> masker.apply("SSN: 123-45-6789") | ||
'SSN: **01a546**' | ||
""" | ||
|
||
def __init__(self, patterns: List[str], hash_length: Optional[int] = None): | ||
# join all patterns into a single combined pattern and compile it | ||
combined = self.combine(pattern for pattern in patterns if pattern) | ||
if not combined: | ||
raise ValueError("Empty combined pattern") | ||
self.pattern = re.compile(combined) | ||
# the length of the hash to be used for masking | ||
# (a value of `None` results in the full hash length) | ||
self.hash_length = hash_length | ||
|
||
@staticmethod | ||
def combine(patterns: List[str]) -> str: | ||
"""Return a single disjunctive regular expression from `patterns`""" | ||
return "|".join(f"({pattern})" for pattern in patterns) | ||
|
||
@staticmethod | ||
def hash(text: str) -> str: | ||
"""Return a hash for `text`""" | ||
return hashlib.sha256(text.encode()).hexdigest() | ||
|
||
def mask(self, match: re.Match) -> str: | ||
"""Return a string with a mask applied on a pattern `match`""" | ||
return f"**{self.hash(match.group())[:self.hash_length]}**" | ||
|
||
def apply(self, text: str) -> str: | ||
"""Return a string with all pattern matches in `text` masked""" | ||
return self.pattern.sub(self.mask, text) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
import json | ||
import os | ||
import pytest | ||
import re | ||
import shutil | ||
import tempfile | ||
|
||
|
@@ -15,7 +17,7 @@ | |
GlobalTimeoutChecker, | ||
OutputTimeoutChecker, | ||
) | ||
from testflinger_common.enums import TestEvent | ||
from testflinger_common.enums import TestEvent, TestPhase | ||
|
||
|
||
class TestJob: | ||
|
@@ -173,6 +175,111 @@ def test_run_test_phase_with_run_exception( | |
assert exit_event == "setup_fail" | ||
assert exit_reason == "failed" | ||
|
||
def run_testcmds(self, job_data, client, tmp_path) -> str: | ||
# create the outcome file manually | ||
with open(tmp_path / "testflinger-outcome.json", "w") as outcome_file: | ||
outcome_file.write("{}") | ||
# create the agent configuration file manually | ||
with open(tmp_path / "default.yaml", "w") as config_file: | ||
config_file.write("agent_name: agent-007") | ||
# write the job data to `testflinger.json` manually: | ||
# (so that the device connector can pick it up) | ||
with open(tmp_path / "testflinger.json", "w") as job_file: | ||
json.dump(job_data, job_file) | ||
|
||
# running the device connector will result in running the `test_cmds` | ||
self.config["test_command"] = ( | ||
"testflinger-device-connector fake_connector runtest " | ||
f"--config {tmp_path}/default.yaml " | ||
f"{tmp_path}/testflinger.json" | ||
) | ||
|
||
# run the test phase of the job | ||
job = _TestflingerJob(job_data, client) | ||
exit_code, _, _ = job.run_test_phase(TestPhase.TEST, tmp_path) | ||
assert exit_code == 0 | ||
|
||
# capture the output | ||
with open(tmp_path / "test.log") as log: | ||
return log.read() | ||
|
||
def test_run_test_phase_sensitive(self, client, tmp_path, requests_mock): | ||
requests_mock.post(rmock.ANY, status_code=200) | ||
|
||
# sensitive patterns are added to the agent configuration file: | ||
# this will result in the use of a masking command runner | ||
self.config["sensitive_patterns"] = ["[Cc][a-z]*n"] | ||
|
||
# create the job data | ||
job_data = { | ||
"test_data": {"test_cmds": "echo -n Message: Captain Major"} | ||
} | ||
|
||
log_data = self.run_testcmds(job_data, client, tmp_path) | ||
print(log_data) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are you supposed to print log data to stdout, not stderr? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a test, so neither |
||
|
||
# the output should match this pattern, i.e. the sensitive pattern | ||
# after "Message: " should be masked | ||
output_pattern = r"^Message: \*\*([0-9a-f]{6})\*\* Major$" | ||
match = re.search(output_pattern, log_data, re.MULTILINE) | ||
assert match is not None | ||
|
||
def test_run_test_phase_secret(self, client, tmp_path, requests_mock): | ||
requests_mock.post(rmock.ANY, status_code=200) | ||
|
||
# create the job data | ||
job_data = { | ||
"secrets": {"SECRET": "Major"}, | ||
"test_data": { | ||
"test_cmds": "echo -n Message: Captain Major $SECRET" | ||
}, | ||
} | ||
|
||
log_data = self.run_testcmds(job_data, client, tmp_path) | ||
print(log_data) | ||
|
||
# the output should match this pattern, i.e. the secrets | ||
# after "Message: " should be masked | ||
output_pattern = ( | ||
r"^Message: Captain \*\*([0-9a-f]{6})\*\* \*\*([0-9a-f]{6})\*\*$" | ||
) | ||
match = re.search(output_pattern, log_data, re.MULTILINE) | ||
assert match is not None | ||
first, second = match.groups() | ||
assert first == second | ||
|
||
def test_run_test_phase_sensitive_and_secret( | ||
self, client, tmp_path, requests_mock | ||
): | ||
requests_mock.post(rmock.ANY, status_code=200) | ||
|
||
# sensitive patterns are added to the agent configuration file: | ||
self.config["sensitive_patterns"] = ["[Cc][a-z]*n"] | ||
|
||
# create the job data | ||
job_data = { | ||
"secrets": {"SECRET": "Major"}, | ||
"test_data": { | ||
"test_cmds": "echo -n Message: Captain Major $SECRET" | ||
}, | ||
} | ||
|
||
log_data = self.run_testcmds(job_data, client, tmp_path) | ||
print(log_data) | ||
|
||
# the output should match this pattern, i.e. the strings | ||
# after "Message: " should be masked | ||
output_pattern = ( | ||
r"^Message: " | ||
r"\*\*([0-9a-f]{6})\*\* " | ||
r"\*\*([0-9a-f]{6})\*\* " | ||
r"\*\*([0-9a-f]{6})\*\*$" | ||
) | ||
match = re.search(output_pattern, log_data, re.MULTILINE) | ||
assert match is not None | ||
first, second, third = match.groups() | ||
assert first != second and second == third | ||
|
||
def test_set_truncate(self, client): | ||
"""Test the _set_truncate method of TestflingerJob""" | ||
job = _TestflingerJob({}, client) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
from testflinger_agent.masking import Masker | ||
|
||
|
||
def test_hash_consistency(): | ||
text = "123 testflinger-456 no-secret" | ||
# hash same text twice and compare hashes | ||
hash1 = Masker.hash(text) | ||
hash2 = Masker.hash(text) | ||
assert hash1 == hash2 | ||
# expected hash length is the full SHA-256 hash, i.e. 64 characters | ||
assert len(hash1) == 64 | ||
|
||
|
||
def test_no_matches(): | ||
# masker for numerical values | ||
masker = Masker([r"\d+"]) | ||
# input text has no numerical values | ||
text = "testflinger" | ||
# there should be no masking | ||
masked = masker.apply(text) | ||
print(masked) | ||
assert masked == text | ||
|
||
|
||
def test_no_hash_length(): | ||
# masker for numerical values, no hash length | ||
masker = Masker([r"\d+"]) | ||
# input text is numerical | ||
text = "123456" | ||
masked = masker.apply(text) | ||
print(masked) | ||
# expected hash length is the full SHA-256 hash, i.e. 64 characters | ||
assert len(masked) == 64 + 4 | ||
|
||
|
||
def test_mask_format(): | ||
hash_length = 12 | ||
text = "123456" | ||
masker = Masker([r"\d+"], hash_length=hash_length) | ||
masked = masker.apply(text) | ||
print(masked) | ||
assert masked.startswith("**") | ||
assert masked.endswith("**") | ||
assert len(masked) == hash_length + 4 | ||
|
||
|
||
def test_apply_with_single_pattern(): | ||
hash_length = 12 | ||
text = "SSN: 123-45-6789" | ||
masker = Masker([r"\b\d{3}-\d{2}-\d{4}\b"], hash_length=hash_length) | ||
masked = masker.apply(text) | ||
print(masked) | ||
assert "123-45-6789" not in masked | ||
assert masked.startswith("SSN: **") | ||
assert masked.endswith("**") | ||
|
||
|
||
def test_apply_with_multiple_matches(): | ||
hash_length = 12 | ||
text = "Contact: john@example.com, SSN: 123-45-6789" | ||
masker = Masker( | ||
[ | ||
r"\b\d{3}-\d{2}-\d{4}\b", | ||
r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", | ||
], | ||
hash_length=hash_length, | ||
) | ||
masked = masker.apply(text) | ||
print(masked) | ||
assert "john@example.com" not in masked | ||
assert "123-45-6789" not in masked | ||
assert masked.count("**") == 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the logic is self-explanatory, no need for comments here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I must say that in this particular case the comments were deliberate: I mean to emphasize how the sources of these patterns (sensitive vs. secrets) are different, i.e. that the former is static and comes from the agent configuration whereas the latter is dynamic and differs for every job. I believe one can easily miss that by skimming over the code.