Skip to content

Commit

Permalink
Merge pull request #338 from alexmwu/logging-fix
Browse files Browse the repository at this point in the history
Fix logging blocking issue
  • Loading branch information
alexmwu authored Aug 29, 2023
2 parents 884b941 + 2531da7 commit d86a047
Show file tree
Hide file tree
Showing 37 changed files with 821 additions and 123 deletions.
18 changes: 17 additions & 1 deletion cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ steps:
cd launcher/image/test
echo "running launch policy tests on ${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX}"
gcloud builds submit --config=test_launchpolicy_cloudbuild.yaml --region us-west1 \
--substitutions _IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID}
--substitutions _HARDENED_IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID}
exit
- name: 'gcr.io/cloud-builders/gcloud'
Expand Down Expand Up @@ -139,6 +139,22 @@ steps:
gcloud builds submit --config=test_ingress_network.yaml --region us-west1 \
--substitutions _IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-debug-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID}
exit
- name: 'gcr.io/cloud-builders/gcloud'
id: LogRedirectionTests
waitFor: ['HardenedImageBuild']
env:
- 'OUTPUT_IMAGE_PREFIX=$_OUTPUT_IMAGE_PREFIX'
- 'OUTPUT_IMAGE_SUFFIX=$_OUTPUT_IMAGE_SUFFIX'
- 'PROJECT_ID=$PROJECT_ID'
script: |
#!/usr/bin/env bash
cd launcher/image/test
echo "running log redirection tests on ${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX}"
gcloud builds submit --config=test_log_redirection.yaml --region us-west1 \
--substitutions _HARDENED_IMAGE_NAME=${OUTPUT_IMAGE_PREFIX}-hardened-${OUTPUT_IMAGE_SUFFIX},_IMAGE_PROJECT=${PROJECT_ID}
exit
options:
pool:
Expand Down
34 changes: 24 additions & 10 deletions launcher/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ import (

// ContainerRunner contains information about the container settings
type ContainerRunner struct {
container containerd.Container
launchSpec spec.LaunchSpec
attestAgent agent.AttestationAgent
logger *log.Logger
container containerd.Container
launchSpec spec.LaunchSpec
attestAgent agent.AttestationAgent
logger *log.Logger
serialConsole *os.File
}

const (
Expand Down Expand Up @@ -100,7 +101,7 @@ func fetchImpersonatedToken(ctx context.Context, serviceAccount string, audience
}

// NewRunner returns a runner.
func NewRunner(ctx context.Context, cdClient *containerd.Client, token oauth2.Token, launchSpec spec.LaunchSpec, mdsClient *metadata.Client, tpm io.ReadWriteCloser, logger *log.Logger) (*ContainerRunner, error) {
func NewRunner(ctx context.Context, cdClient *containerd.Client, token oauth2.Token, launchSpec spec.LaunchSpec, mdsClient *metadata.Client, tpm io.ReadWriteCloser, logger *log.Logger, serialConsole *os.File) (*ContainerRunner, error) {
image, err := initImage(ctx, cdClient, launchSpec, token)
if err != nil {
return nil, err
Expand Down Expand Up @@ -241,6 +242,7 @@ func NewRunner(ctx context.Context, cdClient *containerd.Client, token oauth2.To
launchSpec,
agent.CreateAttestationAgent(tpm, client.GceAttestationKeyECC, verifierClient, principalFetcher),
logger,
serialConsole,
}, nil
}

Expand Down Expand Up @@ -506,12 +508,24 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
}

var streamOpt cio.Opt
if r.launchSpec.LogRedirect {
streamOpt = cio.WithStreams(nil, r.logger.Writer(), r.logger.Writer())
r.logger.Println("container stdout/stderr will be redirected")
} else {
switch r.launchSpec.LogRedirect {
case spec.Nowhere:
streamOpt = cio.WithStreams(nil, nil, nil)
r.logger.Println("container stdout/stderr will not be redirected")
r.logger.Println("Container stdout/stderr will not be redirected.")
case spec.Everywhere:
w := io.MultiWriter(os.Stdout, r.serialConsole)
streamOpt = cio.WithStreams(nil, w, w)
r.logger.Println("Container stdout/stderr will be redirected to serial and Cloud Logging. " +
"This may result in performance issues due to slow serial console writes.")
case spec.CloudLogging:
streamOpt = cio.WithStreams(nil, os.Stdout, os.Stdout)
r.logger.Println("Container stdout/stderr will be redirected to Cloud Logging.")
case spec.Serial:
streamOpt = cio.WithStreams(nil, r.serialConsole, r.serialConsole)
r.logger.Println("Container stdout/stderr will be redirected to serial logging. " +
"This may result in performance issues due to slow serial console writes.")
default:
return fmt.Errorf("unknown logging redirect location: %v", r.launchSpec.LogRedirect)
}

task, err := r.container.NewTask(ctx, cio.NewCreator(streamOpt))
Expand Down
1 change: 0 additions & 1 deletion launcher/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ go 1.20

require (
cloud.google.com/go/compute v1.7.0
cloud.google.com/go/logging v1.4.2
github.com/cenkalti/backoff/v4 v4.1.3
github.com/containerd/containerd v1.6.18
github.com/golang-jwt/jwt/v4 v4.4.1
Expand Down
4 changes: 2 additions & 2 deletions launcher/image/container-runner.service
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ After=network-online.target gcr-online.target containerd.service
ExecStart=/usr/share/oem/confidential_space/cs_container_launcher
ExecStopPost=/usr/share/oem/confidential_space/exit_script.sh
Restart=no
StandardOutput=journal+console
StandardError=journal+console
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target
6 changes: 5 additions & 1 deletion launcher/image/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
#!/bin/bash

# main installs the container-runner systemd unit and the Confidential Space
# fluent-bit config from /usr/share/oem/confidential_space, then enables and
# starts container-runner.service and starts fluent-bit.service.
main() {
# copy systemd files
# Copy service files.
cp /usr/share/oem/confidential_space/container-runner.service /etc/systemd/system/container-runner.service
# Override default fluent-bit config.
cp /usr/share/oem/confidential_space/fluent-bit-cs.conf /etc/fluent-bit/fluent-bit.conf

# Reload systemd so it picks up the unit file copied above before it is
# enabled and started.
systemctl daemon-reload
systemctl enable container-runner.service
systemctl start container-runner.service
systemctl start fluent-bit.service

}

main
65 changes: 65 additions & 0 deletions launcher/image/fluent-bit-cs.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Forked from https://cos.googlesource.com/cos/overlays/board-overlays/+/refs/heads/master/project-lakitu/app-admin/fluent-bit/files/fluent-bit.conf

[SERVICE]
# Flush
# =====
# Set the interval, in seconds, between flushes of records to a destination.
flush 1
# Daemon
# ======
# instruct Fluent Bit to run in foreground or background mode.
daemon Off
# Log_Level
# =========
# Set the verbosity level of the service, values can be:
#
# - error
# - warning
# - info
# - debug
# - trace
#
# by default 'info' is set, that means it includes 'error' and 'warning'.
log_level info
# Storage
# =======
# Fluent Bit can use memory and filesystem buffering based mechanisms
#
# - https://docs.fluentbit.io/manual/administration/buffering-and-storage
#
# storage metrics
# ---------------
# publish storage pipeline metrics in '/api/v1/storage'. The metrics are
# exported only if the 'http_server' option is enabled.
#
storage.metrics on

# Collects CS launcher and workload logs.
[INPUT]
Name systemd
Tag confidential-space-launcher
Systemd_Filter _SYSTEMD_UNIT=container-runner.service
DB /var/log/google-fluentbit/container-runner.log.db
Read_From_Tail False

[OUTPUT]
Name stackdriver
Match *
Resource gce_instance
severity_key severity
14 changes: 12 additions & 2 deletions launcher/image/preload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,20 @@ configure_necessary_systemd_units() {
# Dependencies of container-runner.service.
enable_unit "network-online.target"
enable_unit "gcr-online.target"

}

# configure_cloud_logging copies fluent-bit-cs.conf (resolved relative to the
# current working directory) into ${CS_PATH}.
configure_cloud_logging() {
# Copy CS-specific fluent-bit config to OEM partition.
# NOTE(review): CS_PATH is defined outside this hunk; presumably it is the
# OEM confidential_space directory that entrypoint.sh later copies the config
# from -- confirm.
cp fluent-bit-cs.conf "${CS_PATH}"
}

configure_systemd_units_for_debug() {
# No-op for now, as debug will default to using multi-user.target.
:
configure_cloud_logging
}
configure_systemd_units_for_hardened() {
configure_necessary_systemd_units
configure_cloud_logging
# Make entrypoint (via cloud-init) the default unit.
set_default_boot_target "cloud-final.service"

Expand All @@ -70,6 +76,10 @@ configure_systemd_units_for_hardened() {
disable_unit "google-startup-scripts.service"
disable_unit "google-shutdown-scripts.service"
disable_unit "konlet-startup.service"
disable_unit "crash-reporter.service"
disable_unit "device_policy_manager.service"
disable_unit "node-problem-detector.service"
disable_unit "docker-events-collector-fluent-bit.service"
disable_unit "sshd.service"
disable_unit "var-lib-toolbox.mount"
}
Expand Down
1 change: 1 addition & 0 deletions launcher/image/test/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Scripts in `util/` contain functions that can be sourced from other test scripts
* `/workspace/status.txt` contains the success/failure message from test steps.
`check_failure.sh` looks for a failed message in the step to determine whether
the cloud build is successful.
* `/workspace/next_start.txt` is used when reading the serial logs.

## Test Failures
Due to the sequential/only-proceed-with-success nature of Cloud Build, tests
Expand Down
111 changes: 111 additions & 0 deletions launcher/image/test/scripts/test_launcher_workload_cloudlogging.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/bin/bash
#
# Verifies that the workload ran and that its expected output reached Cloud
# Logging.
#
# Usage: test_launcher_workload_cloudlogging.sh <arg>
#   <arg> is forwarded unchanged to read_cloud_logging (sourced below).
#
# Each failed check writes 'TEST FAILED.' to /workspace/status.txt. The script
# does not stop at the first failed check, so every failure is reported in a
# single run, and the fetched logs are dumped at the end if any check failed.
set -euo pipefail
source util/read_cloud_logging.sh

# This test requires the workload to run and print
# corresponding messages to cloud logging.
CLOUD_LOGGING_OUTPUT=$(read_cloud_logging "$1")
print_logs=false

# Collapse every run of whitespace (including newlines) to a single space so
# that matching stays insensitive to how the log output is wrapped, as it was
# with the previous unquoted `echo $CLOUD_LOGGING_OUTPUT`, but without exposing
# the log text to pathname expansion.
flattened_logs=$(tr -s '[:space:]' ' ' <<< "${CLOUD_LOGGING_OUTPUT}")

# check_log PATTERN SUCCESS_MSG FAILURE_MSG
#
# Searches the flattened logs for PATTERN (a grep basic regular expression).
# Prints "- SUCCESS_MSG" on a match; otherwise prints "FAILED: FAILURE_MSG",
# records the failure in /workspace/status.txt and requests a log dump.
#
# The logs are fed to grep through a here-string instead of `echo ... | grep -q`.
# Under `set -o pipefail`, grep -q exits at the first match, and on large
# output the writing side of the pipe is then killed by SIGPIPE, which makes
# the pipeline report failure even though the pattern was found.
check_log() {
  local pattern=$1
  local ok_msg=$2
  local fail_msg=$3

  if grep -q -- "${pattern}" <<< "${flattened_logs}"; then
    echo "- ${ok_msg}"
  else
    echo "FAILED: ${fail_msg}"
    echo 'TEST FAILED.' > /workspace/status.txt
    print_logs=true
  fi
}

check_log 'Workload running' \
  'workload running verified' 'workload not running'
check_log 'Workload args: \[/main newCmd\]' \
  'arguments verified' 'arguments not verified'
check_log 'env_bar=val_bar' \
  'env_bar env var verified' 'env_bar env not verified'
check_log 'ALLOWED_OVERRIDE=overridden' \
  'ALLOWED_OVERRIDE env var verified' 'ALLOWED_OVERRIDE env not verified'
check_log 'aud: https://sts.googleapis.com' \
  'token aud verified' 'token aud not verified'
check_log 'iss: https://confidentialcomputing.googleapis.com' \
  'token iss verified' 'token iss not verified'
check_log 'secboot: true' \
  'token secboot verified' 'token secboot not verified'
check_log 'oemid: 11129' \
  'token oemid verified' 'token oemid not verified'
check_log 'hwmodel: GCP_AMD_SEV' \
  'token hwmodel verified' 'token hwmodel not verified'
check_log 'swname: GCE' \
  'token swname verified' 'token swname not verified'
check_log 'Token looks okay' \
  'OIDC token accessible' 'OIDC token not accessible'

# Dump the raw (unflattened) logs to help debug any failed check.
if $print_logs; then
  echo "${CLOUD_LOGGING_OUTPUT}"
fi
16 changes: 16 additions & 0 deletions launcher/image/test/scripts/test_launchpolicy_cmd_cloudlogging.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Verifies that the launcher's "CMD is not allowed to be overridden" message
# shows up in Cloud Logging.
#
# Usage: test_launchpolicy_cmd_cloudlogging.sh <arg>
#   <arg> is forwarded unchanged to read_cloud_logging (sourced below).
#
# On failure, writes 'TEST FAILED' to /workspace/status.txt and prints the
# fetched logs.
set -euo pipefail
source util/read_cloud_logging.sh

# Allow VM some time to boot and write to cloud logging.
sleep 120

CLOUD_LOGGING_OUTPUT=$(read_cloud_logging "$1")

# Collapse every run of whitespace (including newlines) to a single space so
# that matching stays insensitive to how the log output is wrapped, as it was
# with the previous unquoted `echo $CLOUD_LOGGING_OUTPUT`, but without exposing
# the log text to pathname expansion.
flattened_logs=$(tr -s '[:space:]' ' ' <<< "${CLOUD_LOGGING_OUTPUT}")

# A here-string is used instead of `echo ... | grep -q`: under
# `set -o pipefail`, grep -q exiting at the first match can kill the writer
# with SIGPIPE on large output and turn a successful match into a failure.
if grep -q -- 'CMD is not allowed to be overridden on this image' <<< "${flattened_logs}"
then
  echo "- CMD launch policy verified"
else
  echo "FAILED: CMD launch policy verification"
  echo 'TEST FAILED' > /workspace/status.txt
  # Dump the raw (unflattened) logs to help debug the failure.
  echo "${CLOUD_LOGGING_OUTPUT}"
fi
16 changes: 16 additions & 0 deletions launcher/image/test/scripts/test_launchpolicy_env_cloudlogging.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Verifies that the launcher's "env var ... is not allowed to be overridden"
# message shows up in Cloud Logging.
#
# Usage: test_launchpolicy_env_cloudlogging.sh <arg>
#   <arg> is forwarded unchanged to read_cloud_logging (sourced below).
#
# On failure, writes 'TEST FAILED' to /workspace/status.txt and prints the
# fetched logs.
set -euo pipefail
source util/read_cloud_logging.sh

# Allow VM some time to boot and write to cloud logging.
sleep 120

CLOUD_LOGGING_OUTPUT=$(read_cloud_logging "$1")

# Collapse every run of whitespace (including newlines) to a single space so
# that matching stays insensitive to how the log output is wrapped, as it was
# with the previous unquoted `echo $CLOUD_LOGGING_OUTPUT`, but without exposing
# the log text (which contains `[ALLOWED_OVERRIDE]`) to pathname expansion.
flattened_logs=$(tr -s '[:space:]' ' ' <<< "${CLOUD_LOGGING_OUTPUT}")

# A here-string is used instead of `echo ... | grep -q`: under
# `set -o pipefail`, grep -q exiting at the first match can kill the writer
# with SIGPIPE on large output and turn a successful match into a failure.
# --fixed-strings keeps the braces and brackets in the message literal.
if grep -q --fixed-strings -- 'env var {OUT a} is not allowed to be overridden on this image; allowed envs to be overridden: [ALLOWED_OVERRIDE]' <<< "${flattened_logs}"
then
  echo "- Env launch policy verified"
else
  echo "FAILED: Env launch policy verification"
  echo 'TEST FAILED' > /workspace/status.txt
  # Dump the raw (unflattened) logs to help debug the failure.
  echo "${CLOUD_LOGGING_OUTPUT}"
fi
Loading

0 comments on commit d86a047

Please sign in to comment.