Commit ac5339c

[tmpnet] Enable monitoring of local kind cluster
Enable deployment of prometheus and promtail to kube to ensure collection of logs and metrics from nodes deployed on the cluster.
1 parent 948b667

File tree

9 files changed: +626 -10 lines changed


.github/workflows/ci.yml (+2)

@@ -95,6 +95,8 @@ jobs:
         shell: bash
         run: nix develop --command bash -x ./scripts/tests.e2e.kube.sh
         env:
+          TMPNET_START_COLLECTORS: ${{ secrets.PROMETHEUS_ID != '' }}
+          TMPNET_CHECK_MONITORING: ${{ secrets.PROMETHEUS_ID != '' }}
           PROMETHEUS_USERNAME: ${{ secrets.PROMETHEUS_ID || '' }}
           PROMETHEUS_PASSWORD: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
           LOKI_USERNAME: ${{ secrets.LOKI_ID || '' }}

scripts/tests.e2e.kube.sh (+5)

@@ -14,6 +14,11 @@ fi
 
 export KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config}"
 
+# Enable collector start if credentials are set in the env
+if [[ -n "${PROMETHEUS_USERNAME:-}" ]]; then
+  export TMPNET_START_COLLECTORS=true
+fi
+
 ./bin/tmpnetctl start-kind-cluster
 
 if [[ -z "${SKIP_BUILD_IMAGE:-}" ]]; then
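
The exported variable feeds straight into the Go flag default: the fixture reads TMPNET_START_COLLECTORS via tmpnet.GetEnvWithDefault and parses it with cast.ToBool, as the tests/fixture/e2e/flags.go diff below shows. A minimal sketch of that parsing path, using only the helpers visible in this commit (the standalone program is illustrative, not part of the change):

// Sketch only: how the value exported by tests.e2e.kube.sh becomes the
// default of the --start-collectors flag.
package main

import (
	"fmt"

	"github.com/spf13/cast"

	"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
)

func main() {
	// "true" (exported by the script when PROMETHEUS_USERNAME is set) parses
	// to true; an unset variable falls back to the "false" default.
	startCollectors := cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_COLLECTORS", "false"))
	fmt.Println(startCollectors)
}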

tests/fixture/e2e/env.go (+4 -2)

@@ -87,7 +87,8 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
 
 	// Consider monitoring flags for any command but stop
 	if !flagVars.StopNetwork() {
-		if flagVars.StartCollectors() {
+		// TODO(marun) Maybe unify collector deployment between process and kube runtimes?
+		if flagVars.StartCollectors() && flagVars.NodeRuntimeConfig().KubeRuntimeConfig == nil {
 			require.NoError(tmpnet.StartCollectors(tc.DefaultContext(), tc.Log()))
 		}
 		if flagVars.CheckMonitoring() {
@@ -175,7 +176,8 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
 	}
 
 	// Once one or more nodes are running it should be safe to wait for promtail to report readiness
-	if flagVars.StartCollectors() {
+	// TODO(marun) Check the health of the kube collectors
+	if flagVars.StartCollectors() && flagVars.NodeRuntimeConfig().KubeRuntimeConfig == nil {
 		require.NoError(tmpnet.WaitForPromtailReadiness(tc.DefaultContext(), tc.Log()))
 	}

tests/fixture/e2e/flags.go (+13 -6)

@@ -203,12 +203,7 @@ func RegisterFlags() *FlagVars {
 
 // Enable reuse by the upgrade job
 func SetMonitoringFlags(startCollectors *bool, checkMonitoring *bool) {
-	flag.BoolVar(
-		startCollectors,
-		"start-collectors",
-		cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_COLLECTORS", "false")),
-		"[optional] whether to start collectors of logs and metrics from nodes of the temporary network.",
-	)
+	SetStartCollectorsFlag(flag.BoolVar, startCollectors)
 	flag.BoolVar(
 		checkMonitoring,
 		"check-monitoring",
@@ -239,3 +234,15 @@ func SetKubeFlags(stringVar StringVarFunc, kubeConfigPath *string, kubeConfigCon
 		"The namespace in the target cluster to create nodes in",
 	)
 }
+
+// Enable reuse by tmpnetctl
+type BoolVarFunc func(p *bool, name string, value bool, usage string)
+
+func SetStartCollectorsFlag(boolVar BoolVarFunc, startCollectors *bool) {
+	boolVar(
+		startCollectors,
+		"start-collectors",
+		cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_COLLECTORS", "false")),
+		"[optional] whether to start collectors of logs and metrics from nodes of the temporary network.",
+	)
+}
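
The new BoolVarFunc type matches the signature shared by the standard library's flag.BoolVar and cobra's pflag FlagSet.BoolVar, which is what lets SetStartCollectorsFlag serve both the e2e fixture and tmpnetctl. A minimal sketch of the two registrations (hypothetical caller, not part of the commit):

// Sketch only: registering the same flag via both flag packages.
package main

import (
	"flag"

	"github.com/spf13/cobra"

	"github.com/ava-labs/avalanchego/tests/fixture/e2e"
)

func main() {
	var fromStdlib, fromCobra bool

	// Standard library registration, as SetMonitoringFlags now does.
	e2e.SetStartCollectorsFlag(flag.BoolVar, &fromStdlib)
	flag.Parse()

	// Cobra/pflag registration, as tmpnetctl's start-kind-cluster command does.
	cmd := &cobra.Command{Use: "example"}
	e2e.SetStartCollectorsFlag(cmd.PersistentFlags().BoolVar, &fromCobra)
}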

tests/fixture/tmpnet/cmd/main.go (+4 -1)

@@ -16,6 +16,7 @@ import (
 	"go.uber.org/zap"
 
 	"github.com/ava-labs/avalanchego/tests"
+	"github.com/ava-labs/avalanchego/tests/fixture/e2e"
 	"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
 	"github.com/ava-labs/avalanchego/utils/logging"
 	"github.com/ava-labs/avalanchego/version"
@@ -273,6 +274,7 @@ func main() {
 	)
 	rootCmd.AddCommand(checkLogsCmd)
 
+	var startCollectors bool
 	startKindClusterCmd := &cobra.Command{
 		Use:   "start-kind-cluster",
 		Short: "Starts a local kind cluster with an integrated registry",
@@ -283,10 +285,11 @@ func main() {
 			if err != nil {
 				return err
 			}
-			return tmpnet.StartKindCluster(ctx, log, kubeConfigPath, kubeConfigContext)
+			return tmpnet.StartKindCluster(ctx, log, kubeConfigPath, kubeConfigContext, startCollectors)
 		},
 	}
 	SetKubeFlags(startKindClusterCmd.PersistentFlags(), &kubeConfigPath, &kubeConfigContext, &kubeNamespace)
+	e2e.SetStartCollectorsFlag(startKindClusterCmd.PersistentFlags().BoolVar, &startCollectors)
 	rootCmd.AddCommand(startKindClusterCmd)
 
 	if err := rootCmd.Execute(); err != nil {

tests/fixture/tmpnet/monitor_kube.go (+207)

@@ -0,0 +1,207 @@
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package tmpnet
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"go.uber.org/zap"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/runtime/serializer/yaml"
+	"k8s.io/client-go/dynamic"
+	"k8s.io/client-go/kubernetes"
+
+	"github.com/ava-labs/avalanchego/utils/logging"
+
+	_ "embed"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+//go:embed yaml/promtail-daemonset.yaml
+var promtailManifest []byte
+
+//go:embed yaml/prometheus-agent.yaml
+var prometheusManifest []byte
+
+// This must match the namespace defined in the manifests
+const monitoringNamespace = "ci-monitoring"
+
+type kubeCollectorConfig struct {
+	name         string
+	secretPrefix string
+	manifest     []byte
+}
+
+// DeployKubeCollectors deploys collectors of logs and metrics to a Kubernetes cluster.
+func DeployKubeCollectors(ctx context.Context, log logging.Logger, configPath string, configContext string) error {
+	log.Info("deploying kube collectors for logs and metrics")
+
+	clientConfig, err := GetClientConfig(log, configPath, configContext)
+	if err != nil {
+		return fmt.Errorf("failed to get client config: %w", err)
+	}
+	clientset, err := kubernetes.NewForConfig(clientConfig)
+	if err != nil {
+		return fmt.Errorf("failed to create clientset: %w", err)
+	}
+	dynamicClient, err := dynamic.NewForConfig(clientConfig)
+	if err != nil {
+		return fmt.Errorf("failed to create dynamic client: %w", err)
+	}
+
+	namespace := &corev1.Namespace{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: monitoringNamespace,
+		},
+	}
+	_, err = clientset.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{})
+	if err != nil && !apierrors.IsAlreadyExists(err) {
+		return fmt.Errorf("failed to create namespace %s: %w", monitoringNamespace, err)
+	}
+
+	collectorConfigs := []kubeCollectorConfig{
+		{
+			name:         promtailCmd,
+			secretPrefix: "loki",
+			manifest:     promtailManifest,
+		},
+		{
+			name:         prometheusCmd,
+			secretPrefix: prometheusCmd,
+			manifest:     prometheusManifest,
+		},
+	}
+	for _, collectorConfig := range collectorConfigs {
+		if err := deployKubeCollector(ctx, log, clientset, dynamicClient, collectorConfig); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// deployKubeCollector deploys the named collector to a Kubernetes cluster via the provided manifest bytes.
+func deployKubeCollector(
+	ctx context.Context,
+	log logging.Logger,
+	clientset *kubernetes.Clientset,
+	dynamicClient dynamic.Interface,
+	collectorConfig kubeCollectorConfig,
+) error {
+	username, password, err := getCollectorCredentials(collectorConfig.name)
+	if err != nil {
+		return fmt.Errorf("failed to get credentials for %s: %w", collectorConfig.name, err)
+	}
+
+	if err := createCredentialSecret(ctx, log, clientset, collectorConfig.secretPrefix, username, password); err != nil {
+		return fmt.Errorf("failed to create credential secret for %s: %w", collectorConfig.name, err)
+	}
+
+	if err := applyManifest(ctx, log, dynamicClient, collectorConfig.manifest); err != nil {
+		return fmt.Errorf("failed to apply manifest for %s: %w", collectorConfig.name, err)
+	}
+	return nil
+}
+
+// createCredentialSecret creates a secret with the provided username and password for a collector
+func createCredentialSecret(
+	ctx context.Context,
+	log logging.Logger,
+	clientset *kubernetes.Clientset,
+	namePrefix string,
+	username string,
+	password string,
+) error {
+	secretName := namePrefix + "-credentials"
+	secret := &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: secretName,
+		},
+		StringData: map[string]string{
+			"username": username,
+			"password": password,
+		},
+	}
+	_, err := clientset.CoreV1().Secrets(monitoringNamespace).Create(ctx, secret, metav1.CreateOptions{})
+	if err != nil {
+		if apierrors.IsAlreadyExists(err) {
+			log.Info("secret already exists",
+				zap.String("namespace", monitoringNamespace),
+				zap.String("name", secretName),
+			)
+			return nil
+		}
+		return fmt.Errorf("failed to create secret %s/%s: %w", monitoringNamespace, secretName, err)
+	}
+
+	log.Info("created secret",
+		zap.String("namespace", monitoringNamespace),
+		zap.String("name", secretName),
+	)
+
+	return nil
+}
+
+// applyManifest creates the resources defined by the provided manifest in a manner similar to `kubectl apply -f`
+func applyManifest(
+	ctx context.Context,
+	log logging.Logger,
+	dynamicClient dynamic.Interface,
+	manifest []byte,
+) error {
+	// Split the manifest into individual resources
+	decoder := yaml.NewDecodingSerializer(unstructured.UnstructuredJSONScheme)
+	documents := strings.Split(string(manifest), "\n---\n")
+
+	for _, doc := range documents {
+		doc := strings.TrimSpace(doc)
+		if strings.TrimSpace(doc) == "" || strings.HasPrefix(doc, "#") {
+			continue
+		}
+
+		obj := &unstructured.Unstructured{}
+		_, gvk, err := decoder.Decode([]byte(doc), nil, obj)
+		if err != nil {
+			return fmt.Errorf("failed to decode manifest: %w", err)
+		}
+
+		gvr := schema.GroupVersionResource{
+			Group:    gvk.Group,
+			Version:  gvk.Version,
+			Resource: strings.ToLower(gvk.Kind) + "s",
+		}
+
+		var resourceInterface dynamic.ResourceInterface
+		if strings.HasPrefix(gvk.Kind, "Cluster") || gvk.Kind == "Namespace" {
+			resourceInterface = dynamicClient.Resource(gvr)
+		} else {
+			resourceInterface = dynamicClient.Resource(gvr).Namespace(monitoringNamespace)
+		}
+
+		_, err = resourceInterface.Create(ctx, obj, metav1.CreateOptions{})
+		if err != nil {
+			if apierrors.IsAlreadyExists(err) {
+				log.Info("resource already exists",
+					zap.String("kind", gvk.Kind),
+					zap.String("namespace", monitoringNamespace),
+					zap.String("name", obj.GetName()),
+				)
+				continue
+			}
+			return fmt.Errorf("failed to create %s %s/%s: %w", gvk.Kind, monitoringNamespace, obj.GetName(), err)
+		}
+		log.Info("created resource",
+			zap.String("kind", gvk.Kind),
+			zap.String("namespace", monitoringNamespace),
+			zap.String("name", obj.GetName()),
+		)
+	}
+
+	return nil
+}
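
applyManifest expects the embedded YAML to be a series of documents separated by "\n---\n", and it derives each resource name by lowercasing the kind and appending "s", so the bundled manifests are presumably limited to kinds that pluralize regularly. A minimal sketch of a manifest it could apply; the manifest content below is illustrative only (the real promtail and prometheus manifests live under yaml/), and since applyManifest is unexported the caller would have to live in package tmpnet:

// Sketch only: a hypothetical in-package caller of applyManifest.
package tmpnet

import (
	"context"

	"k8s.io/client-go/dynamic"

	"github.com/ava-labs/avalanchego/utils/logging"
)

// Two documents separated by "\n---\n"; both kinds pluralize regularly
// (serviceaccounts, configmaps) and are namespaced, so they are created in the
// ci-monitoring namespace.
const exampleManifest = `apiVersion: v1
kind: ServiceAccount
metadata:
  name: example-collector
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: example-collector-config
data:
  config.yaml: ""
`

func applyExample(ctx context.Context, log logging.Logger, client dynamic.Interface) error {
	return applyManifest(ctx, log, client, []byte(exampleManifest))
}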

tests/fixture/tmpnet/start_kind_cluster.go (+13 -1)

@@ -32,7 +32,13 @@ func CheckClusterRunning(log logging.Logger, configPath string, configContext st
 }
 
 // StartKindCluster starts a new kind cluster if one is not already running.
-func StartKindCluster(ctx context.Context, log logging.Logger, configPath string, configContext string) error {
+func StartKindCluster(
+	ctx context.Context,
+	log logging.Logger,
+	configPath string,
+	configContext string,
+	startCollectors bool,
+) error {
 	err := CheckClusterRunning(log, configPath, configContext)
 	if err == nil {
 		log.Info("kubernetes cluster already running",
@@ -75,5 +81,11 @@ func StartKindCluster(ctx context.Context, log logging.Logger, configPath string
 		zap.String("namespace", DefaultTmpnetNamespace),
 	)
 
+	if startCollectors {
+		if err := DeployKubeCollectors(ctx, log, configPath, configContext); err != nil {
+			return fmt.Errorf("failed to deploy kube collectors: %w", err)
+		}
+	}
+
 	return nil
 }
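
With the added startCollectors parameter, any caller (not just tmpnetctl's start-kind-cluster command) can opt into collector deployment when bringing up the kind cluster. A minimal sketch of such a caller; the kubeconfig path and the "kind-kind" context name are assumptions, logging.NoLog{} simply discards output, and collector credentials (e.g. PROMETHEUS_USERNAME/PROMETHEUS_PASSWORD, as the CI workflow sets) are expected in the environment:

// Sketch only: a hypothetical programmatic caller of StartKindCluster.
package main

import (
	"context"
	"log"
	"os"
	"path/filepath"

	"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
	"github.com/ava-labs/avalanchego/utils/logging"
)

func main() {
	ctx := context.Background()
	kubeConfigPath := filepath.Join(os.Getenv("HOME"), ".kube", "config")

	// Passing startCollectors=true triggers DeployKubeCollectors once the
	// cluster and tmpnet namespace are ready.
	if err := tmpnet.StartKindCluster(ctx, logging.NoLog{}, kubeConfigPath, "kind-kind", true); err != nil {
		log.Fatal(err)
	}
}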
