
Commit b742fdd

[tmpnet] Enable monitoring of local kind cluster
Enable deployment of prometheus and promtail to kube to ensure collection of logs and metrics from nodes deployed on the cluster.
1 parent 948b667 commit b742fdd

File tree

9 files changed: +627 −10 lines changed


.github/workflows/ci.yml

+2
@@ -95,6 +95,8 @@ jobs:
         shell: bash
         run: nix develop --command bash -x ./scripts/tests.e2e.kube.sh
         env:
+          TMPNET_START_COLLECTORS: ${{ secrets.PROMETHEUS_ID != '' }}
+          TMPNET_CHECK_MONITORING: ${{ secrets.PROMETHEUS_ID != '' }}
           PROMETHEUS_USERNAME: ${{ secrets.PROMETHEUS_ID || '' }}
           PROMETHEUS_PASSWORD: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
           LOKI_USERNAME: ${{ secrets.LOKI_ID || '' }}

scripts/tests.e2e.kube.sh

+5
@@ -14,6 +14,11 @@ fi

 export KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config}"

+# Enable collector start if credentials are set in the env
+if [[ -n "${PROMETHEUS_USERNAME:-}" ]]; then
+  export TMPNET_START_COLLECTORS=true
+fi
+
 ./bin/tmpnetctl start-kind-cluster

 if [[ -z "${SKIP_BUILD_IMAGE:-}" ]]; then
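
On the Go side, the exported TMPNET_START_COLLECTORS value becomes the default for the start-collectors flag (see tests/fixture/e2e/flags.go below). A minimal, self-contained sketch of that env-to-flag plumbing; getEnvWithDefault here is an illustrative stand-in for the behavior assumed of tmpnet.GetEnvWithDefault, not the commit's code:

	package main

	import (
		"flag"
		"fmt"
		"os"

		"github.com/spf13/cast"
	)

	// getEnvWithDefault mirrors the assumed behavior of tmpnet.GetEnvWithDefault:
	// return the variable's value if set, otherwise the supplied default.
	func getEnvWithDefault(name string, defaultValue string) string {
		if value, ok := os.LookupEnv(name); ok {
			return value
		}
		return defaultValue
	}

	func main() {
		var startCollectors bool
		flag.BoolVar(
			&startCollectors,
			"start-collectors",
			cast.ToBool(getEnvWithDefault("TMPNET_START_COLLECTORS", "false")),
			"whether to start collectors of logs and metrics",
		)
		flag.Parse()
		fmt.Println("start collectors:", startCollectors)
	}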

tests/fixture/e2e/env.go

+4 −2

@@ -87,7 +87,8 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork

 	// Consider monitoring flags for any command but stop
 	if !flagVars.StopNetwork() {
-		if flagVars.StartCollectors() {
+		// TODO(marun) Maybe unify collector deployment between process and kube runtimes?
+		if flagVars.StartCollectors() && flagVars.NodeRuntimeConfig().KubeRuntimeConfig == nil {
 			require.NoError(tmpnet.StartCollectors(tc.DefaultContext(), tc.Log()))
 		}
 		if flagVars.CheckMonitoring() {
@@ -175,7 +176,8 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
 	}

 	// Once one or more nodes are running it should be safe to wait for promtail to report readiness
-	if flagVars.StartCollectors() {
+	// TODO(marun) Check the health of the kube collectors
+	if flagVars.StartCollectors() && flagVars.NodeRuntimeConfig().KubeRuntimeConfig == nil {
 		require.NoError(tmpnet.WaitForPromtailReadiness(tc.DefaultContext(), tc.Log()))
 	}

tests/fixture/e2e/flags.go

+13 −6

@@ -203,12 +203,7 @@ func RegisterFlags() *FlagVars {

 // Enable reuse by the upgrade job
 func SetMonitoringFlags(startCollectors *bool, checkMonitoring *bool) {
-	flag.BoolVar(
-		startCollectors,
-		"start-collectors",
-		cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_COLLECTORS", "false")),
-		"[optional] whether to start collectors of logs and metrics from nodes of the temporary network.",
-	)
+	SetStartCollectorsFlag(flag.BoolVar, startCollectors)
 	flag.BoolVar(
 		checkMonitoring,
 		"check-monitoring",
@@ -239,3 +234,15 @@ func SetKubeFlags(stringVar StringVarFunc, kubeConfigPath *string, kubeConfigCon
 		"The namespace in the target cluster to create nodes in",
 	)
 }
+
+// Enable reuse by tmpnetctl
+type BoolVarFunc func(p *bool, name string, value bool, usage string)
+
+func SetStartCollectorsFlag(boolVar BoolVarFunc, startCollectors *bool) {
+	boolVar(
+		startCollectors,
+		"start-collectors",
+		cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_COLLECTORS", "false")),
+		"[optional] whether to start collectors of logs and metrics from nodes of the temporary network.",
+	)
+}
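
The new BoolVarFunc type exists so the same flag registration can target either the stdlib flag package (used by the e2e test fixture) or a cobra command's pflag.FlagSet (used by tmpnetctl), since both expose a BoolVar with the same shape. A minimal sketch of the pattern; the lowercase setStartCollectorsFlag and the hard-coded false default are simplifications for illustration, not the commit's exact code:

	package main

	import (
		"flag"
		"fmt"

		"github.com/spf13/pflag"
	)

	// BoolVarFunc matches both the stdlib flag.BoolVar function and the
	// (*pflag.FlagSet).BoolVar method used by cobra commands.
	type BoolVarFunc func(p *bool, name string, value bool, usage string)

	func setStartCollectorsFlag(boolVar BoolVarFunc, startCollectors *bool) {
		boolVar(startCollectors, "start-collectors", false, "whether to start collectors")
	}

	func main() {
		// Register against the stdlib flag package (as the e2e fixture does).
		var fromStdlib bool
		setStartCollectorsFlag(flag.BoolVar, &fromStdlib)

		// Register against a pflag FlagSet (as tmpnetctl's cobra command does).
		var fromPflag bool
		fs := pflag.NewFlagSet("start-kind-cluster", pflag.ContinueOnError)
		setStartCollectorsFlag(fs.BoolVar, &fromPflag)

		_ = fs.Parse([]string{"--start-collectors=true"})
		fmt.Println(fromStdlib, fromPflag)
	}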

tests/fixture/tmpnet/cmd/main.go

+4 −1

@@ -16,6 +16,7 @@ import (
 	"go.uber.org/zap"

 	"github.com/ava-labs/avalanchego/tests"
+	"github.com/ava-labs/avalanchego/tests/fixture/e2e"
 	"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
 	"github.com/ava-labs/avalanchego/utils/logging"
 	"github.com/ava-labs/avalanchego/version"
@@ -273,6 +274,7 @@ func main() {
 	)
 	rootCmd.AddCommand(checkLogsCmd)

+	var startCollectors bool
 	startKindClusterCmd := &cobra.Command{
 		Use:   "start-kind-cluster",
 		Short: "Starts a local kind cluster with an integrated registry",
@@ -283,10 +285,11 @@ func main() {
 			if err != nil {
 				return err
 			}
-			return tmpnet.StartKindCluster(ctx, log, kubeConfigPath, kubeConfigContext)
+			return tmpnet.StartKindCluster(ctx, log, kubeConfigPath, kubeConfigContext, startCollectors)
 		},
 	}
 	SetKubeFlags(startKindClusterCmd.PersistentFlags(), &kubeConfigPath, &kubeConfigContext, &kubeNamespace)
+	e2e.SetStartCollectorsFlag(startKindClusterCmd.PersistentFlags().BoolVar, &startCollectors)
 	rootCmd.AddCommand(startKindClusterCmd)

 	if err := rootCmd.Execute(); err != nil {

tests/fixture/tmpnet/monitor_kube.go

+208
@@ -0,0 +1,208 @@

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package tmpnet

import (
	"context"
	"fmt"
	"strings"

	"go.uber.org/zap"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/runtime/serializer/yaml"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/kubernetes"

	_ "embed"

	"github.com/ava-labs/avalanchego/utils/logging"

	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

//go:embed yaml/promtail-daemonset.yaml
var promtailManifest []byte

//go:embed yaml/prometheus-agent.yaml
var prometheusManifest []byte

// This must match the namespace defined in the manifests
const monitoringNamespace = "ci-monitoring"

type kubeCollectorConfig struct {
	name         string
	secretPrefix string
	manifest     []byte
}

// DeployKubeCollectors deploys collectors of logs and metrics to a Kubernetes cluster.
func DeployKubeCollectors(ctx context.Context, log logging.Logger, configPath string, configContext string) error {
	log.Info("deploying kube collectors for logs and metrics")

	clientConfig, err := GetClientConfig(log, configPath, configContext)
	if err != nil {
		return fmt.Errorf("failed to get client config: %w", err)
	}
	clientset, err := kubernetes.NewForConfig(clientConfig)
	if err != nil {
		return fmt.Errorf("failed to create clientset: %w", err)
	}
	dynamicClient, err := dynamic.NewForConfig(clientConfig)
	if err != nil {
		return fmt.Errorf("failed to create dynamic client: %w", err)
	}

	namespace := &corev1.Namespace{
		ObjectMeta: metav1.ObjectMeta{
			Name: monitoringNamespace,
		},
	}
	_, err = clientset.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{})
	if err != nil && !apierrors.IsAlreadyExists(err) {
		return fmt.Errorf("failed to create namespace %s: %w", monitoringNamespace, err)
	}

	collectorConfigs := []kubeCollectorConfig{
		{
			name:         promtailCmd,
			secretPrefix: "loki",
			manifest:     promtailManifest,
		},
		{
			name:         prometheusCmd,
			secretPrefix: prometheusCmd,
			manifest:     prometheusManifest,
		},
	}
	for _, collectorConfig := range collectorConfigs {
		if err := deployKubeCollector(ctx, log, clientset, dynamicClient, collectorConfig); err != nil {
			return err
		}
	}

	return nil
}

// deployKubeCollector deploys the named collector to a Kubernetes cluster via the provided manifest bytes.
func deployKubeCollector(
	ctx context.Context,
	log logging.Logger,
	clientset *kubernetes.Clientset,
	dynamicClient dynamic.Interface,
	collectorConfig kubeCollectorConfig,
) error {
	username, password, err := getCollectorCredentials(collectorConfig.name)
	if err != nil {
		return fmt.Errorf("failed to get credentials for %s: %w", collectorConfig.name, err)
	}

	if err := createCredentialSecret(ctx, log, clientset, collectorConfig.secretPrefix, username, password); err != nil {
		return fmt.Errorf("failed to create credential secret for %s: %w", collectorConfig.name, err)
	}

	if err := applyManifest(ctx, log, dynamicClient, collectorConfig.manifest); err != nil {
		return fmt.Errorf("failed to apply manifest for %s: %w", collectorConfig.name, err)
	}
	return nil
}

// createCredentialSecret creates a secret with the provided username and password for a collector
func createCredentialSecret(
	ctx context.Context,
	log logging.Logger,
	clientset *kubernetes.Clientset,
	namePrefix string,
	username string,
	password string,
) error {
	secretName := namePrefix + "-credentials"
	secret := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name: secretName,
		},
		StringData: map[string]string{
			"username": username,
			"password": password,
		},
	}
	_, err := clientset.CoreV1().Secrets(monitoringNamespace).Create(ctx, secret, metav1.CreateOptions{})
	if err != nil {
		if apierrors.IsAlreadyExists(err) {
			log.Info("secret already exists",
				zap.String("namespace", monitoringNamespace),
				zap.String("name", secretName),
			)
			return nil
		}
		return fmt.Errorf("failed to create secret %s/%s: %w", monitoringNamespace, secretName, err)
	}

	log.Info("created secret",
		zap.String("namespace", monitoringNamespace),
		zap.String("name", secretName),
	)

	return nil
}

// applyManifest creates the resources defined by the provided manifest in a manner similar to `kubectl apply -f`
func applyManifest(
	ctx context.Context,
	log logging.Logger,
	dynamicClient dynamic.Interface,
	manifest []byte,
) error {
	// Split the manifest into individual resources
	decoder := yaml.NewDecodingSerializer(unstructured.UnstructuredJSONScheme)
	documents := strings.Split(string(manifest), "\n---\n")

	for _, doc := range documents {
		doc := strings.TrimSpace(doc)
		if strings.TrimSpace(doc) == "" || strings.HasPrefix(doc, "#") {
			continue
		}

		obj := &unstructured.Unstructured{}
		_, gvk, err := decoder.Decode([]byte(doc), nil, obj)
		if err != nil {
			return fmt.Errorf("failed to decode manifest: %w", err)
		}

		gvr := schema.GroupVersionResource{
			Group:    gvk.Group,
			Version:  gvk.Version,
			Resource: strings.ToLower(gvk.Kind) + "s",
		}

		var resourceInterface dynamic.ResourceInterface
		if strings.HasPrefix(gvk.Kind, "Cluster") || gvk.Kind == "Namespace" {
			resourceInterface = dynamicClient.Resource(gvr)
		} else {
			resourceInterface = dynamicClient.Resource(gvr).Namespace(monitoringNamespace)
		}

		_, err = resourceInterface.Create(ctx, obj, metav1.CreateOptions{})
		if err != nil {
			if apierrors.IsAlreadyExists(err) {
				log.Info("resource already exists",
					zap.String("kind", gvk.Kind),
					zap.String("namespace", monitoringNamespace),
					zap.String("name", obj.GetName()),
				)
				continue
			}
			return fmt.Errorf("failed to create %s %s/%s: %w", gvk.Kind, monitoringNamespace, obj.GetName(), err)
		}
		log.Info("created resource",
			zap.String("kind", gvk.Kind),
			zap.String("namespace", monitoringNamespace),
			zap.String("name", obj.GetName()),
		)
	}

	return nil
}
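
The decode step in applyManifest relies on apimachinery's unstructured decoding to recover each document's GroupVersionKind before the resource is created. A small standalone sketch of just that step, with an illustrative ConfigMap document standing in for the embedded promtail/prometheus manifests:

	package main

	import (
		"fmt"
		"strings"

		"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
		"k8s.io/apimachinery/pkg/runtime/schema"
		"k8s.io/apimachinery/pkg/runtime/serializer/yaml"
	)

	func main() {
		// An illustrative single-document manifest; the real code embeds
		// yaml/promtail-daemonset.yaml and yaml/prometheus-agent.yaml instead.
		doc := []byte(`apiVersion: v1
	kind: ConfigMap
	metadata:
	  name: example
	  namespace: ci-monitoring
	data:
	  key: value
	`)

		// Decode YAML into an unstructured object and recover its GroupVersionKind.
		decoder := yaml.NewDecodingSerializer(unstructured.UnstructuredJSONScheme)
		obj := &unstructured.Unstructured{}
		_, gvk, err := decoder.Decode(doc, nil, obj)
		if err != nil {
			panic(err)
		}

		// Derive the resource name by naive pluralization, as applyManifest does.
		gvr := schema.GroupVersionResource{
			Group:    gvk.Group,
			Version:  gvk.Version,
			Resource: strings.ToLower(gvk.Kind) + "s",
		}
		fmt.Printf("kind=%s name=%s gvr=%s\n", gvk.Kind, obj.GetName(), gvr.String())
	}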

tests/fixture/tmpnet/start_kind_cluster.go

+13 −1

@@ -32,7 +32,13 @@ func CheckClusterRunning(log logging.Logger, configPath string, configContext st
 }

 // StartKindCluster starts a new kind cluster if one is not already running.
-func StartKindCluster(ctx context.Context, log logging.Logger, configPath string, configContext string) error {
+func StartKindCluster(
+	ctx context.Context,
+	log logging.Logger,
+	configPath string,
+	configContext string,
+	startCollectors bool,
+) error {
 	err := CheckClusterRunning(log, configPath, configContext)
 	if err == nil {
 		log.Info("kubernetes cluster already running",
@@ -75,5 +81,11 @@ func StartKindCluster(ctx context.Context, log logging.Logger, configPath string
 		zap.String("namespace", DefaultTmpnetNamespace),
 	)

+	if startCollectors {
+		if err := DeployKubeCollectors(ctx, log, configPath, configContext); err != nil {
+			return fmt.Errorf("failed to deploy kube collectors: %w", err)
+		}
+	}
+
 	return nil
 }
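
Outside of tmpnetctl, the updated entry point can also be driven directly from Go. A minimal sketch, assuming the default kubeconfig path, an empty context string to select the kubeconfig's current context, and logging.NoLog{} as a stand-in logger; with the final argument set to true, the collector credentials (PROMETHEUS_* / LOKI_* variables) are expected to already be present in the environment:

	package main

	import (
		"context"
		"log"
		"os"
		"path/filepath"

		"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
		"github.com/ava-labs/avalanchego/utils/logging"
	)

	func main() {
		kubeConfig := filepath.Join(os.Getenv("HOME"), ".kube", "config")

		// Start (or reuse) the local kind cluster; the trailing true asks
		// StartKindCluster to deploy the promtail and prometheus collectors too.
		if err := tmpnet.StartKindCluster(context.Background(), logging.NoLog{}, kubeConfig, "", true); err != nil {
			log.Fatal(err)
		}
	}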
