# chaos.yml
name: 'chaos-test'

# Triggers:
#   - pushes and pull requests targeting main / release-** branches, but only
#     when a file named chaos.yml is part of the change;
#   - manual dispatch, with an optional boolean `debug` input;
#   - a daily schedule at minute 0 of hour 20 (GitHub evaluates cron in UTC).
on:
  push:
    branches: ['release-**', 'main']
    paths: ['**/chaos.yml']
  pull_request:
    branches: ['main', 'release-**']
    paths: ['**/chaos.yml']
  workflow_dispatch:
    inputs:
      debug:
        description: 'Run the build with tmate debugging enabled'
        type: boolean
        required: false
        default: false
  schedule:
    - cron: '0 20 * * *'
jobs:
  # Runs one chaos scenario per matrix entry: builds juicefs, brings up a kind
  # cluster with the JuiceFS CSI driver, redis and minio, starts the workload
  # from dynamic.yaml, injects the selected fault through Chaos Mesh, then
  # verifies pod states and dumps logs.
  chaos-test:
    timeout-minutes: 60
    # The ubuntu-20.04 hosted image has been retired by GitHub; jobs pinned to
    # it can no longer be scheduled, so use the next LTS image.
    runs-on: ubuntu-22.04
    strategy:
      # Let every scenario run to completion even if another one fails.
      fail-fast: false
      matrix:
        # chaos: ["minio-io", "minio-memory", "minio-cpu", "minio-bandwidth", "redis-bandwidth", "redis-io", "redis-delay", "redis-memory", "redis-cpu", "juicefs-bandwidth", "juicefs-memory", "juicefs-cpu", "juicefs-delay"]
        chaos:
          - minio-io
          - minio-memory
          - minio-cpu
          - minio-bandwidth
          - redis-io
          - redis-delay
          - redis-memory
          - redis-cpu
          - juicefs-bandwidth
          - juicefs-memory
          - juicefs-cpu
          - juicefs-delay
        # chaos: ["minio-io"]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - uses: actions/setup-go@v3
        with:
          go-version: 'oldstable'
          cache: true

      # Builds the juicefs binary with STATIC=1 exported.
      - name: Build
        timeout-minutes: 10
        run: |
          sudo .github/scripts/apt_install.sh musl-tools upx-ucl
          export STATIC=1
          make juicefs

      - name: Creating kind cluster
        uses: helm/kind-action@v1.5.0

      - name: Print cluster information
        run: |
          kubectl config view
          kubectl cluster-info
          kubectl get nodes
          kubectl get pods -n kube-system
          helm version
          kubectl version

      # - name: Build And Load CSI Docker Image
      #   run: |
      #     echo GITHUB_REF is $GITHUB_REF
      #     echo GITHUB_SHA is $GITHUB_SHA
      #     helm repo add juicefs https://juicedata.github.io/charts/
      #     helm repo update
      #     APP_VERSION=$(helm search repo juicefs/juicefs-csi-driver --versions | grep juicefs | head -1 | awk -F" " '{print $3}')
      #     echo APP_VERSION is $APP_VERSION
      #     docker build --build-arg GITHUB_REF=$GITHUB_REF --build-arg GITHUB_SHA=$GITHUB_SHA -f .github/scripts/chaos/juicefs-csi-driver.Dockerfile -t juicedata/juicefs-csi-driver:v$APP_VERSION .
      #     kind load docker-image juicedata/juicefs-csi-driver:v$APP_VERSION --name chart-testing

      # Builds the mount image from the freshly built binary, tagged with the
      # version reported by `./juicefs version`, and loads it into the kind
      # cluster named chart-testing.
      - name: Build And Load CSI Docker Image
        run: |
          version=`./juicefs version |awk '{print $3}' | cut -d '-' -f1`
          docker build -f .github/scripts/chaos/juicefs.Dockerfile -t juicedata/mount:ce-v${version} .
          helm repo add juicefs https://juicedata.github.io/charts/
          helm repo update
          kind load docker-image juicedata/mount:ce-v${version} --name chart-testing

      # Installs the first chart version listed by `helm search repo`.
      - name: Install JuiceFS CSI Driver
        run: |
          CHART_VERSION=$(helm search repo juicefs/juicefs-csi-driver --versions | grep juicefs | head -1 | awk -F" " '{print $2}')
          echo CHART_VERSION is $CHART_VERSION
          helm install juicefs-csi-driver juicefs/juicefs-csi-driver -n kube-system --version $CHART_VERSION
          kubectl -n kube-system get pods -l app.kubernetes.io/name=juicefs-csi-driver

      - name: Deploy redis
        run: |
          kubectl apply -f .github/scripts/chaos/redis.yaml

      - name: Deploy minio
        run: |
          rm -rf /data/minio-data/*
          kubectl apply -f .github/scripts/chaos/minio.yaml

      # Rewrites the image tag `mount:ci` in sc.yaml to the tag built above
      # before applying the storage class and the PVC.
      - name: Mount Juicefs
        run: |
          version=`./juicefs version |awk '{print $3}' | cut -d '-' -f1`
          sed -i "s/mount:ci/mount:ce-v$version/" .github/scripts/chaos/sc.yaml
          kubectl apply -f .github/scripts/chaos/sc.yaml
          kubectl apply -f .github/scripts/chaos/pvc.yaml

      - name: Start vdbench
        run: |
          kubectl apply -f .github/scripts/chaos/dynamic.yaml

      # Installs Chaos Mesh 2.5.1 and polls once per second, at most 120
      # times, until every listed pod reports Running. The loop does not fail
      # the step if that never happens.
      - name: Install Chaos Mesh
        run: |
          helm version
          kubectl version
          helm repo add chaos-mesh https://charts.chaos-mesh.org
          kubectl create ns chaos-mesh
          helm install chaos-mesh chaos-mesh/chaos-mesh -n=chaos-mesh --version 2.5.1 \
            --set chaosDaemon.runtime=containerd \
            --set chaosDaemon.socketPath=/run/containerd/containerd.sock \
            --set controllerManager.replicaCount=1
          echo "wait pod status to running"
          for ((k=0; k<120; k++)); do
            kubectl get pods --namespace chaos-mesh -l app.kubernetes.io/instance=chaos-mesh > pods.status
            cat pods.status
            run_num=`grep Running pods.status | wc -l`
            # Subtract one for the header line printed by kubectl.
            pod_num=$((`cat pods.status | wc -l` - 1))
            if [ "$run_num" == "$pod_num" ]; then
              break
            fi
            sleep 1
          done

      # Un-comments the `# - <scenario>` entry for this matrix value in
      # workflow.yaml and applies the result.
      - name: Run chaos mesh action
        run: |
          chaos=${{matrix.chaos}}
          sed -i "s/# - $chaos/- $chaos/g" .github/scripts/chaos/workflow.yaml
          cat .github/scripts/chaos/workflow.yaml
          kubectl apply -f .github/scripts/chaos/workflow.yaml

      # Polls once per second, at most 1200 times, for a dynamic-ce pod to
      # show Completed, then requires every listed app to have a pod that is
      # Running or Completed.
      - name: Verify
        run: |
          for i in {1..1200}; do
            if kubectl get pods --all-namespaces | grep dynamic-ce | grep -i "Completed"; then
              echo "dynamic-ce is completed in $i seconds"
              break
            else
              if [ $((i % 10)) -eq 0 ]; then
                echo "dynamic-ce is not completed in $i seconds"
              fi
              sleep 1
            fi
          done
          kubectl get pods --all-namespaces
          apps=("dynamic-ce" "juicefs-csi-node" "juicefs-csi-controller" "juicefs-chart-testing-control-plane-pvc" "redis" "minio")
          for app in "${apps[@]}"; do
            echo app is $app
            # Test the pipeline directly: the default `bash -e` shell would
            # abort on a failing pipeline before a separate `$?` check could
            # print the diagnostic below.
            if ! kubectl get pods --all-namespaces | grep "$app" | grep -i "Running\|Completed"; then
              echo status of $app is not expected.
              exit 1
            fi
          done

      # Dumps description and logs of every mount pod and fails when a log
      # contains a "<FATAL>:" line that is not filtered out.
      # NOTE(review): the filter pattern `forma.go` looks like it may be a
      # typo for `format.go` -- confirm before changing.
      - name: Check mount pod
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-chart-testing-control-plane-pvc)
          echo POD_NAME is $POD_NAME
          for pod in $POD_NAME;do
            kubectl -n kube-system describe po $pod
            kubectl logs -n kube-system $pod > juicefs.log
            cat juicefs.log
            grep "<FATAL>:" juicefs.log | grep -v forma.go && exit 1 || true
          done

      # Exercises `juicefs-csi-driver upgrade` on the mount pod (skipped for
      # the minio-io scenario), first without and then with --restart; only
      # the first run must report SUCCESS. Finally removes the chaos workflow.
      - name: Mount pod upgrade
        timeout-minutes: 5
        run: |
          chaos=${{matrix.chaos}}
          skip_conditions=("minio-io")
          if [[ "${skip_conditions[*]}" =~ "$chaos" ]]; then
            echo "skip mount pod upgrade"
            exit 0
          else
            CSI_POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-csi-node)
            PVC_POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-chart-testing-control-plane-pvc)
            kubectl exec $CSI_POD_NAME -n kube-system -- juicefs-csi-driver upgrade $PVC_POD_NAME 2>&1 | tee upgrade.log
            sleep 5
            if ! grep "SUCCESS" upgrade.log; then
              exit 1
            fi
            rm upgrade.log
            kubectl exec $CSI_POD_NAME -n kube-system -- juicefs-csi-driver upgrade $PVC_POD_NAME --restart 2>&1 | tee upgrade.log || true
            sleep 5
          fi
          kubectl delete -f .github/scripts/chaos/workflow.yaml

      # The remaining "Check ..." steps run even after a failure so that the
      # logs are always available in the job output.
      - name: Check csi controller log
        if: always()
        run: |
          kubectl describe pvc dynamic-ce
          kubectl -n kube-system get po -l app=juicefs-csi-controller
          kubectl -n kube-system logs juicefs-csi-controller-0 juicefs-plugin

      - name: Check csi node log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-csi-node)
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl -n kube-system logs $POD_NAME -c juicefs-plugin > csi_node.log
          cat csi_node.log
          # grep -i "error" csi_node.log && exit 1 || true

      # Same check as "Check mount pod", restricted to mount pods that are
      # currently Running.
      - name: Check mount point pod
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system | grep juicefs-chart-testing-control-plane-pvc | grep Running | awk '{print $1}')
          echo POD_NAME is $POD_NAME
          for pod in $POD_NAME;do
            kubectl -n kube-system describe po $pod
            kubectl logs -n kube-system $pod > juicefs.log
            cat juicefs.log
            grep "<FATAL>:" juicefs.log | grep -v forma.go && exit 1 || true
          done

      - name: Check vdbench log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n default -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep dynamic-ce )
          echo POD_NAME is $POD_NAME
          kubectl -n default describe po $POD_NAME
          kubectl logs -n default $POD_NAME > vdbench.log
          cat vdbench.log
          # grep -i "error" vdbench.log && exit 1 || true

      - name: Check Redis log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep redis )
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > redis.log
          cat redis.log
          # grep -i "error" redis.log && exit 1 || true

      - name: Check Minio log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep minio )
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > minio.log
          cat minio.log
          # grep -i "error" minio.log && exit 1 || true

      # Opens an interactive debug session after a failure, but only when the
      # manual `debug` input was set or the run is a re-run attempt.
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  # Aggregates the matrix result: always runs after chaos-test, fails when the
  # workflow conclusion is 'failure', and notifies Slack on failure unless the
  # run was started manually.
  success-all-test:
    runs-on: ubuntu-latest
    needs: [chaos-test]
    if: always()
    steps:
      # Presumably exports WORKFLOW_CONCLUSION, which the "Check Failure" step
      # reads -- confirm against the action's documentation.
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"