defaults.yml
parameters:
rook_ceph:
=_metadata: {}
namespace: syn-rook-ceph-operator
ceph_cluster:
name: cluster
namespace: syn-rook-ceph-${rook_ceph:ceph_cluster:name}
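# With the default `name` of `cluster`, the namespace above resolves to
# `syn-rook-ceph-cluster`.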
node_count: 3
block_storage_class: localblock
# Configure the volume size here if block storage PVs are provisioned
# dynamically
block_volume_size: 1
# Set to true if the backing storage is SSD
tune_fast_device_class: false
# Control placement of OSD pods.
osd_placement: {}
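# Example (commented, illustrative values): `osd_placement` is passed through
# to the storage class device set's `placement` field below, so a standard
# Kubernetes affinity/toleration structure can be provided, e.g.:
#   osd_placement:
#     nodeAffinity:
#       requiredDuringSchedulingIgnoredDuringExecution:
#         nodeSelectorTerms:
#           - matchExpressions:
#               - key: node-role.kubernetes.io/storage
#                 operator: Exists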
# Mark OSDs as portable (doesn't bind OSD to a host)
osd_portable: false
# Rendered into the `rook-config-override` ConfigMap
config_override:
global:
# Configure full ratios to match the alerts shipped with Rook.
# With this config the cluster goes read-only at 85% utilization.
# These configs only apply at cluster creation.
# To adjust the ratios at run time, use
# `ceph osd set-{nearfull,backfillfull,full}-ratio`
# NOTE: we're giving ratios as strings to avoid float rounding
# issues when manifesting the values in the resulting config file.
mon_osd_full_ratio: '0.85'
mon_osd_backfillfull_ratio: '0.8'
mon_osd_nearfull_ratio: '0.75'
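# For example, to apply the same thresholds to a running cluster (e.g. from
# the toolbox pod), the brace expansion above corresponds to:
#   ceph osd set-nearfull-ratio 0.75
#   ceph osd set-backfillfull-ratio 0.8
#   ceph osd set-full-ratio 0.85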
# Adjust mon data store free percentage to match the default
# Kubernetes imageGCHigh threshold, since we're using the node's
# `/var` for the mon data store.
mon_data_avail_warn: '15'
# Whether to set up the RBD CSI driver and pools
rbd_enabled: true
# Whether to set up the CephFS CSI driver and pools
cephfs_enabled: false
# Whether to enable monitoring
monitoring_enabled: true
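# Example (commented): a hierarchy override enabling CephFS alongside RBD
# could look like this:
#   rook_ceph:
#     ceph_cluster:
#       cephfs_enabled: true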
storageClassDeviceSets:
cluster:
name: ${rook_ceph:ceph_cluster:name}
count: ${rook_ceph:ceph_cluster:node_count}
volumeClaimTemplates:
default:
spec:
storageClassName: ${rook_ceph:ceph_cluster:block_storage_class}
volumeMode: Block
accessModes:
- ReadWriteOnce
resources:
requests:
storage: ${rook_ceph:ceph_cluster:block_volume_size}
encrypted: true
tuneFastDeviceClass: ${rook_ceph:ceph_cluster:tune_fast_device_class}
placement: ${rook_ceph:ceph_cluster:osd_placement}
portable: ${rook_ceph:ceph_cluster:osd_portable}
storage_pools:
rbd:
storagepool:
config:
failureDomain: host
replicated:
size: 3
requireSafeReplicaSize: true
mount_options:
discard: true
storage_class_config:
parameters:
csi.storage.k8s.io/fstype: ext4
allowVolumeExpansion: true
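# Example (commented, illustrative values only): the default RBD pool's
# replication can be tuned from the hierarchy, e.g.:
#   rook_ceph:
#     ceph_cluster:
#       storage_pools:
#         rbd:
#           storagepool:
#             config:
#               replicated:
#                 size: 2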
cephfs:
fspool:
data_pools:
pool0:
failureDomain: host
replicated:
size: 3
requireSafeReplicaSize: true
parameters:
compression_mode: none
target_size_ratio: '0.8'
config:
metadataPool:
replicated:
size: 3
requireSafeReplicaSize: true
parameters:
compression_mode: none
target_size_ratio: '0.2'
# dataPools rendered from data_pools in Jsonnet
preserveFilesystemOnDelete: true
metadataServer:
activeCount: 1
activeStandby: true
resources:
requests:
cpu: "1"
memory: 4Gi
limits:
cpu: "1"
memory: 4Gi
# Metadata server placement is configured in Jsonnet, but can be
# extended here
mirroring:
enabled: false
mount_options: {}
storage_class_config:
allowVolumeExpansion: true
alerts:
# Ceph alerts to ignore
# The component supports removing entries from this array by prefixing
# the entry with `~` (same syntax as for the applications array); see
# the commented example after the list.
ignoreNames:
# Ignored because the Kubernetes monitoring should already provide node disk usage alerts
- CephMonDiskspaceCritical
- CephMonDiskspaceLow
- CephNodeDiskspaceWarning
# Managed by the cluster, fires on OCP but seems to be a false positive
- CephNodeInconsistentMTU
# We tend to run Ceph on virtual disks, so we don't have S.M.A.R.T. information
- CephDeviceFailurePredicted
- CephDeviceFailurePredictionTooHigh
- CephDeviceFailureRelocationIncomplete
# An offline OSD host should be covered by cluster-level alerts
- CephOSDHostDown
# Clock skew should be covered by a cluster-level alert
- CephMonClockSkew
# Network packet drops/errors and osd timeouts should be covered by
# cluster-level alerts, we don't need to have duplicates for Ceph
# nodes or OSDs
- CephNodeNetworkPacketDrops
- CephNodeNetworkPacketErrors
- CephOSDTimeoutsClusterNetwork
- CephOSDTimeoutsPublicNetwork
# All the following alerts should be covered by CephHealthWarning
- CephOSDInternalDiskSizeMismatch
- CephFilesystemInsufficientStandby
- CephFilesystemMDSRanksLow
- CephOSDTooManyRepairs
- CephOSDBackfillFull
- CephOSDReadErrors
- CephOSDNearFull
- CephPGNotDeepScrubbed
- CephPGNotScrubbed
- CephPGsHighPerOSD
- CephPGsUnclean
- CephPoolBackfillFull
- CephPoolNearFull
# The following alerts are not valuable enough to keep active by
# default
- CephPGImbalance
- CephPoolGrowthWarning
- CephSlowOps
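# Example (commented): to re-enable an alert that's ignored by default,
# remove it from this array in the hierarchy using the `~` prefix:
#   rook_ceph:
#     alerts:
#       ignoreNames:
#         - ~CephPGImbalance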
# Alert rule patches.
# Provide partial objects for alert rules that need to be tuned compared to
# upstream. The keys in this object correspond to the `alert` field of the
# rule for which the patch is intended.
patchRules:
CephClusterWarningState:
for: 15m
CephOSDDiskNotResponding:
for: 5m
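# Example (commented, illustrative value): existing patches can be tuned
# further from the hierarchy, e.g. extending the `for` duration:
#   rook_ceph:
#     alerts:
#       patchRules:
#         CephClusterWarningState:
#           for: 30m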
# Supports configuring recording/alerting rules by using the
# "record:" and "alert:" prefixes.
additionalRules:
"alert:RookCephOperatorScaledDown":
expr: kube_deployment_spec_replicas{deployment="rook-ceph-operator", namespace="${rook_ceph:namespace}"} == 0
for: 1h
annotations:
summary: rook-ceph operator scaled to 0 for more than 1 hour.
labels:
severity: warning
"record:ceph_osd_op_w_in_bytes:rate5m":
expr: sum(rate(ceph_osd_op_w_in_bytes{}[5m]))
"record:ceph_osd_op_r_out_bytes:rate5m":
expr: sum(rate(ceph_osd_op_r_out_bytes{}[5m]))
"record:ceph_pool_objects:sum":
expr: sum(ceph_pool_objects{})
"record:ceph_mon_num_sessions:sum":
expr: sum(ceph_mon_num_sessions{})
"record:ceph_mon_quorum_status:count":
expr: count(ceph_mon_quorum_status{})
"record:ceph_osd_op_w_in_bytes:sum":
expr: sum(ceph_osd_op_w_in_bytes{})
"record:ceph_osd_op_r_out_bytes:sum":
expr: sum(ceph_osd_op_r_out_bytes{})
"record:ceph_osd_numpg:sum":
expr: sum(ceph_osd_numpg{})
"record:ceph_osd_apply_latency_ms:avg":
expr: avg(ceph_osd_apply_latency_ms{})
"record:ceph_osd_commit_latency_ms:avg":
expr: avg(ceph_osd_commit_latency_ms{})
"record:ceph_osd_op_w_latency:avg5m":
expr: avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)
"record:ceph_osd_op_r_latency:avg5m":
expr: avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)
"record:ceph_osd_op_w:rate5m":
expr: sum(rate(ceph_osd_op_w{}[5m]))
"record:ceph_osd_op_r:rate5m":
expr: sum(rate(ceph_osd_op_r{}[5m]))
node_selector:
node-role.kubernetes.io/storage: ''
tolerations:
- key: storagenode
operator: Exists
images:
rook:
registry: docker.io
image: rook/ceph
tag: v1.16.0
ceph:
registry: quay.io
image: ceph/ceph
tag: v18.2.4
cephcsi:
registry: quay.io
image: cephcsi/cephcsi
tag: v3.12.3
kubectl:
registry: docker.io
image: bitnami/kubectl
tag: '1.28.15@sha256:a6db53e1b3829dd55e050dcd219f7e4e1c9d833b2e60d5ef8a747d7a119ed7d7'
charts:
# We do not support Helm chart versions older than v1.7.0
rook-ceph: v1.14.10
operator_helm_values:
image:
repository: ${rook_ceph:images:rook:registry}/${rook_ceph:images:rook:image}
tag: ${rook_ceph:images:rook:tag}
nodeSelector: ${rook_ceph:node_selector}
resources:
limits:
cpu: 1000m
memory: 1Gi
requests:
cpu: 750m
memory: 512Mi
tolerations: ${rook_ceph:tolerations}
csi:
provisionerTolerations: ${rook_ceph:tolerations}
enableCSIHostNetwork: true
enableRbdDriver: ${rook_ceph:ceph_cluster:rbd_enabled}
enableCephfsDriver: ${rook_ceph:ceph_cluster:cephfs_enabled}
enableGrpcMetrics: true
enableLiveness: true
cephcsi:
repository: ${rook_ceph:images:cephcsi:registry}/${rook_ceph:images:cephcsi:image}
tag: ${rook_ceph:images:cephcsi:tag}
pspEnable: false
toolbox:
enabled: true
image: ${rook_ceph:images:rook}
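# Example (commented): additional values for the rook-ceph operator Helm
# chart can be merged in via the hierarchy, e.g. (assuming the deployed
# chart version exposes a `logLevel` value):
#   rook_ceph:
#     operator_helm_values:
#       logLevel: DEBUG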
cephClusterSpec:
cephVersion:
image: ${rook_ceph:images:ceph:registry}/${rook_ceph:images:ceph:image}:${rook_ceph:images:ceph:tag}
allowUnsupported: false
dataDirHostPath: /var/lib/rook
healthCheck:
daemonHealth:
mon:
disabled: false
interval: 45s
timeout: 600s
monitoring:
enabled: ${rook_ceph:ceph_cluster:monitoring_enabled}
mon:
count: 3
allowMultiplePerNode: false
network:
provider: host
placement:
all:
# nodeAffinity is injected in Jsonnet,
# taking placement labels from ${rook_ceph:node_selector}
tolerations: ${rook_ceph:tolerations}
resources:
mgr:
limits:
cpu: "500m"
memory: "1Gi"
requests:
cpu: "250m"
memory: "512Mi"
mon:
limits:
cpu: "500m"
memory: 2Gi
requests:
cpu: "250m"
memory: 2Gi
osd:
limits:
cpu: "2"
memory: 5Gi
requests:
cpu: "2"
memory: 5Gi
storage:
useAllNodes: false
useAllDevices: false
# storageClassDeviceSets is generated from parameter
# `ceph_cluster.storageClassDeviceSets`
disruptionManagement:
managePodBudgets: true
osdMaintenanceTimeout: 30
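# Example (commented, illustrative values): `cephClusterSpec` is deep-merged
# from the hierarchy, so individual fields of the CephCluster spec can be
# tuned, e.g.:
#   rook_ceph:
#     cephClusterSpec:
#       resources:
#         osd:
#           limits:
#             memory: 8Gi
#           requests:
#             memory: 8Gi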