defaults.yml
parameters:
rook_ceph:
=_metadata: {}
namespace: syn-rook-ceph-operator
ceph_cluster:
name: cluster
namespace: syn-rook-ceph-${rook_ceph:ceph_cluster:name}
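# With the default `name` of `cluster`, the namespace above resolves to
# `syn-rook-ceph-cluster`.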
node_count: 3
block_storage_class: localblock
# Configure the volume size here if block storage PVs are provisioned
# dynamically
block_volume_size: 1
# Set to true if the backing storage is SSD
tune_fast_device_class: false
# Control placement of OSD pods.
osd_placement: {}
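# Example (commented, illustrative values): `osd_placement` is passed through
# to the storage class device set's `placement` field below, so a standard
# Kubernetes affinity/toleration structure can be provided, e.g.:
#   osd_placement:
#     nodeAffinity:
#       requiredDuringSchedulingIgnoredDuringExecution:
#         nodeSelectorTerms:
#           - matchExpressions:
#               - key: node-role.kubernetes.io/storage
#                 operator: Exists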
# Mark OSDs as portable (doesn't bind OSD to a host)
osd_portable: false
# Rendered into the `rook-config-override` ConfigMap
config_override:
global:
# Configure full ratios to match the alerts shipped with Rook.
# With this config the cluster goes read-only at 85% utilization.
# These configs only apply at cluster creation.
# To adjust the ratios at run time, use
# `ceph osd set-{nearfull,backfillfull,full}-ratio`
# NOTE: we're giving ratios as strings to avoid float rounding
# issues when manifesting the values in the resulting config file.
mon_osd_full_ratio: '0.85'
mon_osd_backfillfull_ratio: '0.8'
mon_osd_nearfull_ratio: '0.75'
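# For example, to apply the same thresholds to a running cluster (e.g. from
# the toolbox pod), the brace expansion above corresponds to:
#   ceph osd set-nearfull-ratio 0.75
#   ceph osd set-backfillfull-ratio 0.8
#   ceph osd set-full-ratio 0.85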
# Adjust mon data store free percentage to match the default
# Kubernetes imageGCHigh threshold, since we're using the node's
# `/var` for the mon data store.
mon_data_avail_warn: '15'
# Whether to set up the RBD CSI driver and pools
rbd_enabled: true
# Whether to set up the CephFS CSI driver and pools
cephfs_enabled: false
# Whether to enable monitoring
monitoring_enabled: true
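# Example (commented): a hierarchy override enabling CephFS alongside RBD
# could look like this:
#   rook_ceph:
#     ceph_cluster:
#       cephfs_enabled: true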
storageClassDeviceSets:
cluster:
name: ${rook_ceph:ceph_cluster:name}
count: ${rook_ceph:ceph_cluster:node_count}
volumeClaimTemplates:
default:
spec:
storageClassName: ${rook_ceph:ceph_cluster:block_storage_class}
volumeMode: Block
accessModes:
- ReadWriteOnce
resources:
requests:
storage: ${rook_ceph:ceph_cluster:block_volume_size}
encrypted: true
tuneFastDeviceClass: ${rook_ceph:ceph_cluster:tune_fast_device_class}
placement: ${rook_ceph:ceph_cluster:osd_placement}
portable: ${rook_ceph:ceph_cluster:osd_portable}
storage_pools:
rbd:
storagepool:
config:
failureDomain: host
replicated:
size: 3
requireSafeReplicaSize: true
mount_options:
discard: true
storage_class_config:
parameters:
csi.storage.k8s.io/fstype: ext4
allowVolumeExpansion: true
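# Example (commented, illustrative values only): the default RBD pool's
# replication can be tuned from the hierarchy, e.g.:
#   rook_ceph:
#     ceph_cluster:
#       storage_pools:
#         rbd:
#           storagepool:
#             config:
#               replicated:
#                 size: 2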
cephfs:
fspool:
data_pools:
pool0:
failureDomain: host
replicated:
size: 3
requireSafeReplicaSize: true
parameters:
compression_mode: none
target_size_ratio: '0.8'
config:
metadataPool:
replicated:
size: 3
requireSafeReplicaSize: true
parameters:
compression_mode: none
target_size_ratio: '0.2'
# dataPools rendered from data_pools in Jsonnet
preserveFilesystemOnDelete: true
metadataServer:
activeCount: 1
activeStandby: true
resources:
requests:
cpu: "1"
memory: 4Gi
limits:
cpu: "1"
memory: 4Gi
# Metadata server placement is configured in Jsonnet, but can be
# extended here
mirroring:
enabled: false
mount_options: {}
storage_class_config:
allowVolumeExpansion: true
alerts:
# Ceph alerts to ignore
# The component supports removing entries from this array by prefixing
# the entry with `~` (same syntax as for the applications array); see
# the commented example after the list.
ignoreNames:
# Ignored because the Kubernetes monitoring should already provide node disk usage alerts
- CephMonDiskspaceCritical
- CephMonDiskspaceLow
- CephNodeDiskspaceWarning
# Managed by the cluster, fires on OCP but seems to be a false positive
- CephNodeInconsistentMTU
# We tend to run Ceph on virtual disks, so we don't have S.M.A.R.T. information
- CephDeviceFailurePredicted
- CephDeviceFailurePredictionTooHigh
- CephDeviceFailureRelocationIncomplete
# An offline OSD host should be covered by cluster-level alerts
- CephOSDHostDown
# Clock skew should be covered by a cluster-level alert
- CephMonClockSkew
# Network packet drops/errors and osd timeouts should be covered by
# cluster-level alerts, we don't need to have duplicates for Ceph
# nodes or OSDs
- CephNodeNetworkPacketDrops
- CephNodeNetworkPacketErrors
- CephOSDTimeoutsClusterNetwork
- CephOSDTimeoutsPublicNetwork
# All the following alerts should be covered by CephHealthWarning
- CephOSDInternalDiskSizeMismatch
- CephFilesystemInsufficientStandby
- CephFilesystemMDSRanksLow
- CephOSDTooManyRepairs
- CephOSDBackfillFull
- CephOSDReadErrors
- CephOSDNearFull
- CephPGNotDeepScrubbed
- CephPGNotScrubbed
- CephPGsHighPerOSD
- CephPGsUnclean
- CephPoolBackfillFull
- CephPoolNearFull
# The following alerts are not valuable enough to keep active by
# default
- CephPGImbalance
- CephPoolGrowthWarning
- CephSlowOps
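# Example (commented): to re-enable an alert that's ignored by default,
# remove it from this array in the hierarchy using the `~` prefix:
#   rook_ceph:
#     alerts:
#       ignoreNames:
#         - ~CephPGImbalance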
# Alert rule patches.
# Provide partial objects for alert rules that need to be tuned compared to
# upstream. The keys in this object correspond to the `alert` field of the
# rule for which the patch is intended.
patchRules:
CephClusterWarningState:
for: 15m
CephOSDDiskNotResponding:
for: 5m
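# Example (commented, illustrative value): existing patches can be tuned
# further from the hierarchy, e.g. extending the `for` duration:
#   rook_ceph:
#     alerts:
#       patchRules:
#         CephClusterWarningState:
#           for: 30m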
# Supports configuring recording/alerting rules by using the
# "record:" and "alert:" prefixes.
additionalRules:
"alert:RookCephOperatorScaledDown":
expr: kube_deployment_spec_replicas{deployment="rook-ceph-operator", namespace="${rook_ceph:namespace}"} == 0
for: 1h
annotations:
summary: rook-ceph operator scaled to 0 for more than 1 hour.
labels:
severity: warning
"record:ceph_osd_op_w_in_bytes:rate5m":
expr: sum(rate(ceph_osd_op_w_in_bytes{}[5m]))
"record:ceph_osd_op_r_out_bytes:rate5m":
expr: sum(rate(ceph_osd_op_r_out_bytes{}[5m]))
"record:ceph_pool_objects:sum":
expr: sum(ceph_pool_objects{})
"record:ceph_mon_num_sessions:sum":
expr: sum(ceph_mon_num_sessions{})
"record:ceph_mon_quorum_status:count":
expr: count(ceph_mon_quorum_status{})
"record:ceph_osd_op_w_in_bytes:sum":
expr: sum(ceph_osd_op_w_in_bytes{})
"record:ceph_osd_op_r_out_bytes:sum":
expr: sum(ceph_osd_op_r_out_bytes{})
"record:ceph_osd_numpg:sum":
expr: sum(ceph_osd_numpg{})
"record:ceph_osd_apply_latency_ms:avg":
expr: avg(ceph_osd_apply_latency_ms{})
"record:ceph_osd_commit_latency_ms:avg":
expr: avg(ceph_osd_commit_latency_ms{})
"record:ceph_osd_op_w_latency:avg5m":
expr: avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)
"record:ceph_osd_op_r_latency:avg5m":
expr: avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)
"record:ceph_osd_op_w:rate5m":
expr: sum(rate(ceph_osd_op_w{}[5m]))
"record:ceph_osd_op_r:rate5m":
expr: sum(rate(ceph_osd_op_r{}[5m]))
node_selector:
node-role.kubernetes.io/storage: ''
tolerations:
- key: storagenode
operator: Exists
images:
rook:
registry: docker.io
image: rook/ceph
tag: v1.16.0
ceph:
registry: quay.io
image: ceph/ceph
tag: v18.2.4
cephcsi:
registry: quay.io
image: cephcsi/cephcsi
tag: v3.12.3
kubectl:
registry: docker.io
image: bitnami/kubectl
tag: '1.28.15@sha256:a6db53e1b3829dd55e050dcd219f7e4e1c9d833b2e60d5ef8a747d7a119ed7d7'
charts:
# We do not support Helm chart versions older than v1.7.0
rook-ceph: v1.14.10
operator_helm_values:
image:
repository: ${rook_ceph:images:rook:registry}/${rook_ceph:images:rook:image}
tag: ${rook_ceph:images:rook:tag}
nodeSelector: ${rook_ceph:node_selector}
resources:
limits:
cpu: 1000m
memory: 1Gi
requests:
cpu: 750m
memory: 512Mi
tolerations: ${rook_ceph:tolerations}
csi:
provisionerTolerations: ${rook_ceph:tolerations}
enableCSIHostNetwork: true
enableRbdDriver: ${rook_ceph:ceph_cluster:rbd_enabled}
enableCephfsDriver: ${rook_ceph:ceph_cluster:cephfs_enabled}
enableGrpcMetrics: true
enableLiveness: true
cephcsi:
repository: ${rook_ceph:images:cephcsi:registry}/${rook_ceph:images:cephcsi:image}
tag: ${rook_ceph:images:cephcsi:tag}
pspEnable: false
toolbox:
enabled: true
image: ${rook_ceph:images:rook}
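# Example (commented): additional values for the rook-ceph operator Helm
# chart can be merged in via the hierarchy, e.g. (assuming the deployed
# chart version exposes a `logLevel` value):
#   rook_ceph:
#     operator_helm_values:
#       logLevel: DEBUG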
cephClusterSpec:
cephVersion:
image: ${rook_ceph:images:ceph:registry}/${rook_ceph:images:ceph:image}:${rook_ceph:images:ceph:tag}
allowUnsupported: false
dataDirHostPath: /var/lib/rook
healthCheck:
daemonHealth:
mon:
disabled: false
interval: 45s
timeout: 600s
monitoring:
enabled: ${rook_ceph:ceph_cluster:monitoring_enabled}
mon:
count: 3
allowMultiplePerNode: false
network:
provider: host
placement:
all:
# nodeAffinity is injected in Jsonnet,
# taking placement labels from ${rook_ceph:node_selector}
tolerations: ${rook_ceph:tolerations}
resources:
mgr:
limits:
cpu: "500m"
memory: "1Gi"
requests:
cpu: "250m"
memory: "512Mi"
mon:
limits:
cpu: "500m"
memory: 2Gi
requests:
cpu: "250m"
memory: 2Gi
osd:
limits:
cpu: "2"
memory: 5Gi
requests:
cpu: "2"
memory: 5Gi
storage:
useAllNodes: false
useAllDevices: false
# storageClassDeviceSets is generated from parameter
# `ceph_cluster.storageClassDeviceSets`
disruptionManagement:
managePodBudgets: true
osdMaintenanceTimeout: 30
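# Example (commented, illustrative values): `cephClusterSpec` is deep-merged
# from the hierarchy, so individual fields of the CephCluster spec can be
# tuned, e.g.:
#   rook_ceph:
#     cephClusterSpec:
#       resources:
#         osd:
#           limits:
#             memory: 8Gi
#           requests:
#             memory: 8Gi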