Skip to content

Commit 6e1b138

Browse files
committed
Last version of splunk resource usage
Signed-off-by: Alberto Losada <alosadag@redhat.com>
1 parent fb68895 commit 6e1b138

File tree

3 files changed

+67
-6
lines changed

3 files changed

+67
-6
lines changed

splunk/output-example.tgz

6.37 MB
Binary file not shown.

splunk/resource-usage-splunk.sh

+6-6
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22

33
SNO_HOSTNAME="${1:-zt-sno3}"
44
NON_RESERVED_CORES="2-31,34-63"
5+
NUMBER_CORES=64
56

6-
# NODE CPU AVG
7-
oc --kubeconfig=/root/${SNO_HOSTNAME}/kubeconfig rsh -n openshift-monitoring prometheus-k8s-0 curl -ks 'http://localhost:9090/api/v1/query' --data-urlencode 'query=100 * avg(1 - rate(node_cpu_seconds_total{mode="idle"}[30m])) by (instance)' | jq -r '.data.result[] | [.value[0], .value[1], .metric.instance] | @tsv' | sed 's/\t/ /g' >> ${SNO_HOSTNAME}_avg_node_noidle_cpu_percentage.txt
7+
#* NODE CPU AVG
8+
oc --kubeconfig=/root/${SNO_HOSTNAME}/kubeconfig rsh -n openshift-monitoring prometheus-k8s-0 curl -ks 'http://localhost:9090/api/v1/query' --data-urlencode 'query=100/64 * sum(1 - rate(node_cpu_seconds_total{mode="idle"}[30m])) by (instance)' | jq -r '.data.result[] | [.value[0], .value[1], .metric.instance] | @tsv' | sed 's/\t/ /g' >> ${SNO_HOSTNAME}_avg_node_noidle_cpu_percentage.txt
9+
#oc --kubeconfig=/root/${SNO_HOSTNAME}/kubeconfig rsh -n openshift-monitoring prometheus-k8s-0 curl -ks 'http://localhost:9090/api/v1/query' --data-urlencode 'query=100/64 * max(1 - rate(node_cpu_seconds_total{mode="idle"}[30m])) by (instance)' | jq -r '.data.result[] | [.value[0], .value[1], .metric.instance] | @tsv' | sed 's/\t/ /g' >> ${SNO_HOSTNAME}_max_node_noidle_cpu_percentage.txt
810

9-
oc --kubeconfig=/root/${SNO_HOSTNAME}/kubeconfig rsh -n openshift-monitoring prometheus-k8s-0 curl -ks 'http://localhost:9090/api/v1/query' --data-urlencode 'query=100 * max(1 - rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance)' | jq -r '.data.result[] | [.value[0], .value[1], .metric.instance] | @tsv' | sed 's/\t/ /g' >> ${SNO_HOSTNAME}_max_node_noidle_cpu_percentage.txt
10-
11-
# NODE MEMORY
12-
oc --kubeconfig=/root/${SNO_HOSTNAME}/kubeconfig rsh -n openshift-monitoring prometheus-k8s-0 curl -ks 'http://localhost:9090/api/v1/query' --data-urlencode 'query=100 * (1 - (sum(avg_over_time(node_memory_MemAvailable_bytes{job="node-exporter"}[30m]))/sum(avg_over_time(node_memory_MemTotal_bytes{job="node-exporter"}[30m]))))' | jq -r '.data.result[] | [.value[0], .value[1]] | @tsv' | sed 's/\t/ /g' >> ${SNO_HOSTNAME}_node_percentage_free_memory.txt
11+
#* NODE MEMORY
12+
oc --kubeconfig=/root/${SNO_HOSTNAME}/kubeconfig rsh -n openshift-monitoring prometheus-k8s-0 curl -ks 'http://localhost:9090/api/v1/query' --data-urlencode 'query=100 * (1 - (sum(avg_over_time(node_memory_MemAvailable_bytes[30m]))/sum(avg_over_time(node_memory_MemTotal_bytes[30m]))))' | jq -r '.data.result[] | [.value[0], .value[1]] | @tsv' | sed 's/\t/ /g' >> ${SNO_HOSTNAME}_node_percentage_free_memory.txt
1313

1414
# CONTAINER CPU USAGE SLICES
1515
oc --kubeconfig=/root/${SNO_HOSTNAME}/kubeconfig rsh -n openshift-monitoring prometheus-k8s-0 curl -ks 'http://localhost:9090/api/v1/query' --data-urlencode 'query=sort_desc((rate(container_cpu_usage_seconds_total{id=~"/system.slice/.+"}[30m])))' | jq -r '.data.result[] | [.value[0], .value[1], .metric.cpu, .metric.service, .metric.id] | @tsv' | sed 's/\t/ /g' >> ${SNO_HOSTNAME}_system-slice-cpu.txt

splunk/splunk_queries.txt

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
TOP USAGE BY NS
2+
===============
3+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu_pod" | stats avg(cpu_usage) as cpu_sec_usage by namespace pod | eval cpu_sec_usage=round(cpu_sec_usage,4) | stats sum(cpu_sec_usage) as cpu_sec_usage_ns by namespace | sort -cpu_sec_usage_ns | head 20
4+
5+
NODE CPU
6+
========
7+
source="/root/git/faredge-ztp/scripts/*avg_node_noidle_cpu_percentage.txt" index="resource_usage" sourcetype="network_mbps" | eval rtx=rtx*(64/100)| timechart sum(rtx) as total_cpu_seconds
8+
source="/root/git/faredge-ztp/scripts/*avg_node_noidle_cpu_percentage.txt" host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="network_mbps" | timechart sum(rtx)
9+
10+
source="/root/git/faredge-ztp/scripts/*avg_node_noidle_cpu_percentage.txt" index="resource_usage" sourcetype="network_mbps" | stats max(rtx) as max_cpu_usage_%
11+
12+
13+
SLICES
14+
=======
15+
index=resource_usage sourcetype="cpu.slices" | rex field=source "/root/git/faredge-ztp/scripts/zt-sno3_(?<source>.*)-cpu.txt" | timechart sum(cpu_usage) by source
16+
index=resource_usage sourcetype="cpu.slices" slice_id=*system.slice* | eval cpu_usage=cpu_usage*(100/64) | timechart sum(cpu_usage) as cpu_usage_total_%
17+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu.slices" slice_id=*ovs.slice* | rex "/ovs.slice/(?<slice_id>[^\s]+)" | timechart sum(cpu_usage) by slice_id
18+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu.slices" slice_id!=*ovs.slice* | rex "/system.slice/(?<slice_id>[^\s]+)" | eval cpu_usage=round(cpu_usage,5) | stats max(cpu_usage) as cpu_sec_usage by slice_id | sort -cpu_sec_usage | head 10
19+
20+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu.slices" | timechart span=30m sum(cpu_usage) by slice_id
21+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu.slices" | timechart span=30m sum(cpu_usage) as cpu_usage_%
22+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu.slices" | stats max(cpu_usage) by slice_id
23+
24+
25+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu.slices" slice_id=*ovs.slice* | rex "/ovs.slice/(?<slice_id>[^\s]+)" | timechart span=30m sum(cpu_usage)
26+
27+
NODE_MEMORY
28+
============
29+
index="resource_usage" sourcetype="memory_node_usage" | timechart span=30m sum(memory_usage) as memory_usage_%
30+
31+
CPU_POD
32+
=======
33+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu_pod" namespace=openshift-ptp OR namespace=openshift-sriov-network-operator | timechart span=30m sum(cpu_usage) by namespace
34+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu_pod" namespace=openshift-local-storage | eval cpu_usage=round(cpu_usage,5) | timechart span=30m sum(cpu_usage) as cpu_usage by namespace
35+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu_pod" | timechart span=30m sum(cpu_usage) as all_pods_cpu_sec
36+
host="jumphost.inbound.vz.bos2.lab" index="resource_usage" sourcetype="cpu_pod" | timechart span=30m sum(cpu_usage) as all_pods_cpu_sec by namespace | sort -all_pods_cpu_sec
37+
38+
CPU_THROTTLED
39+
=============
40+
index=* sourcetype=cpu_cfs_throttled_periods_total container!="" throttled_periods!=0
41+
42+
43+
CPU_RESERVED
44+
============
45+
index="resource_usage" sourcetype=cpu_usage_reserved | timechart span=30m sum(cpu_usage)
46+
index="resource_usage" sourcetype=cpu_usage_reserved mode!=idle | eval cpu_usage=cpu_usage*(100/4) |timechart span=30m sum(cpu_usage) as cpu_usage_% | stats max(cpu_usage_%) as max_cpu_usage_%
47+
48+
49+
POD MEMORY
50+
=========
51+
index=resource_usage sourcetype=memory_pod_bytes source="*memory-rss-bytes*" | timechart span=30m sum(memory_bytes) by pod
52+
index=resource_usage sourcetype=memory_pod_bytes source="*memory-working-set-bytes*" | timechart span=30m sum(memory_bytes) by pod
53+
index=resource_usage sourcetype=memory_pod_bytes source=*rss* pod!="" | eval memory_bytes=(memory_bytes/(1024*1024*1024)) |rex field=source "/root/git/faredge-ztp/scripts/zt-sno3_(?<source>.*).txt" | timechart sum(memory_bytes) by pod
54+
55+
NODE NETWORKING
56+
==========
57+
index="resource_usage" sourcetype=network_mbps source=*node-network* source=*transmit* | timechart span=30m sum(rtx) as transmit
58+
59+
CONTAINER NETWORKING
60+
====================
61+
index=resource_usage sourcetype=network_container_bytes source=*receive* | rex field=source "/root/git/faredge-ztp/scripts/zt-sno3_(?<source>.*)-bytes.total.txt" | eval tx_bytes=tx_bytes*((8/(1024*1024))) | rex field=namespace "openshift-(?<namespace>.*)" | timechart span=30m sum(tx_bytes) as sum_tx_bytes by namespace

0 commit comments

Comments
 (0)