From d2464242e99f43d3b415c6ac2e54cc783cfa395f Mon Sep 17 00:00:00 2001 From: Shaobo Zhang <1171337+shaobo76@users.noreply.github.com> Date: Mon, 6 Mar 2023 19:38:06 +0800 Subject: [PATCH] Remove escape character to fix broken grafana dashboard configuration Signed-off-by: Shaobo Zhang <1171337+shaobo76@users.noreply.github.com> --- installer/helm/chart/volcano/templates/grafana.yaml | 2 +- installer/volcano-monitoring-latest.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/installer/helm/chart/volcano/templates/grafana.yaml b/installer/helm/chart/volcano/templates/grafana.yaml index 3fa8c5a21a..6499bad957 100644 --- a/installer/helm/chart/volcano/templates/grafana.yaml +++ b/installer/helm/chart/volcano/templates/grafana.yaml @@ -46,7 +46,7 @@ metadata: namespace: {{ .Release.Namespace }} data: volcano-globcal-overview-dashboard.json: |- - {"annotations":{"list":[{"builtIn":1,"datasource":"prometheus","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":2,"links":[],"panels":[{"datasource":null,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":0,"y":0},"id":20,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"count(max_over_time(kube_pod_container_status_running{job=\"kube-state-metrics\"}[1h]) != 0)","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"TPH –Schedule Task In 1 Hour","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":3,"y":0},"id":21,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_info{job=\"kube-state-metrics\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Node","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":6,"y":0},"id":23,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"kube_node_status_capacity{resource=\"nvidia_com_gpu\",job=\"kube-state-metrics\"}","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano GPU","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":9,"y":0},"id":24,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"memory\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Memory","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":12,"y":0},"id":22,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"cpu\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano CPU","type":"stat"},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateOranges","exponent":0.5,"mode":"spectrum"},"dataFormat":"timeseries","datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":50}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":16,"x":0,"y":5},"heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":18,"legend":{"show":false},"pluginVersion":"7.3.4","reverseYBuckets":false,"targets":[{"expr":"increase(volcano_e2e_job_scheduling_latency_milliseconds_bucket[1h])","format":"heatmap","instant":false,"interval":"","legendFormat":"\{\{le\}\} ms","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Legency Heatmap","tooltip":{"show":true,"showHistogram":false},"transformations":[],"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":null,"format":"ms","logBase":2,"max":"500000","min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":50}]},"unit":"ms"},"overrides":[]},"gridPos":{"h":7,"w":16,"x":0,"y":13},"id":26,"options":{"displayMode":"lcd","orientation":"horizontal","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showUnfilled":true},"pluginVersion":"7.3.4","targets":[{"expr":"avg(volcano_e2e_job_scheduling_duration{}) by (queue)","interval":"","legendFormat":"\{\{queue\}\}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Scheduling Avg Duration By Queue In 24H","type":"bargauge"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"ms"},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"}]},{"matcher":{"id":"byName","options":"job_namespace"},"properties":[{"id":"custom.width","value":279}]}]},"gridPos":{"h":7,"w":16,"x":0,"y":20},"id":27,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"avg(volcano_e2e_job_scheduling_duration{}) by (job_namespace)","format":"table","instant":true,"interval":"","legendFormat":"Namespace: \{\{job_namespace\}\}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Avg Scheduling Duration By Namespace In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"bytes"}]}]},"gridPos":{"h":8,"w":16,"x":0,"y":27},"id":29,"options":{"showHeader":true},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_resource_requests{resource=\"memory\", unit=\"byte\",job=\"kube-state-metrics\",queue!=\"\"}) by (queue)","format":"table","instant":true,"interval":"","legendFormat":"\{\{queue\}\}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Resource Usage Sort By Queue In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"}]}]},"gridPos":{"h":8,"w":16,"x":0,"y":35},"id":30,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_resource_requests{resource=\"memory\", unit=\"byte\",job=\"kube-state-metrics\"}) by (volcano_namespace)","format":"table","instant":true,"interval":"","legendFormat":"Namespace : \{\{volcano_namespace\}\}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Resource Usage Sort By Namespace In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.width","value":651},{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"},{"id":"thresholds","value":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}}]},{"matcher":{"id":"byName","options":"job_name"},"properties":[{"id":"custom.width","value":361}]},{"matcher":{"id":"byName","options":"Volcano Job"},"properties":[{"id":"custom.width","value":228}]}]},"gridPos":{"h":13,"w":16,"x":0,"y":43},"id":16,"options":{"frameIndex":1,"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"increase(volcano_e2e_job_scheduling_duration{}[24h]) != 0","format":"table","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Running Legency","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true,"__name__":true,"instance":true,"job":true,"kubernetes_name":true,"kubernetes_namespace":true},"indexByName":{},"renameByName":{"Time":"","job_name":"Volcano Job"}}}],"type":"table"},{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":56},"id":13,"panels":[],"title":"Volcano Fairness","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":16,"x":0,"y":57},"hiddenSeries":false,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"stddev(volcano_e2e_job_scheduling_duration)/avg(volcano_e2e_job_scheduling_duration)","format":"time_series","intervalFactor":1,"legendFormat":"CV (Job Duration)","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Job Duration Coefficient Of Variation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":true,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":64},"id":11,"panels":[],"title":"Volcano Effectiveness","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":0,"y":65},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(\n(sum(kube_pod_container_resource_requests{resource=\"cpu\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) \nby (pod,namespace)))/\nsum(kube_node_status_allocatable{resource=\"cpu\", unit=\"core\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average CPU Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":5,"y":65},"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum((sum(kube_pod_container_resource_requests{resource=\"memory\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) by (pod,namespace)))/sum(kube_node_status_allocatable{resource=\"memory\", unit=\"byte\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average Memory Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":10,"y":65},"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum((sum(kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) by (pod,namespace)))/sum(kube_node_status_capacity{resource=\"nvidia_com_gpu\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average GPU Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":16,"x":0,"y":73},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"cpu\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"cpu\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (CPU)","refId":"A"},{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"memory\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"memory\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (Memory)","refId":"B"},{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (Nvidia GPU)","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Node Resource Coefficient Of Variation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":true,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":26,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-12h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"","title":"Volcano Global Overview Dashboard","uid":"nYn30KvMzf","version":19} + {"annotations":{"list":[{"builtIn":1,"datasource":"prometheus","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":2,"links":[],"panels":[{"datasource":null,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":0,"y":0},"id":20,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"count(max_over_time(kube_pod_container_status_running{job=\"kube-state-metrics\"}[1h]) != 0)","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"TPH –Schedule Task In 1 Hour","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":3,"y":0},"id":21,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_info{job=\"kube-state-metrics\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Node","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":6,"y":0},"id":23,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"kube_node_status_capacity{resource=\"nvidia_com_gpu\",job=\"kube-state-metrics\"}","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano GPU","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":9,"y":0},"id":24,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"memory\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Memory","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":12,"y":0},"id":22,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"cpu\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano CPU","type":"stat"},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateOranges","exponent":0.5,"mode":"spectrum"},"dataFormat":"timeseries","datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":50}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":16,"x":0,"y":5},"heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":18,"legend":{"show":false},"pluginVersion":"7.3.4","reverseYBuckets":false,"targets":[{"expr":"increase(volcano_e2e_job_scheduling_latency_milliseconds_bucket[1h])","format":"heatmap","instant":false,"interval":"","legendFormat":"{{le}} ms","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Legency Heatmap","tooltip":{"show":true,"showHistogram":false},"transformations":[],"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":null,"format":"ms","logBase":2,"max":"500000","min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":50}]},"unit":"ms"},"overrides":[]},"gridPos":{"h":7,"w":16,"x":0,"y":13},"id":26,"options":{"displayMode":"lcd","orientation":"horizontal","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showUnfilled":true},"pluginVersion":"7.3.4","targets":[{"expr":"avg(volcano_e2e_job_scheduling_duration{}) by (queue)","interval":"","legendFormat":"{{queue}}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Scheduling Avg Duration By Queue In 24H","type":"bargauge"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"ms"},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"}]},{"matcher":{"id":"byName","options":"job_namespace"},"properties":[{"id":"custom.width","value":279}]}]},"gridPos":{"h":7,"w":16,"x":0,"y":20},"id":27,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"avg(volcano_e2e_job_scheduling_duration{}) by (job_namespace)","format":"table","instant":true,"interval":"","legendFormat":"Namespace: {{job_namespace}}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Avg Scheduling Duration By Namespace In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"bytes"}]}]},"gridPos":{"h":8,"w":16,"x":0,"y":27},"id":29,"options":{"showHeader":true},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_resource_requests{resource=\"memory\", unit=\"byte\",job=\"kube-state-metrics\",queue!=\"\"}) by (queue)","format":"table","instant":true,"interval":"","legendFormat":"{{queue}}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Resource Usage Sort By Queue In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"}]}]},"gridPos":{"h":8,"w":16,"x":0,"y":35},"id":30,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_resource_requests{resource=\"memory\", unit=\"byte\",job=\"kube-state-metrics\"}) by (volcano_namespace)","format":"table","instant":true,"interval":"","legendFormat":"Namespace : {{volcano_namespace}}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Resource Usage Sort By Namespace In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.width","value":651},{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"},{"id":"thresholds","value":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}}]},{"matcher":{"id":"byName","options":"job_name"},"properties":[{"id":"custom.width","value":361}]},{"matcher":{"id":"byName","options":"Volcano Job"},"properties":[{"id":"custom.width","value":228}]}]},"gridPos":{"h":13,"w":16,"x":0,"y":43},"id":16,"options":{"frameIndex":1,"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"increase(volcano_e2e_job_scheduling_duration{}[24h]) != 0","format":"table","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Running Legency","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true,"__name__":true,"instance":true,"job":true,"kubernetes_name":true,"kubernetes_namespace":true},"indexByName":{},"renameByName":{"Time":"","job_name":"Volcano Job"}}}],"type":"table"},{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":56},"id":13,"panels":[],"title":"Volcano Fairness","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":16,"x":0,"y":57},"hiddenSeries":false,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"stddev(volcano_e2e_job_scheduling_duration)/avg(volcano_e2e_job_scheduling_duration)","format":"time_series","intervalFactor":1,"legendFormat":"CV (Job Duration)","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Job Duration Coefficient Of Variation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":true,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":64},"id":11,"panels":[],"title":"Volcano Effectiveness","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":0,"y":65},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(\n(sum(kube_pod_container_resource_requests{resource=\"cpu\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) \nby (pod,namespace)))/\nsum(kube_node_status_allocatable{resource=\"cpu\", unit=\"core\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average CPU Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":5,"y":65},"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum((sum(kube_pod_container_resource_requests{resource=\"memory\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) by (pod,namespace)))/sum(kube_node_status_allocatable{resource=\"memory\", unit=\"byte\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average Memory Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":10,"y":65},"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum((sum(kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) by (pod,namespace)))/sum(kube_node_status_capacity{resource=\"nvidia_com_gpu\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average GPU Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":16,"x":0,"y":73},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"cpu\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"cpu\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (CPU)","refId":"A"},{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"memory\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"memory\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (Memory)","refId":"B"},{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (Nvidia GPU)","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Node Resource Coefficient Of Variation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":true,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":26,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-12h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"","title":"Volcano Global Overview Dashboard","uid":"nYn30KvMzf","version":19} volcano-queue-overview-dashboard.json: |- {"annotations":{"list":[{"builtIn":1,"datasource":"prometheus","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":4,"iteration":1607928216980,"links":[],"panels":[{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":0,"y":0},"id":6,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}==1)","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Running Job","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":3,"y":0},"id":16,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"count(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}==0)","instant":false,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Finished Job","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":6,"y":0},"id":17,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"count((max_over_time(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}[10m]) != 0) and kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"} == 0)","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Last 10m Finished Job","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":9,"y":0},"id":7,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","instant":true,"interval":"","legendFormat":"volcano_job","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Running CPU","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":12,"y":0},"id":8,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"gpu\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","instant":true,"interval":"","legendFormat":"volcano_job","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Running GPU","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":15,"y":0},"id":2,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","instant":true,"interval":"","legendFormat":"volcano_job","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Running Memory","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Time"},"properties":[{"id":"custom.width","value":195}]},{"matcher":{"id":"byName","options":"__name__"},"properties":[{"id":"custom.width","value":267}]},{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"}]}]},"gridPos":{"h":24,"w":12,"x":0,"y":5},"id":14,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"increase(volcano_e2e_job_scheduling_duration{queue=\"$queue\"}[24h]) != 0 ","format":"table","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Queue Running Job Legency","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true,"__name__":true,"instance":true,"job":true,"kubernetes_name":true,"kubernetes_namespace":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":5},"hiddenSeries":false,"id":12,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","interval":"","legendFormat":"CPU Cores","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queue Running CPU","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{},"unit":"bytes"},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":13},"hiddenSeries":false,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","interval":"","legendFormat":"","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queue Running Memory ","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":21},"hiddenSeries":false,"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"gpu\",job=\"kube-state-metrics\",volcano_namespace=\"$namespace\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","interval":"","legendFormat":"GPU Cards","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queue Running GPU","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":26,"style":"dark","tags":[],"templating":{"list":[{"allValue":null,"current":{"selected":false,"text":"default","value":"default"},"datasource":"prometheus","definition":"label_values(volcano_queue_share,queue_name)","error":null,"hide":0,"includeAll":false,"label":null,"multi":false,"name":"queue","options":[],"query":"label_values(volcano_queue_share,queue_name)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{},"timezone":"","title":"Volcano Queue View","uid":"sAtQfo1Mk","version":8} volcano-namespace-overview-dashboard.json: |- diff --git a/installer/volcano-monitoring-latest.yaml b/installer/volcano-monitoring-latest.yaml index a0a378741c..70a88d34c3 100644 --- a/installer/volcano-monitoring-latest.yaml +++ b/installer/volcano-monitoring-latest.yaml @@ -500,7 +500,7 @@ metadata: namespace: volcano-monitoring data: volcano-globcal-overview-dashboard.json: |- - {"annotations":{"list":[{"builtIn":1,"datasource":"prometheus","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":2,"links":[],"panels":[{"datasource":null,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":0,"y":0},"id":20,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"count(max_over_time(kube_pod_container_status_running{job=\"kube-state-metrics\"}[1h]) != 0)","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"TPH –Schedule Task In 1 Hour","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":3,"y":0},"id":21,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_info{job=\"kube-state-metrics\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Node","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":6,"y":0},"id":23,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"kube_node_status_capacity{resource=\"nvidia_com_gpu\",job=\"kube-state-metrics\"}","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano GPU","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":9,"y":0},"id":24,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"memory\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Memory","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":12,"y":0},"id":22,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"cpu\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano CPU","type":"stat"},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateOranges","exponent":0.5,"mode":"spectrum"},"dataFormat":"timeseries","datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":50}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":16,"x":0,"y":5},"heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":18,"legend":{"show":false},"pluginVersion":"7.3.4","reverseYBuckets":false,"targets":[{"expr":"increase(volcano_e2e_job_scheduling_latency_milliseconds_bucket[1h])","format":"heatmap","instant":false,"interval":"","legendFormat":"\{\{le\}\} ms","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Legency Heatmap","tooltip":{"show":true,"showHistogram":false},"transformations":[],"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":null,"format":"ms","logBase":2,"max":"500000","min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":50}]},"unit":"ms"},"overrides":[]},"gridPos":{"h":7,"w":16,"x":0,"y":13},"id":26,"options":{"displayMode":"lcd","orientation":"horizontal","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showUnfilled":true},"pluginVersion":"7.3.4","targets":[{"expr":"avg(volcano_e2e_job_scheduling_duration{}) by (queue)","interval":"","legendFormat":"\{\{queue\}\}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Scheduling Avg Duration By Queue In 24H","type":"bargauge"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"ms"},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"}]},{"matcher":{"id":"byName","options":"job_namespace"},"properties":[{"id":"custom.width","value":279}]}]},"gridPos":{"h":7,"w":16,"x":0,"y":20},"id":27,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"avg(volcano_e2e_job_scheduling_duration{}) by (job_namespace)","format":"table","instant":true,"interval":"","legendFormat":"Namespace: \{\{job_namespace\}\}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Avg Scheduling Duration By Namespace In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"bytes"}]}]},"gridPos":{"h":8,"w":16,"x":0,"y":27},"id":29,"options":{"showHeader":true},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_resource_requests{resource=\"memory\", unit=\"byte\",job=\"kube-state-metrics\",queue!=\"\"}) by (queue)","format":"table","instant":true,"interval":"","legendFormat":"\{\{queue\}\}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Resource Usage Sort By Queue In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"}]}]},"gridPos":{"h":8,"w":16,"x":0,"y":35},"id":30,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_resource_requests{resource=\"memory\", unit=\"byte\",job=\"kube-state-metrics\"}) by (volcano_namespace)","format":"table","instant":true,"interval":"","legendFormat":"Namespace : \{\{volcano_namespace\}\}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Resource Usage Sort By Namespace In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.width","value":651},{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"},{"id":"thresholds","value":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}}]},{"matcher":{"id":"byName","options":"job_name"},"properties":[{"id":"custom.width","value":361}]},{"matcher":{"id":"byName","options":"Volcano Job"},"properties":[{"id":"custom.width","value":228}]}]},"gridPos":{"h":13,"w":16,"x":0,"y":43},"id":16,"options":{"frameIndex":1,"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"increase(volcano_e2e_job_scheduling_duration{}[24h]) != 0","format":"table","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Running Legency","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true,"__name__":true,"instance":true,"job":true,"kubernetes_name":true,"kubernetes_namespace":true},"indexByName":{},"renameByName":{"Time":"","job_name":"Volcano Job"}}}],"type":"table"},{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":56},"id":13,"panels":[],"title":"Volcano Fairness","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":16,"x":0,"y":57},"hiddenSeries":false,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"stddev(volcano_e2e_job_scheduling_duration)/avg(volcano_e2e_job_scheduling_duration)","format":"time_series","intervalFactor":1,"legendFormat":"CV (Job Duration)","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Job Duration Coefficient Of Variation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":true,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":64},"id":11,"panels":[],"title":"Volcano Effectiveness","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":0,"y":65},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(\n(sum(kube_pod_container_resource_requests{resource=\"cpu\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) \nby (pod,namespace)))/\nsum(kube_node_status_allocatable{resource=\"cpu\", unit=\"core\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average CPU Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":5,"y":65},"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum((sum(kube_pod_container_resource_requests{resource=\"memory\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) by (pod,namespace)))/sum(kube_node_status_allocatable{resource=\"memory\", unit=\"byte\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average Memory Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":10,"y":65},"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum((sum(kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) by (pod,namespace)))/sum(kube_node_status_capacity{resource=\"nvidia_com_gpu\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average GPU Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":16,"x":0,"y":73},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"cpu\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"cpu\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (CPU)","refId":"A"},{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"memory\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"memory\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (Memory)","refId":"B"},{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (Nvidia GPU)","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Node Resource Coefficient Of Variation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":true,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":26,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-12h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"","title":"Volcano Global Overview Dashboard","uid":"nYn30KvMzf","version":19} + {"annotations":{"list":[{"builtIn":1,"datasource":"prometheus","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":2,"links":[],"panels":[{"datasource":null,"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":0,"y":0},"id":20,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"count(max_over_time(kube_pod_container_status_running{job=\"kube-state-metrics\"}[1h]) != 0)","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"TPH –Schedule Task In 1 Hour","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":3,"y":0},"id":21,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_info{job=\"kube-state-metrics\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Node","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":6,"y":0},"id":23,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"kube_node_status_capacity{resource=\"nvidia_com_gpu\",job=\"kube-state-metrics\"}","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano GPU","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":9,"y":0},"id":24,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"memory\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Memory","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":12,"y":0},"id":22,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"cpu\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano CPU","type":"stat"},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateOranges","exponent":0.5,"mode":"spectrum"},"dataFormat":"timeseries","datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":50}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":16,"x":0,"y":5},"heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":18,"legend":{"show":false},"pluginVersion":"7.3.4","reverseYBuckets":false,"targets":[{"expr":"increase(volcano_e2e_job_scheduling_latency_milliseconds_bucket[1h])","format":"heatmap","instant":false,"interval":"","legendFormat":"{{le}} ms","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Legency Heatmap","tooltip":{"show":true,"showHistogram":false},"transformations":[],"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":null,"format":"ms","logBase":2,"max":"500000","min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":50}]},"unit":"ms"},"overrides":[]},"gridPos":{"h":7,"w":16,"x":0,"y":13},"id":26,"options":{"displayMode":"lcd","orientation":"horizontal","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showUnfilled":true},"pluginVersion":"7.3.4","targets":[{"expr":"avg(volcano_e2e_job_scheduling_duration{}) by (queue)","interval":"","legendFormat":"{{queue}}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Scheduling Avg Duration By Queue In 24H","type":"bargauge"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"ms"},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"}]},{"matcher":{"id":"byName","options":"job_namespace"},"properties":[{"id":"custom.width","value":279}]}]},"gridPos":{"h":7,"w":16,"x":0,"y":20},"id":27,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"avg(volcano_e2e_job_scheduling_duration{}) by (job_namespace)","format":"table","instant":true,"interval":"","legendFormat":"Namespace: {{job_namespace}}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Avg Scheduling Duration By Namespace In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"bytes"}]}]},"gridPos":{"h":8,"w":16,"x":0,"y":27},"id":29,"options":{"showHeader":true},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_resource_requests{resource=\"memory\", unit=\"byte\",job=\"kube-state-metrics\",queue!=\"\"}) by (queue)","format":"table","instant":true,"interval":"","legendFormat":"{{queue}}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Resource Usage Sort By Queue In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"}]}]},"gridPos":{"h":8,"w":16,"x":0,"y":35},"id":30,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_resource_requests{resource=\"memory\", unit=\"byte\",job=\"kube-state-metrics\"}) by (volcano_namespace)","format":"table","instant":true,"interval":"","legendFormat":"Namespace : {{volcano_namespace}}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Resource Usage Sort By Namespace In 24H","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"datasource":null,"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.width","value":651},{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"},{"id":"thresholds","value":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}}]},{"matcher":{"id":"byName","options":"job_name"},"properties":[{"id":"custom.width","value":361}]},{"matcher":{"id":"byName","options":"Volcano Job"},"properties":[{"id":"custom.width","value":228}]}]},"gridPos":{"h":13,"w":16,"x":0,"y":43},"id":16,"options":{"frameIndex":1,"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"increase(volcano_e2e_job_scheduling_duration{}[24h]) != 0","format":"table","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Volcano Job Running Legency","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true,"__name__":true,"instance":true,"job":true,"kubernetes_name":true,"kubernetes_namespace":true},"indexByName":{},"renameByName":{"Time":"","job_name":"Volcano Job"}}}],"type":"table"},{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":56},"id":13,"panels":[],"title":"Volcano Fairness","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":16,"x":0,"y":57},"hiddenSeries":false,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"stddev(volcano_e2e_job_scheduling_duration)/avg(volcano_e2e_job_scheduling_duration)","format":"time_series","intervalFactor":1,"legendFormat":"CV (Job Duration)","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Job Duration Coefficient Of Variation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":true,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":64},"id":11,"panels":[],"title":"Volcano Effectiveness","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":0,"y":65},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(\n(sum(kube_pod_container_resource_requests{resource=\"cpu\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) \nby (pod,namespace)))/\nsum(kube_node_status_allocatable{resource=\"cpu\", unit=\"core\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average CPU Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":5,"y":65},"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum((sum(kube_pod_container_resource_requests{resource=\"memory\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) by (pod,namespace)))/sum(kube_node_status_allocatable{resource=\"memory\", unit=\"byte\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average Memory Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"format":"percentunit","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":5,"x":10,"y":65},"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum((sum(kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}) by (pod,namespace)) * on(pod) (max(kube_pod_container_status_running{}) by (pod,namespace)))/sum(kube_node_status_capacity{resource=\"nvidia_com_gpu\"})","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"0.7,0.9","timeFrom":null,"timeShift":null,"title":"Volcano Cluster Average GPU Usage","transparent":true,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":16,"x":0,"y":73},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"cpu\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"cpu\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (CPU)","refId":"A"},{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"memory\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"memory\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (Memory)","refId":"B"},{"expr":"stddev(sum by (node) (kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}))/avg(sum by (node) (kube_pod_container_resource_requests{resource=\"nvidia_com_gpu\"}))","format":"time_series","intervalFactor":1,"legendFormat":"CV (Nvidia GPU)","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Node Resource Coefficient Of Variation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":true,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":26,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-12h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"","title":"Volcano Global Overview Dashboard","uid":"nYn30KvMzf","version":19} volcano-queue-overview-dashboard.json: |- {"annotations":{"list":[{"builtIn":1,"datasource":"prometheus","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":4,"iteration":1607928216980,"links":[],"panels":[{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":0,"y":0},"id":6,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}==1)","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Running Job","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":3,"y":0},"id":16,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"count(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}==0)","instant":false,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Finished Job","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":3,"x":6,"y":0},"id":17,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"count((max_over_time(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}[10m]) != 0) and kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"} == 0)","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Last 10m Finished Job","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":9,"y":0},"id":7,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","instant":true,"interval":"","legendFormat":"volcano_job","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Running CPU","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":12,"y":0},"id":8,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"gpu\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","instant":true,"interval":"","legendFormat":"volcano_job","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Running GPU","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":15,"y":0},"id":2,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7.3.4","targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","instant":true,"interval":"","legendFormat":"volcano_job","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Running Memory","type":"stat"},{"datasource":null,"fieldConfig":{"defaults":{"custom":{"align":null,"filterable":false},"mappings":[],"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Time"},"properties":[{"id":"custom.width","value":195}]},{"matcher":{"id":"byName","options":"__name__"},"properties":[{"id":"custom.width","value":267}]},{"matcher":{"id":"byName","options":"Value"},"properties":[{"id":"custom.displayMode","value":"lcd-gauge"},{"id":"unit","value":"ms"}]}]},"gridPos":{"h":24,"w":12,"x":0,"y":5},"id":14,"options":{"showHeader":true,"sortBy":[{"desc":true,"displayName":"Value"}]},"pluginVersion":"7.3.4","targets":[{"expr":"increase(volcano_e2e_job_scheduling_duration{queue=\"$queue\"}[24h]) != 0 ","format":"table","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Queue Running Job Legency","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true,"__name__":true,"instance":true,"job":true,"kubernetes_name":true,"kubernetes_namespace":true},"indexByName":{},"renameByName":{}}}],"type":"table"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":5},"hiddenSeries":false,"id":12,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","interval":"","legendFormat":"CPU Cores","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queue Running CPU","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{},"unit":"bytes"},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":13},"hiddenSeries":false,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\",queue=\"$queue\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","interval":"","legendFormat":"","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queue Running Memory ","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":21},"hiddenSeries":false,"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(\n(sum(kube_pod_volcano_container_resource_requests{resource=\"gpu\",job=\"kube-state-metrics\",volcano_namespace=\"$namespace\"}) by (pod,namespace)) * on(pod) (max(kube_pod_volcano_container_status_running{job=\"kube-state-metrics\",queue=\"$queue\"}) \nby (pod,namespace))) ","interval":"","legendFormat":"GPU Cards","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queue Running GPU","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":26,"style":"dark","tags":[],"templating":{"list":[{"allValue":null,"current":{"selected":false,"text":"default","value":"default"},"datasource":"prometheus","definition":"label_values(volcano_queue_share,queue_name)","error":null,"hide":0,"includeAll":false,"label":null,"multi":false,"name":"queue","options":[],"query":"label_values(volcano_queue_share,queue_name)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{},"timezone":"","title":"Volcano Queue View","uid":"sAtQfo1Mk","version":8} volcano-namespace-overview-dashboard.json: |-