From aa988adb47833c0f8e20cd33dcf18dd23cc2c69b Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Thu, 3 Aug 2023 15:28:47 +0100 Subject: [PATCH] Add agent dashboards Signed-off-by: Michel Hollands --- .../src/dashboards/agent-logs-pipeline.json | 1082 ++++++++++++ .../src/dashboards/agent-operational.json | 1189 +++++++++++++ .../src/dashboards/agent-remote-write.json | 1512 +++++++++++++++++ .../dashboards/agent-tracing-pipeline.json | 1065 ++++++++++++ .../meta-monitoring/src/dashboards/agent.json | 786 +++++++++ .../templates/grafana/agent-dashboards-1.yaml | 19 + .../templates/grafana/dashboard.yaml | 8 + .../templates/grafana/grafana.yaml | 5 + 8 files changed, 5666 insertions(+) create mode 100644 charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json create mode 100644 charts/meta-monitoring/src/dashboards/agent-operational.json create mode 100644 charts/meta-monitoring/src/dashboards/agent-remote-write.json create mode 100644 charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json create mode 100644 charts/meta-monitoring/src/dashboards/agent.json create mode 100644 charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml diff --git a/charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json b/charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json new file mode 100644 index 0000000..305136a --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json @@ -0,0 +1,1082 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "height": 500, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by($groupBy) (rate(promtail_dropped_bytes_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Dropped bytes rate [B/s]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by($groupBy) (rate(promtail_request_duration_seconds_bucket{status_code=~\"2..\", cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n/\nsum by($groupBy) (rate(promtail_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n* 100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Write requests success rate [%]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "%", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "%", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": 500, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(\n 0.990000, \n sum by (le, $groupBy)\n (rate(promtail_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Write latencies p99 [s]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(\n 0.900000, \n sum by (le, $groupBy)\n (rate(promtail_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Write latencies p90 [s]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(\n 0.500000, \n sum by (le, $groupBy)\n (rate(promtail_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Write latencies p50 [s]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum by (le, $groupBy) (rate(promtail_request_duration_seconds_sum{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval])))\n/\n(sum by (le, $groupBy) (rate(promtail_request_duration_seconds_count{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval])))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Write latencies average [s]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Latencies", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": 500, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by($groupBy) (rate(promtail_read_bytes_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes read rate [B/s]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by($groupBy) (rate(promtail_read_lines_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Lines read rate [lines/s]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by($groupBy) (promtail_files_active_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"})\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Active files count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by($groupBy) (rate(promtail_sent_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{$groupBy}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Entries sent rate [entries/s]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Logs volume", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "grafana-agent-mixin" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(agent_build_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(agent_build_info, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "container", + "options": [ ], + "query": "label_values(agent_build_info, container)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "pod", + "options": [ ], + "query": "label_values(agent_build_info{container=~\"$container\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "pod", + "value": "pod" + }, + "hide": 0, + "includeAll": false, + "label": "", + "multi": false, + "name": "groupBy", + "options": [ + { + "text": "pod", + "value": "pod" + }, + { + "text": "cluster", + "value": "cluster" + }, + { + "text": "namespace", + "value": "namespace" + } + ], + "query": "pod,cluster,namespace", + "refresh": 0, + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Agent Logs Pipeline", + "version": 0 +} diff --git a/charts/meta-monitoring/src/dashboards/agent-operational.json b/charts/meta-monitoring/src/dashboards/agent-operational.json new file mode 100644 index 0000000..6cc2a65 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/agent-operational.json @@ -0,0 +1,1189 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(go_gc_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "GCs [count/s]", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Go Heap In Use", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage [time/s]", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Working Set Size", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(promtail_custom_bad_words_total{cluster=~\"$cluster\", exported_namespace=~\"$namespace\", exported_job=~\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Promtail Bad Words", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "General", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Received Bytes [B/s]", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Transmitted Bytes [B/s]", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum by (pod) (avg_over_time(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}[1m])))\n/\n(sum by (pod) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Heap Used per Series per Pod", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum by (container) (avg_over_time(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}[1m])))\n/\n(sum by (container) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Avg Heap Used per Series", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (pod) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Series Count per Pod", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance_group_name) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance_group_name}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Series per Config", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (container) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total Series", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus Read", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "grafana-agent-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(agent_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(agent_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "container", + "multi": true, + "name": "container", + "options": [ ], + "query": "label_values(agent_build_info{cluster=~\"$cluster\", namespace=\"$namespace\"}, container)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "pod", + "multi": true, + "name": "pod", + "options": [ ], + "query": "label_values(agent_build_info{cluster=~\"$cluster\", namespace=\"$namespace\", container=\"$container\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Agent Operational", + "uid": "", + "version": 0 +} diff --git a/charts/meta-monitoring/src/dashboards/agent-remote-write.json b/charts/meta-monitoring/src/dashboards/agent-remote-write.json new file mode 100644 index 0000000..f222774 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/agent-remote-write.json @@ -0,0 +1,1512 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}\n -\n ignoring(url, remote_name) group_right(pod)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Highest Timestamp In vs. Highest Timestamp Sent", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]) / rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "mean {{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p99 {{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency [1m]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Timestamps", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(agent_wal_samples_appended_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Rate in [5m]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Rate succeeded [5m]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Pending Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Dropped Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Failed Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Retried Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Samples", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Current Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Max Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Min Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Desired Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shards", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Shard Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shard Details", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 15, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Remote Write Current Segment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Segments", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Enqueue Retries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Misc. Rates", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "grafana-agent-mixin" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(agent_build_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(agent_build_info, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "container", + "options": [ ], + "query": "label_values(agent_build_info, container)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "pod", + "options": [ ], + "query": "label_values(agent_build_info{container=~\"$container\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "url", + "options": [ ], + "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", pod=~\"$pod\"}, url)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Agent Prometheus Remote Write", + "version": 0 +} diff --git a/charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json b/charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json new file mode 100644 index 0000000..3e9415b --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json @@ -0,0 +1,1065 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traces_receiver_accepted_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver!=\"otlp/lb\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ pod }} - {{ receiver }}/{{ transport }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Accepted spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traces_receiver_refused_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver!=\"otlp/lb\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ pod }} - {{ receiver }}/{{ transport }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Refused spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traces_exporter_sent_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter!=\"otlp\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ pod }} - {{ exporter }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Exported spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traces_exporter_send_failed_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter!=\"otlp\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ pod }} - {{ exporter }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Exported failed spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traces_receiver_accepted_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver!=\"otlp/lb\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Accepted", + "refId": "A" + }, + { + "expr": "sum(rate(traces_receiver_refused_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver!=\"otlp/lb\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Refused", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Received spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traces_exporter_sent_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter!=\"otlp\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Sent", + "refId": "A" + }, + { + "expr": "sum(rate(traces_exporter_send_failed_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter!=\"otlp\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Send failed", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Exported spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Write / Read", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traces_loadbalancer_backend_outcome{cluster=~\"$cluster\",namespace=~\"$namespace\",success=\"true\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ pod }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Load-balanced spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "traces_loadbalancer_num_backends{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ pod }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Number of peers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traces_receiver_accepted_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver=\"otlp/lb\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Accepted", + "refId": "A" + }, + { + "expr": "sum(rate(traces_receiver_refused_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver=\"otlp/lb\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Refused", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Received spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traces_exporter_sent_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter=\"otlp\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Sent", + "refId": "A" + }, + { + "expr": "sum(rate(traces_exporter_send_failed_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter=\"otlp\"}[$__rate_interval]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Send failed", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Exported spans", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Load balancing", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "grafana-agent-mixin" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(agent_build_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(agent_build_info, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "container", + "options": [ ], + "query": "label_values(agent_build_info, container)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "pod", + "options": [ ], + "query": "label_values(agent_build_info{container=~\"$container\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Agent Tracing Pipeline", + "version": 0 +} diff --git a/charts/meta-monitoring/src/dashboards/agent.json b/charts/meta-monitoring/src/dashboards/agent.json new file mode 100644 index 0000000..768fccb --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/agent.json @@ -0,0 +1,786 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Count", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Uptime", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "pod", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "version", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count by (pod, container, version) (agent_build_info{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "max by (pod, container) (time() - process_start_time_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Agent Stats", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Agent Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])) by (pod, scrape_job) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}/{{scrape_job}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Target Sync", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (pod) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Targets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus Discovery", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])\n/\nrate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])\n* 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}} {{interval}} configured", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average Scrape Interval Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "exceeded sample limit: {{job}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "duplicate timestamp: {{job}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "out of bounds: {{job}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "out of order: {{job}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Scrape failures", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, instance_group_name) (rate(agent_wal_samples_appended_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{instance_group_name}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Appended Samples", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus Retrieval", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "grafana-agent-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(agent_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(agent_build_info, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "container", + "multi": true, + "name": "container", + "options": [ ], + "query": "label_values(agent_build_info, container)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "grafana-agent-.*", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "pod", + "multi": true, + "name": "pod", + "options": [ ], + "query": "label_values(agent_build_info{container=~\"$container\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Agent", + "uid": "", + "version": 0 +} diff --git a/charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml b/charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml new file mode 100644 index 0000000..6589c27 --- /dev/null +++ b/charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml @@ -0,0 +1,19 @@ +{{- if .Values.dashboards.traces.enabled }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: agent-dashboards-1 + namespace: {{ $.Release.Namespace }} +data: + "agent-logs-pipeline.json": | + {{ $.Files.Get "src/dashboards/agent-logs-pipeline.json" | fromJson | toJson }} + "agent-operational.json": | + {{ $.Files.Get "src/dashboards/agent-operational.json" | fromJson | toJson }} + "agent-remote-write.json": | + {{ $.Files.Get "src/dashboards/agent-remote-write.json" | fromJson | toJson }} + "agent-tracing-pipeline.json": | + {{ $.Files.Get "src/dashboards/agent-tracing-pipeline.json" | fromJson | toJson }} + "agent.json": | + {{ $.Files.Get "src/dashboards/agent.json" | fromJson | toJson }} +{{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/grafana/dashboard.yaml b/charts/meta-monitoring/templates/grafana/dashboard.yaml index 9d5913e..1564996 100644 --- a/charts/meta-monitoring/templates/grafana/dashboard.yaml +++ b/charts/meta-monitoring/templates/grafana/dashboard.yaml @@ -80,4 +80,12 @@ data: orgId: 1 type: file {{- end }} + - disableDeletion: true + editable: false + folder: Agent + name: agent-1 + options: + path: /var/lib/grafana/dashboards/agent-1 + orgId: 1 + type: file {{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/grafana/grafana.yaml b/charts/meta-monitoring/templates/grafana/grafana.yaml index 0e7d6c7..72c0f03 100644 --- a/charts/meta-monitoring/templates/grafana/grafana.yaml +++ b/charts/meta-monitoring/templates/grafana/grafana.yaml @@ -91,6 +91,8 @@ spec: - mountPath: /var/lib/grafana/dashboards/tempo-1 name: tempo-dashboards-1 {{- end }} + - mountPath: /var/lib/grafana/dashboards/agent-1 + name: agent-dashboards-1 volumes: - name: grafana-pv persistentVolumeClaim: @@ -131,6 +133,9 @@ spec: configMap: name: tempo-dashboards-1 {{- end }} + - name: agent-dashboards-1 + configMap: + name: agent-dashboards-1 --- apiVersion: v1