forked from RemoteSync/grafana-meta-monitoring-chart
Compare commits
21 Commits
add_valida
...
add_agent_
Author | SHA1 | Date | |
---|---|---|---|
|
aa988adb47 | ||
|
6fb22ae671 | ||
|
d3878e1516 | ||
|
8ae136e0c4 | ||
|
ac3e4462f9 | ||
|
e9aab491db | ||
|
c95c0e2ca9 | ||
|
c288a80bd4 | ||
|
93cac45b2e | ||
|
6ce4be70e2 | ||
|
176312167c | ||
|
07a336d9ed | ||
|
db493fbb39 | ||
|
f4d5bcc018 | ||
|
18f0dc932a | ||
|
d999ef0110 | ||
|
9dd6584bee | ||
|
04cf591478 | ||
|
9f54397e83 | ||
|
fa2b01708c | ||
|
a1cd5d36b0 |
1082
charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json
Normal file
1082
charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json
Normal file
File diff suppressed because it is too large
Load Diff
1189
charts/meta-monitoring/src/dashboards/agent-operational.json
Normal file
1189
charts/meta-monitoring/src/dashboards/agent-operational.json
Normal file
File diff suppressed because it is too large
Load Diff
1512
charts/meta-monitoring/src/dashboards/agent-remote-write.json
Normal file
1512
charts/meta-monitoring/src/dashboards/agent-remote-write.json
Normal file
File diff suppressed because it is too large
Load Diff
1065
charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json
Normal file
1065
charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json
Normal file
File diff suppressed because it is too large
Load Diff
786
charts/meta-monitoring/src/dashboards/agent.json
Normal file
786
charts/meta-monitoring/src/dashboards/agent.json
Normal file
@@ -0,0 +1,786 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"links": [ ],
|
||||
"refresh": "30s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"styles": [
|
||||
{
|
||||
"alias": "Time",
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"pattern": "Time",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"alias": "Count",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
"thresholds": [ ],
|
||||
"type": "hidden",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Uptime",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Container",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "container",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Pod",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "pod",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Version",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "version",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"pattern": "/.*/",
|
||||
"thresholds": [ ],
|
||||
"type": "string",
|
||||
"unit": "short"
|
||||
}
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count by (pod, container, version) (agent_build_info{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A",
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "max by (pod, container) (time() - process_start_time_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "B",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Agent Stats",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"transform": "table",
|
||||
"type": "table",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Agent Stats",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])) by (pod, scrape_job) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}/{{scrape_job}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Target Sync",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (pod) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Targets",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Prometheus Discovery",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])\n/\nrate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])\n* 1e3\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}} {{interval}} configured",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Average Scrape Interval Duration",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "exceeded sample limit: {{job}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "duplicate timestamp: {{job}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "out of bounds: {{job}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "out of order: {{job}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Scrape failures",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (job, instance_group_name) (rate(agent_wal_samples_appended_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{job}} {{instance_group_name}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Appended Samples",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Prometheus Retrieval",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"grafana-agent-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(agent_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "namespace",
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(agent_build_info, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "container",
|
||||
"multi": true,
|
||||
"name": "container",
|
||||
"options": [ ],
|
||||
"query": "label_values(agent_build_info, container)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "grafana-agent-.*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "pod",
|
||||
"multi": true,
|
||||
"name": "pod",
|
||||
"options": [ ],
|
||||
"query": "label_values(agent_build_info{container=~\"$container\"}, pod)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Agent",
|
||||
"uid": "",
|
||||
"version": 0
|
||||
}
|
@@ -217,15 +217,15 @@
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))",
|
||||
"expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "received",
|
||||
"legendFormat": "in progress",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
@@ -233,7 +233,7 @@
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Delete Requests Received / Day",
|
||||
"title": "# of Delete Requests (received - processed) ",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
@@ -293,7 +293,83 @@
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "received",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Delete Requests Received / Day",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
@@ -361,7 +437,7 @@
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 5,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@@ -381,7 +457,247 @@
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Compactor CPU usage",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": " {{pod}} ",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Compactor memory usage (MiB)",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Compaction run duration (seconds)",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Compactor",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
@@ -429,19 +745,7 @@
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Failures",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
@@ -449,7 +753,7 @@
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 6,
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@@ -469,7 +773,7 @@
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
@@ -523,7 +827,45 @@
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Deleted lines",
|
||||
"title": "Deletion metrics",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"datasource": "$loki_datasource",
|
||||
"id": 11,
|
||||
"span": 6,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "In progress/finished",
|
||||
"type": "logs"
|
||||
},
|
||||
{
|
||||
"datasource": "$loki_datasource",
|
||||
"id": 12,
|
||||
"span": 6,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Requests",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "List of deletion requests",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
|
@@ -6,7 +6,6 @@
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": 8,
|
||||
"iteration": 1583185057230,
|
||||
"links": [
|
||||
{
|
||||
|
@@ -1,41 +1,27 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"enable": false,
|
||||
"expr": "sum by (tenant) (changes(loki_ruler_wal_prometheus_tsdb_wal_truncations_total{tenant=~\"${tenant}\"}[$__rate_interval]))",
|
||||
"iconColor": "red",
|
||||
"name": "WAL Truncations",
|
||||
"target": {
|
||||
"queryType": "Azure Monitor",
|
||||
"refId": "Anno"
|
||||
},
|
||||
"titleFormat": "{{tenant}}"
|
||||
}
|
||||
]
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"iteration": 1635347545534,
|
||||
"links": [ ],
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"loki"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Loki Dashboards",
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
@@ -599,59 +585,139 @@
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 31,
|
||||
"refresh": "10s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [ ],
|
||||
"tags": [
|
||||
"loki"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"description": null,
|
||||
"error": null,
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Datasource",
|
||||
"multi": false,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"queryValue": "",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"datasource": "${datasource}",
|
||||
"definition": "label_values(loki_ruler_wal_samples_appended_total, tenant)",
|
||||
"description": null,
|
||||
"error": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(loki_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
"name": "loki_datasource",
|
||||
"options": [ ],
|
||||
"query": "loki",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Tenant",
|
||||
"multi": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "tenant",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(loki_ruler_wal_samples_appended_total, tenant)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
"query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster=\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))",
|
||||
"refresh": 0,
|
||||
"regex": "/\"([^\"]+)\"/",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": { },
|
||||
"timezone": "",
|
||||
"title": "Recording Rules",
|
||||
"uid": "2xKA_ZK7k",
|
||||
"version": 9,
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Loki / Recording Rules",
|
||||
"uid": "recording-rules",
|
||||
"version": 0,
|
||||
"weekStart": ""
|
||||
}
|
@@ -6,7 +6,6 @@
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": 68,
|
||||
"iteration": 1588704280892,
|
||||
"links": [
|
||||
{
|
||||
@@ -567,17 +566,17 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".99",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.75, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.75, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".9",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".5",
|
||||
"refId": "C"
|
||||
}
|
||||
@@ -673,17 +672,17 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".99",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.9, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".9",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".5",
|
||||
"refId": "C"
|
||||
}
|
||||
@@ -779,7 +778,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)",
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (route) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{route}}",
|
||||
"refId": "A"
|
||||
@@ -877,18 +876,18 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".99",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.9, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"hide": false,
|
||||
"legendFormat": ".9",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"hide": false,
|
||||
"legendFormat": ".5",
|
||||
"refId": "C"
|
||||
@@ -985,7 +984,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)",
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{route}}",
|
||||
"refId": "A"
|
||||
@@ -1085,17 +1084,17 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
|
||||
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
|
||||
"legendFormat": "{{route}}-.99",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
|
||||
"expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
|
||||
"legendFormat": "{{route}}-.9",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
|
||||
"expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))",
|
||||
"legendFormat": "{{route}}-.5",
|
||||
"refId": "C"
|
||||
}
|
||||
@@ -1191,17 +1190,17 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".99-{{route}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".9-{{route}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".5-{{route}}",
|
||||
"refId": "C"
|
||||
}
|
||||
@@ -1297,7 +1296,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[5m])) by (route)",
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (route) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{route}}",
|
||||
"refId": "A"
|
||||
@@ -1396,17 +1395,17 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".99-{{route}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".9-{{route}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3",
|
||||
"legendFormat": ".5-{{route}}",
|
||||
"refId": "C"
|
||||
}
|
||||
@@ -1502,7 +1501,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)",
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{route}}",
|
||||
"refId": "A"
|
||||
@@ -2049,7 +2048,7 @@
|
||||
"panels": [ ],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | level=\"error\"",
|
||||
"expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} |= \"level=error\"",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -2100,7 +2099,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)",
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (route) > 0",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{route}}",
|
||||
@@ -2190,9 +2189,9 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (ingester)",
|
||||
"expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{ingester}}",
|
||||
"legendFormat": "{{pod}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -3256,7 +3255,7 @@
|
||||
"panels": [ ],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | level=\"error\"",
|
||||
"expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} |= \"level=error\"",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -3307,7 +3306,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[1m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[1m])) by (route)",
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (route) > 0",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{route}}",
|
||||
|
@@ -53,6 +53,11 @@
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
@@ -72,6 +77,14 @@
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})",
|
||||
"format": "time_series",
|
||||
@@ -142,6 +155,11 @@
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
@@ -161,6 +179,14 @@
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)",
|
||||
"format": "time_series",
|
||||
@@ -509,83 +535,6 @@
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "loki_boltdb_shipper_query_readiness_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "duration",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Query Readiness Duration",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
@@ -607,7 +556,7 @@
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 8,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@@ -626,6 +575,11 @@
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
@@ -645,6 +599,14 @@
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})",
|
||||
"format": "time_series",
|
||||
@@ -695,7 +657,7 @@
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 9,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@@ -714,6 +676,11 @@
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
@@ -733,6 +700,14 @@
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)",
|
||||
"format": "time_series",
|
||||
@@ -783,7 +758,7 @@
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 10,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
|
@@ -142,7 +142,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ route }} 99th Percentile",
|
||||
@@ -150,7 +150,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ route }} 50th Percentile",
|
||||
@@ -158,7 +158,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) ",
|
||||
"expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ route }} Average",
|
||||
|
@@ -52,6 +52,11 @@
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
@@ -71,6 +76,14 @@
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})",
|
||||
"format": "time_series",
|
||||
@@ -140,6 +153,11 @@
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
@@ -159,6 +177,14 @@
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)",
|
||||
"format": "time_series",
|
||||
|
@@ -128,6 +128,11 @@
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
@@ -147,6 +152,14 @@
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})",
|
||||
"format": "time_series",
|
||||
@@ -217,6 +230,11 @@
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
@@ -236,6 +254,14 @@
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)",
|
||||
"format": "time_series",
|
||||
|
@@ -142,7 +142,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "99th Percentile",
|
||||
@@ -150,7 +150,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3",
|
||||
"expr": "histogram_quantile(0.50, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "50th Percentile",
|
||||
@@ -158,7 +158,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "1e3 * sum(job:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"}) / sum(job:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})",
|
||||
"expr": "1e3 * sum(cluster_job:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}) / sum(cluster_job:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Average",
|
||||
|
@@ -0,0 +1,836 @@
|
||||
{
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "8.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Mimir dashboards",
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "limit",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "CPU",
|
||||
"tooltip": {
|
||||
"sort": 2
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "limit",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Memory (workingset)",
|
||||
"tooltip": {
|
||||
"sort": 2
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Memory (go heap inuse)",
|
||||
"tooltip": {
|
||||
"sort": 2
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Alertmanager",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Receive bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Transmit bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}} - {{device}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Disk writes",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}} - {{device}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Disk reads",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Disk",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{persistentvolumeclaim}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Disk space utilization",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Mimir / Alertmanager resources",
|
||||
"uid": "a6883fb22799ac74479c7db872451092",
|
||||
"version": 0
|
||||
}
|
2703
charts/meta-monitoring/src/dashboards/mimir-alertmanager.json
Normal file
2703
charts/meta-monitoring/src/dashboards/mimir-alertmanager.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,940 @@
|
||||
{
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "8.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Mimir dashboards",
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "limit",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "CPU",
|
||||
"tooltip": {
|
||||
"sort": 2
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Memory (go heap inuse)",
|
||||
"tooltip": {
|
||||
"sort": 2
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "CPU and memory",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "limit",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Memory (RSS)",
|
||||
"tooltip": {
|
||||
"sort": 2
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "request",
|
||||
"color": "#FFC000",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
},
|
||||
{
|
||||
"alias": "limit",
|
||||
"color": "#E02F44",
|
||||
"dashLength": 5,
|
||||
"dashes": true,
|
||||
"fill": 0
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "limit",
|
||||
"legendLink": null
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "request",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Memory (workingset)",
|
||||
"tooltip": {
|
||||
"sort": 2
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Receive bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Transmit bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}} - {{device}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Disk writes",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}} - {{device}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Disk reads",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{persistentvolumeclaim}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Disk space utilization",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Disk",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Mimir / Compactor resources",
|
||||
"uid": "09a5c49e9cdb2f2b24c6d184574a07fd",
|
||||
"version": 0
|
||||
}
|
2276
charts/meta-monitoring/src/dashboards/mimir-compactor.json
Normal file
2276
charts/meta-monitoring/src/dashboards/mimir-compactor.json
Normal file
File diff suppressed because it is too large
Load Diff
312
charts/meta-monitoring/src/dashboards/mimir-config.json
Normal file
312
charts/meta-monitoring/src/dashboards/mimir-config.json
Normal file
@@ -0,0 +1,312 @@
|
||||
{
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "8.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Mimir dashboards",
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "sha256:{{sha256}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Startup config file hashes",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "instances",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Startup config file",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "sha256:{{sha256}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Runtime config file hashes",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "instances",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Runtime config file",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "namespace",
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Mimir / Config",
|
||||
"uid": "5d9d0b4724c0f80d68467088ec61e003",
|
||||
"version": 0
|
||||
}
|
938
charts/meta-monitoring/src/dashboards/mimir-object-store.json
Normal file
938
charts/meta-monitoring/src/dashboards/mimir-object-store.json
Normal file
@@ -0,0 +1,938 @@
|
||||
{
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "8.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Mimir dashboards",
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{component}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "RPS / component",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "reqps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": "$datasource",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"noValue": "0",
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"id": 2,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"span": 6,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{component}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"title": "Error rate / component",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Components",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{operation}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "RPS / operation",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "reqps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": "$datasource",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"noValue": "0",
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"id": 4,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"span": 6,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{operation}}",
|
||||
"legendLink": null
|
||||
}
|
||||
],
|
||||
"title": "Error rate / operation",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Operations",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "99th Percentile",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "50th Percentile",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Average",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Op: Get",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "99th Percentile",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "50th Percentile",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Average",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Op: GetRange",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "99th Percentile",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "50th Percentile",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Average",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Op: Exists",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "99th Percentile",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "50th Percentile",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Average",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Op: Attributes",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "99th Percentile",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "50th Percentile",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Average",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Op: Upload",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "99th Percentile",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "50th Percentile",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Average",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Op: Delete",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "namespace",
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Mimir / Object Store",
|
||||
"uid": "e1324ee2a434f4158c00a9ee279d3292",
|
||||
"version": 0
|
||||
}
|
266
charts/meta-monitoring/src/dashboards/mimir-overrides.json
Normal file
266
charts/meta-monitoring/src/dashboards/mimir-overrides.json
Normal file
@@ -0,0 +1,266 @@
|
||||
{
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "8.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Mimir dashboards",
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"id": 1,
|
||||
"span": 12,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})",
|
||||
"instant": true,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Defaults",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": { }
|
||||
},
|
||||
{
|
||||
"id": "merge",
|
||||
"options": { }
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Value": 1,
|
||||
"limit_name": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": { },
|
||||
"sort": [
|
||||
{
|
||||
"field": "limit_name"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"id": 2,
|
||||
"span": 12,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})",
|
||||
"instant": true,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Per-tenant overrides",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {
|
||||
"mode": "columns",
|
||||
"valueLabel": "limit_name"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "merge",
|
||||
"options": { }
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {
|
||||
"user": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": ".*",
|
||||
"value": ".*"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Tenant ID",
|
||||
"name": "tenant_id",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": ".*",
|
||||
"value": ".*"
|
||||
}
|
||||
],
|
||||
"query": ".*",
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Mimir / Overrides",
|
||||
"uid": "1e2c358600ac53f09faea133f811b5bb",
|
||||
"version": 0
|
||||
}
|
1135
charts/meta-monitoring/src/dashboards/mimir-overview-networking.json
Normal file
1135
charts/meta-monitoring/src/dashboards/mimir-overview-networking.json
Normal file
File diff suppressed because it is too large
Load Diff
1315
charts/meta-monitoring/src/dashboards/mimir-overview-resources.json
Normal file
1315
charts/meta-monitoring/src/dashboards/mimir-overview-resources.json
Normal file
File diff suppressed because it is too large
Load Diff
1476
charts/meta-monitoring/src/dashboards/mimir-overview.json
Normal file
1476
charts/meta-monitoring/src/dashboards/mimir-overview.json
Normal file
File diff suppressed because it is too large
Load Diff
3201
charts/meta-monitoring/src/dashboards/mimir-queries.json
Normal file
3201
charts/meta-monitoring/src/dashboards/mimir-queries.json
Normal file
File diff suppressed because it is too large
Load Diff
2134
charts/meta-monitoring/src/dashboards/mimir-reads-networking.json
Normal file
2134
charts/meta-monitoring/src/dashboards/mimir-reads-networking.json
Normal file
File diff suppressed because it is too large
Load Diff
2657
charts/meta-monitoring/src/dashboards/mimir-reads-resources.json
Normal file
2657
charts/meta-monitoring/src/dashboards/mimir-reads-resources.json
Normal file
File diff suppressed because it is too large
Load Diff
4683
charts/meta-monitoring/src/dashboards/mimir-reads.json
Normal file
4683
charts/meta-monitoring/src/dashboards/mimir-reads.json
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1177
charts/meta-monitoring/src/dashboards/mimir-remote-ruler-reads.json
Normal file
1177
charts/meta-monitoring/src/dashboards/mimir-remote-ruler-reads.json
Normal file
File diff suppressed because it is too large
Load Diff
1448
charts/meta-monitoring/src/dashboards/mimir-rollout-progress.json
Normal file
1448
charts/meta-monitoring/src/dashboards/mimir-rollout-progress.json
Normal file
File diff suppressed because it is too large
Load Diff
2604
charts/meta-monitoring/src/dashboards/mimir-ruler.json
Normal file
2604
charts/meta-monitoring/src/dashboards/mimir-ruler.json
Normal file
File diff suppressed because it is too large
Load Diff
362
charts/meta-monitoring/src/dashboards/mimir-scaling.json
Normal file
362
charts/meta-monitoring/src/dashboards/mimir-scaling.json
Normal file
@@ -0,0 +1,362 @@
|
||||
{
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "8.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Mimir dashboards",
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "200px",
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"options": {
|
||||
"content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n",
|
||||
"mode": "markdown"
|
||||
},
|
||||
"span": 12,
|
||||
"title": "",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Service scaling",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "400px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"sort": {
|
||||
"col": 0,
|
||||
"desc": false
|
||||
},
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"styles": [
|
||||
{
|
||||
"alias": "Time",
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"pattern": "Time",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"alias": "Required Replicas",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 0,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Cluster",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "__name__",
|
||||
"thresholds": [ ],
|
||||
"type": "hidden",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Cluster",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "cluster",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Service",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "deployment",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Namespace",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "namespace",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "Reason",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "reason",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "short"
|
||||
},
|
||||
{
|
||||
"alias": "",
|
||||
"colorMode": null,
|
||||
"colors": [ ],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"pattern": "/.*/",
|
||||
"thresholds": [ ],
|
||||
"type": "string",
|
||||
"unit": "short"
|
||||
}
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Workload-based scaling",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"transform": "table",
|
||||
"type": "table",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Scaling",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "namespace",
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Mimir / Scaling",
|
||||
"uid": "64bbad83507b7289b514725658e10352",
|
||||
"version": 0
|
||||
}
|
323
charts/meta-monitoring/src/dashboards/mimir-slow-queries.json
Normal file
323
charts/meta-monitoring/src/dashboards/mimir-slow-queries.json
Normal file
@@ -0,0 +1,323 @@
|
||||
{
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "8.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Mimir dashboards",
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"datasource": "${lokidatasource}",
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Time range"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "mappings",
|
||||
"value": [
|
||||
{
|
||||
"from": "",
|
||||
"id": 1,
|
||||
"text": "Instant query",
|
||||
"to": "",
|
||||
"type": 1,
|
||||
"value": "0"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "s"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Step"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "s"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"id": 1,
|
||||
"span": 12,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | response_time > ${min_duration}",
|
||||
"instant": false,
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Slow queries",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "extractFields",
|
||||
"options": {
|
||||
"source": "labels"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "calculateField",
|
||||
"options": {
|
||||
"alias": "Time range",
|
||||
"binary": {
|
||||
"left": "param_end",
|
||||
"operator": "-",
|
||||
"reducer": "sum",
|
||||
"right": "param_start"
|
||||
},
|
||||
"mode": "binary",
|
||||
"reduce": {
|
||||
"reducer": "sum"
|
||||
},
|
||||
"replaceFields": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Line": true,
|
||||
"Time": true,
|
||||
"caller": true,
|
||||
"cluster": true,
|
||||
"container": true,
|
||||
"host": true,
|
||||
"id": true,
|
||||
"job": true,
|
||||
"labels": true,
|
||||
"level": true,
|
||||
"line": true,
|
||||
"method": true,
|
||||
"msg": true,
|
||||
"name": true,
|
||||
"namespace": true,
|
||||
"param_end": true,
|
||||
"param_start": true,
|
||||
"param_time": true,
|
||||
"path": true,
|
||||
"pod": true,
|
||||
"pod_template_hash": true,
|
||||
"query_wall_time_seconds": true,
|
||||
"stream": true,
|
||||
"traceID": true,
|
||||
"tsNs": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time range": 3,
|
||||
"param_query": 2,
|
||||
"param_step": 4,
|
||||
"response_time": 5,
|
||||
"ts": 0,
|
||||
"user": 1
|
||||
},
|
||||
"renameByName": {
|
||||
"org_id": "Tenant ID",
|
||||
"param_query": "Query",
|
||||
"param_step": "Step",
|
||||
"response_time": "Duration"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"mimir"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Logs datasource",
|
||||
"multi": false,
|
||||
"name": "lokidatasource",
|
||||
"query": "loki",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "5s",
|
||||
"value": "5s"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Min duration",
|
||||
"name": "min_duration",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "5s",
|
||||
"value": "5s"
|
||||
}
|
||||
],
|
||||
"query": "5s",
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": ".*",
|
||||
"value": ".*"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Tenant ID",
|
||||
"name": "tenant_id",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": ".*",
|
||||
"value": ".*"
|
||||
}
|
||||
],
|
||||
"query": ".*",
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Mimir / Slow queries",
|
||||
"uid": "6089e1ce1e678788f46312a0a1e647e6",
|
||||
"version": 0
|
||||
}
|
2817
charts/meta-monitoring/src/dashboards/mimir-tenants.json
Normal file
2817
charts/meta-monitoring/src/dashboards/mimir-tenants.json
Normal file
File diff suppressed because it is too large
Load Diff
1467
charts/meta-monitoring/src/dashboards/mimir-top-tenants.json
Normal file
1467
charts/meta-monitoring/src/dashboards/mimir-top-tenants.json
Normal file
File diff suppressed because it is too large
Load Diff
1135
charts/meta-monitoring/src/dashboards/mimir-writes-networking.json
Normal file
1135
charts/meta-monitoring/src/dashboards/mimir-writes-networking.json
Normal file
File diff suppressed because it is too large
Load Diff
1373
charts/meta-monitoring/src/dashboards/mimir-writes-resources.json
Normal file
1373
charts/meta-monitoring/src/dashboards/mimir-writes-resources.json
Normal file
File diff suppressed because it is too large
Load Diff
2914
charts/meta-monitoring/src/dashboards/mimir-writes.json
Normal file
2914
charts/meta-monitoring/src/dashboards/mimir-writes.json
Normal file
File diff suppressed because it is too large
Load Diff
7010
charts/meta-monitoring/src/dashboards/tempo-operational.json
Normal file
7010
charts/meta-monitoring/src/dashboards/tempo-operational.json
Normal file
File diff suppressed because it is too large
Load Diff
1612
charts/meta-monitoring/src/dashboards/tempo-reads.json
Normal file
1612
charts/meta-monitoring/src/dashboards/tempo-reads.json
Normal file
File diff suppressed because it is too large
Load Diff
2431
charts/meta-monitoring/src/dashboards/tempo-resources.json
Normal file
2431
charts/meta-monitoring/src/dashboards/tempo-resources.json
Normal file
File diff suppressed because it is too large
Load Diff
1559
charts/meta-monitoring/src/dashboards/tempo-rollout-progress.json
Normal file
1559
charts/meta-monitoring/src/dashboards/tempo-rollout-progress.json
Normal file
File diff suppressed because it is too large
Load Diff
1181
charts/meta-monitoring/src/dashboards/tempo-tenants.json
Normal file
1181
charts/meta-monitoring/src/dashboards/tempo-tenants.json
Normal file
File diff suppressed because it is too large
Load Diff
1738
charts/meta-monitoring/src/dashboards/tempo-writes.json
Normal file
1738
charts/meta-monitoring/src/dashboards/tempo-writes.json
Normal file
File diff suppressed because it is too large
Load Diff
53
charts/meta-monitoring/src/rules/loki-rules.yaml
Normal file
53
charts/meta-monitoring/src/rules/loki-rules.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
groups:
|
||||
- name: loki_rules
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:loki_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:loki_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[1m]))
|
||||
by (cluster, job)
|
||||
record: cluster_job:loki_request_duration_seconds:avg
|
||||
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job)
|
||||
record: cluster_job:loki_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job)
|
||||
record: cluster_job:loki_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job)
|
||||
record: cluster_job:loki_request_duration_seconds_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, route))
|
||||
record: cluster_job_route:loki_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, route))
|
||||
record: cluster_job_route:loki_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route)
|
||||
/ sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route)
|
||||
record: cluster_job_route:loki_request_duration_seconds:avg
|
||||
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job,
|
||||
route)
|
||||
record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route)
|
||||
record: cluster_job_route:loki_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route)
|
||||
record: cluster_job_route:loki_request_duration_seconds_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, namespace, job, route))
|
||||
record: cluster_namespace_job_route:loki_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, namespace, job, route))
|
||||
record: cluster_namespace_job_route:loki_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace,
|
||||
job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster,
|
||||
namespace, job, route)
|
||||
record: cluster_namespace_job_route:loki_request_duration_seconds:avg
|
||||
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace,
|
||||
job, route)
|
||||
record: cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace,
|
||||
job, route)
|
||||
record: cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace,
|
||||
job, route)
|
||||
record: cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate
|
571
charts/meta-monitoring/src/rules/mimir-rules.yaml
Normal file
571
charts/meta-monitoring/src/rules/mimir-rules.yaml
Normal file
@@ -0,0 +1,571 @@
|
||||
groups:
|
||||
- name: mimir_api_1
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[1m]))
|
||||
by (cluster, job)
|
||||
record: cluster_job:cortex_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job)
|
||||
record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_request_duration_seconds_count:sum_rate
|
||||
- name: mimir_api_2
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, route))
|
||||
record: cluster_job_route:cortex_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, route))
|
||||
record: cluster_job_route:cortex_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route)
|
||||
/ sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)
|
||||
record: cluster_job_route:cortex_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job,
|
||||
route)
|
||||
record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route)
|
||||
record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)
|
||||
record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate
|
||||
- name: mimir_api_3
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, namespace, job, route))
|
||||
record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, namespace, job, route))
|
||||
record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace,
|
||||
job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster,
|
||||
namespace, job, route)
|
||||
record: cluster_namespace_job_route:cortex_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace,
|
||||
job, route)
|
||||
record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace,
|
||||
job, route)
|
||||
record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace,
|
||||
job, route)
|
||||
record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate
|
||||
- name: mimir_querier_api
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_querier_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_querier_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster,
|
||||
job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_querier_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, route))
|
||||
record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, route))
|
||||
record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster,
|
||||
job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by
|
||||
(cluster, job, route)
|
||||
record: cluster_job_route:cortex_querier_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster,
|
||||
job, route)
|
||||
record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster,
|
||||
job, route)
|
||||
record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster,
|
||||
job, route)
|
||||
record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, namespace, job, route))
|
||||
record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, namespace, job, route))
|
||||
record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster,
|
||||
namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m]))
|
||||
by (cluster, namespace, job, route)
|
||||
record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster,
|
||||
namespace, job, route)
|
||||
record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster,
|
||||
namespace, job, route)
|
||||
record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster,
|
||||
namespace, job, route)
|
||||
record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate
|
||||
- name: mimir_cache
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, method))
|
||||
record: cluster_job_method:cortex_memcache_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, method))
|
||||
record: cluster_job_method:cortex_memcache_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster,
|
||||
job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[1m]))
|
||||
by (cluster, job, method)
|
||||
record: cluster_job_method:cortex_memcache_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster,
|
||||
job, method)
|
||||
record: cluster_job_method:cortex_memcache_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster,
|
||||
job, method)
|
||||
record: cluster_job_method:cortex_memcache_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster,
|
||||
job, method)
|
||||
record: cluster_job_method:cortex_memcache_request_duration_seconds_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_cache_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_cache_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job)
|
||||
/ sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_cache_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_cache_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_cache_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_cache_request_duration_seconds_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, method))
|
||||
record: cluster_job_method:cortex_cache_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job, method))
|
||||
record: cluster_job_method:cortex_cache_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job,
|
||||
method) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
|
||||
job, method)
|
||||
record: cluster_job_method:cortex_cache_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster,
|
||||
job, method)
|
||||
record: cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job,
|
||||
method)
|
||||
record: cluster_job_method:cortex_cache_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
|
||||
job, method)
|
||||
record: cluster_job_method:cortex_cache_request_duration_seconds_count:sum_rate
|
||||
- name: mimir_storage
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_kv_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_kv_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job)
|
||||
/ sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_kv_request_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate
|
||||
- name: mimir_queries
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_query_frontend_retries:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_query_frontend_retries:50quantile
|
||||
- expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m]))
|
||||
by (cluster, job)
|
||||
record: cluster_job:cortex_query_frontend_retries:avg
|
||||
- expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)
|
||||
record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_query_frontend_retries_sum:sum_rate
|
||||
- expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_query_frontend_retries_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile
|
||||
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster,
|
||||
job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by
|
||||
(cluster, job)
|
||||
record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg
|
||||
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le,
|
||||
cluster, job)
|
||||
record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate
|
||||
- name: mimir_ingester_queries
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_ingester_queried_series:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_ingester_queried_series:50quantile
|
||||
- expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[1m]))
|
||||
by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_series:avg
|
||||
- expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_series_sum:sum_rate
|
||||
- expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_series_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_ingester_queried_samples:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_ingester_queried_samples:50quantile
|
||||
- expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[1m]))
|
||||
by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_samples:avg
|
||||
- expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate
|
||||
- expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_samples_count:sum_rate
|
||||
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_ingester_queried_exemplars:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m]))
|
||||
by (le, cluster, job))
|
||||
record: cluster_job:cortex_ingester_queried_exemplars:50quantile
|
||||
- expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) /
|
||||
sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_exemplars:avg
|
||||
- expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster,
|
||||
job)
|
||||
record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate
|
||||
- expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate
|
||||
- expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job)
|
||||
record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate
|
||||
- name: mimir_received_samples
|
||||
rules:
|
||||
- expr: |
|
||||
sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m]))
|
||||
record: cluster_namespace_job:cortex_distributor_received_samples:rate5m
|
||||
- name: mimir_exemplars_in
|
||||
rules:
|
||||
- expr: |
|
||||
sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m]))
|
||||
record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m
|
||||
- name: mimir_received_exemplars
|
||||
rules:
|
||||
- expr: |
|
||||
sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m]))
|
||||
record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m
|
||||
- name: mimir_exemplars_ingested
|
||||
rules:
|
||||
- expr: |
|
||||
sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m]))
|
||||
record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m
|
||||
- name: mimir_exemplars_appended
|
||||
rules:
|
||||
- expr: |
|
||||
sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m]))
|
||||
record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m
|
||||
- name: mimir_scaling_rules
|
||||
rules:
|
||||
- expr: |
|
||||
# Convenience rule to get the number of replicas for both a deployment and a statefulset.
|
||||
# Multi-zone deployments are grouped together removing the "zone-X" suffix.
|
||||
sum by (cluster, namespace, deployment) (
|
||||
label_replace(
|
||||
kube_deployment_spec_replicas,
|
||||
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
|
||||
# always matches everything and the (optional) zone is not removed.
|
||||
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
|
||||
)
|
||||
)
|
||||
or
|
||||
sum by (cluster, namespace, deployment) (
|
||||
label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")
|
||||
)
|
||||
record: cluster_namespace_deployment:actual_replicas:count
|
||||
- expr: |
|
||||
ceil(
|
||||
quantile_over_time(0.99,
|
||||
sum by (cluster, namespace) (
|
||||
cluster_namespace_job:cortex_distributor_received_samples:rate5m
|
||||
)[24h:]
|
||||
)
|
||||
/ 240000
|
||||
)
|
||||
labels:
|
||||
deployment: distributor
|
||||
reason: sample_rate
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- expr: |
|
||||
ceil(
|
||||
sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"})
|
||||
* 0.59999999999999998 / 240000
|
||||
)
|
||||
labels:
|
||||
deployment: distributor
|
||||
reason: sample_rate_limits
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- expr: |
|
||||
ceil(
|
||||
quantile_over_time(0.99,
|
||||
sum by (cluster, namespace) (
|
||||
cluster_namespace_job:cortex_distributor_received_samples:rate5m
|
||||
)[24h:]
|
||||
)
|
||||
* 3 / 80000
|
||||
)
|
||||
labels:
|
||||
deployment: ingester
|
||||
reason: sample_rate
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- expr: |
|
||||
ceil(
|
||||
quantile_over_time(0.99,
|
||||
sum by(cluster, namespace) (
|
||||
cortex_ingester_memory_series
|
||||
)[24h:]
|
||||
)
|
||||
/ 1500000
|
||||
)
|
||||
labels:
|
||||
deployment: ingester
|
||||
reason: active_series
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- expr: |
|
||||
ceil(
|
||||
sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"})
|
||||
* 3 * 0.59999999999999998 / 1500000
|
||||
)
|
||||
labels:
|
||||
deployment: ingester
|
||||
reason: active_series_limits
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- expr: |
|
||||
ceil(
|
||||
sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"})
|
||||
* 0.59999999999999998 / 80000
|
||||
)
|
||||
labels:
|
||||
deployment: ingester
|
||||
reason: sample_rate_limits
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- expr: |
|
||||
ceil(
|
||||
(sum by (cluster, namespace) (
|
||||
cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"}
|
||||
) / 4)
|
||||
/
|
||||
avg by (cluster, namespace) (
|
||||
memcached_limit_bytes{job=~".+/memcached"}
|
||||
)
|
||||
)
|
||||
labels:
|
||||
deployment: memcached
|
||||
reason: active_series
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- expr: |
|
||||
sum by (cluster, namespace, deployment) (
|
||||
label_replace(
|
||||
label_replace(
|
||||
sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])),
|
||||
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
|
||||
),
|
||||
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
|
||||
# always matches everything and the (optional) zone is not removed.
|
||||
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
|
||||
)
|
||||
)
|
||||
record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate
|
||||
- expr: |
|
||||
# Convenience rule to get the CPU request for both a deployment and a statefulset.
|
||||
# Multi-zone deployments are grouped together removing the "zone-X" suffix.
|
||||
# This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
|
||||
# that remove resource metrics, ref:
|
||||
# - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16
|
||||
# - https://github.com/kubernetes/kube-state-metrics/pull/1004
|
||||
#
|
||||
# This is the old expression, compatible with kube-state-metrics < v2.0.0,
|
||||
# the release in which kube_pod_container_resource_requests_cpu_cores was removed:
|
||||
(
|
||||
sum by (cluster, namespace, deployment) (
|
||||
label_replace(
|
||||
label_replace(
|
||||
kube_pod_container_resource_requests_cpu_cores,
|
||||
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
|
||||
),
|
||||
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
|
||||
# always matches everything and the (optional) zone is not removed.
|
||||
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
|
||||
)
|
||||
)
|
||||
)
|
||||
or
|
||||
# This expression is compatible with kube-state-metrics >= v1.4.0,
|
||||
# where kube_pod_container_resource_requests was introduced.
|
||||
(
|
||||
sum by (cluster, namespace, deployment) (
|
||||
label_replace(
|
||||
label_replace(
|
||||
kube_pod_container_resource_requests{resource="cpu"},
|
||||
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
|
||||
),
|
||||
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
|
||||
# always matches everything and the (optional) zone is not removed.
|
||||
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
|
||||
)
|
||||
)
|
||||
)
|
||||
record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum
|
||||
- expr: |
|
||||
# Jobs should be sized to their CPU usage.
|
||||
# We do this by comparing 99th percentile usage over the last 24hrs to
|
||||
# their currently provisioned replica count and resource requests.
|
||||
ceil(
|
||||
cluster_namespace_deployment:actual_replicas:count
|
||||
*
|
||||
quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h])
|
||||
/
|
||||
cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum
|
||||
)
|
||||
labels:
|
||||
reason: cpu_usage
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- expr: |
|
||||
# Convenience rule to get the Memory utilization for both a deployment and a statefulset.
|
||||
# Multi-zone deployments are grouped together removing the "zone-X" suffix.
|
||||
sum by (cluster, namespace, deployment) (
|
||||
label_replace(
|
||||
label_replace(
|
||||
container_memory_usage_bytes{image!=""},
|
||||
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
|
||||
),
|
||||
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
|
||||
# always matches everything and the (optional) zone is not removed.
|
||||
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
|
||||
)
|
||||
)
|
||||
record: cluster_namespace_deployment:container_memory_usage_bytes:sum
|
||||
- expr: |
|
||||
# Convenience rule to get the Memory request for both a deployment and a statefulset.
|
||||
# Multi-zone deployments are grouped together removing the "zone-X" suffix.
|
||||
# This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
|
||||
# that remove resource metrics, ref:
|
||||
# - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16
|
||||
# - https://github.com/kubernetes/kube-state-metrics/pull/1004
|
||||
#
|
||||
# This is the old expression, compatible with kube-state-metrics < v2.0.0,
|
||||
# the release in which kube_pod_container_resource_requests_memory_bytes was removed:
|
||||
(
|
||||
sum by (cluster, namespace, deployment) (
|
||||
label_replace(
|
||||
label_replace(
|
||||
kube_pod_container_resource_requests_memory_bytes,
|
||||
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
|
||||
),
|
||||
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
|
||||
# always matches everything and the (optional) zone is not removed.
|
||||
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
|
||||
)
|
||||
)
|
||||
)
|
||||
or
|
||||
# This expression is compatible with kube-state-metrics >= v1.4.0,
|
||||
# where kube_pod_container_resource_requests was introduced.
|
||||
(
|
||||
sum by (cluster, namespace, deployment) (
|
||||
label_replace(
|
||||
label_replace(
|
||||
kube_pod_container_resource_requests{resource="memory"},
|
||||
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
|
||||
),
|
||||
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
|
||||
# always matches everything and the (optional) zone is not removed.
|
||||
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
|
||||
)
|
||||
)
|
||||
)
|
||||
record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum
|
||||
- expr: |
|
||||
# Jobs should be sized to their Memory usage.
|
||||
# We do this by comparing 99th percentile usage over the last 24hrs to
|
||||
# their currently provisioned replica count and resource requests.
|
||||
ceil(
|
||||
cluster_namespace_deployment:actual_replicas:count
|
||||
*
|
||||
quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h])
|
||||
/
|
||||
cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum
|
||||
)
|
||||
labels:
|
||||
reason: memory_usage
|
||||
record: cluster_namespace_deployment_reason:required_replicas:count
|
||||
- name: mimir_alertmanager_rules
|
||||
rules:
|
||||
- expr: |
|
||||
sum by (cluster, job, pod) (cortex_alertmanager_alerts)
|
||||
record: cluster_job_pod:cortex_alertmanager_alerts:sum
|
||||
- expr: |
|
||||
sum by (cluster, job, pod) (cortex_alertmanager_silences)
|
||||
record: cluster_job_pod:cortex_alertmanager_silences:sum
|
||||
- expr: |
|
||||
sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m]))
|
||||
record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m
|
||||
- expr: |
|
||||
sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m]))
|
||||
record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m
|
||||
- expr: |
|
||||
sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m]))
|
||||
record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m
|
||||
- expr: |
|
||||
sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m]))
|
||||
record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m
|
||||
- expr: |
|
||||
sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m]))
|
||||
record: cluster_job:cortex_alertmanager_state_replication_total:rate5m
|
||||
- expr: |
|
||||
sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m]))
|
||||
record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m
|
||||
- expr: |
|
||||
sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m]))
|
||||
record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m
|
||||
- expr: |
|
||||
sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m]))
|
||||
record: cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m
|
||||
- name: mimir_ingester_rules
|
||||
rules:
|
||||
- expr: |
|
||||
sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m]))
|
||||
record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m
|
15
charts/meta-monitoring/src/rules/tempo-rules.yaml
Normal file
15
charts/meta-monitoring/src/rules/tempo-rules.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
groups:
|
||||
- name: tempo_rules
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))
|
||||
record: cluster_namespace_job_route:tempo_request_duration_seconds:99quantile
|
||||
- expr: histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))
|
||||
record: cluster_namespace_job_route:tempo_request_duration_seconds:50quantile
|
||||
- expr: sum(rate(tempo_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(tempo_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)
|
||||
record: cluster_namespace_job_route:tempo_request_duration_seconds:avg
|
||||
- expr: sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)
|
||||
record: cluster_namespace_job_route:tempo_request_duration_seconds_bucket:sum_rate
|
||||
- expr: sum(rate(tempo_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route)
|
||||
record: cluster_namespace_job_route:tempo_request_duration_seconds_sum:sum_rate
|
||||
- expr: sum(rate(tempo_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)
|
||||
record: cluster_namespace_job_route:tempo_request_duration_seconds_count:sum_rate
|
@@ -17,6 +17,14 @@
|
||||
{{- join ", " $list }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "agent.loki_process_targets" -}}
|
||||
{{- if empty .Values.logs.piiRegexes }}
|
||||
{{- include "agent.loki_write_targets" . }}
|
||||
{{- else }}
|
||||
{{- printf "loki.process.PII.receiver" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "agent.prometheus_write_targets" -}}
|
||||
{{- $list := list }}
|
||||
{{- if .Values.local.metrics.enabled }}
|
||||
|
@@ -37,20 +37,154 @@ data:
|
||||
}
|
||||
}
|
||||
|
||||
// Logs
|
||||
|
||||
{{- if or .Values.local.logs.enabled .Values.cloud.logs.enabled }}
|
||||
loki.source.kubernetes "pods" {
|
||||
targets = discovery.relabel.rename_meta_labels.output
|
||||
forward_to = [ {{ include "agent.loki_process_targets" . }} ]
|
||||
}
|
||||
|
||||
{{- if not (empty .Values.logs.piiRegexes) }}
|
||||
loki.process "PII" {
|
||||
forward_to = [ {{ include "agent.loki_write_targets" . }} ]
|
||||
|
||||
{{- range .Values.logs.piiRegexes }}
|
||||
stage.replace {
|
||||
expression = "{{ .expression }}"
|
||||
source = "{{ .source }}"
|
||||
replace = "{{ .replace }}"
|
||||
}
|
||||
{{- end }}
|
||||
}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
// Metrics
|
||||
|
||||
{{- if or .Values.local.metrics.enabled .Values.cloud.metrics.enabled }}
|
||||
prometheus.scrape "pods" {
|
||||
targets = discovery.relabel.rename_meta_labels.output
|
||||
forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
|
||||
}
|
||||
{{- if .Values.kubeStateMetrics.enabled }}
|
||||
|
||||
prometheus.scrape "kubeStateMetrics" {
|
||||
targets = [ { "__address__" = "{{ .Values.kubeStateMetrics.endpoint }}" } ]
|
||||
forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
|
||||
}
|
||||
{{- end }}
|
||||
|
||||
// cAdvisor and Kubelet metrics
|
||||
// Based on https://github.com/Chewie/loutretelecom-manifests/blob/main/manifests/addons/monitoring/config.river
|
||||
discovery.kubernetes "all_nodes" {
|
||||
role = "node"
|
||||
}
|
||||
|
||||
discovery.relabel "all_nodes" {
|
||||
targets = discovery.kubernetes.all_nodes.targets
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_node_name"]
|
||||
target_label = "node"
|
||||
}
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_namespace"]
|
||||
target_label = "namespace"
|
||||
}
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_name"]
|
||||
target_label = "pod"
|
||||
}
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"]
|
||||
separator = "/"
|
||||
regex = "(.*)/(.*)/(.*)"
|
||||
replacement = "${1}/${2}-${3}"
|
||||
target_label = "job"
|
||||
}
|
||||
rule {
|
||||
target_label = "cluster"
|
||||
replacement = "{{- .Values.clusterName -}}"
|
||||
}
|
||||
}
|
||||
|
||||
prometheus.scrape "cadvisor" {
|
||||
targets = discovery.relabel.all_nodes.output
|
||||
forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
|
||||
|
||||
scrape_interval = "15s"
|
||||
metrics_path = "/metrics/cadvisor"
|
||||
scheme = "https"
|
||||
|
||||
bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
|
||||
tls_config {
|
||||
ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
|
||||
}
|
||||
}
|
||||
|
||||
prometheus.scrape "kubelet" {
|
||||
targets = discovery.relabel.all_nodes.output
|
||||
forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
|
||||
|
||||
scrape_interval = "15s"
|
||||
metrics_path = "/metrics"
|
||||
scheme = "https"
|
||||
|
||||
bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
|
||||
tls_config {
|
||||
ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
|
||||
}
|
||||
}
|
||||
|
||||
prometheus.exporter.unix {}
|
||||
|
||||
prometheus.scrape "node_exporter" {
|
||||
targets = prometheus.exporter.unix.targets
|
||||
forward_to = [prometheus.relabel.node_exporter.receiver]
|
||||
|
||||
job_name = "node-exporter"
|
||||
scrape_interval = "15s"
|
||||
}
|
||||
|
||||
prometheus.relabel "node_exporter" {
|
||||
forward_to = [ {{ include "agent.prometheus_write_targets" . }} ]
|
||||
|
||||
rule {
|
||||
replacement = env("HOSTNAME")
|
||||
target_label = "nodename"
|
||||
}
|
||||
rule {
|
||||
replacement = "node-exporter"
|
||||
target_label = "job"
|
||||
}
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_node_name"]
|
||||
target_label = "node"
|
||||
}
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_namespace"]
|
||||
target_label = "namespace"
|
||||
}
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_name"]
|
||||
target_label = "pod"
|
||||
}
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"]
|
||||
separator = "/"
|
||||
regex = "(.*)/(.*)/(.*)"
|
||||
replacement = "${1}/${2}-${3}"
|
||||
target_label = "job"
|
||||
}
|
||||
rule {
|
||||
target_label = "cluster"
|
||||
replacement = "{{- .Values.clusterName -}}"
|
||||
}
|
||||
}
|
||||
{{- end }}
|
||||
|
||||
// Traces
|
||||
|
||||
{{- if or .Values.local.traces.enabled .Values.cloud.traces.enabled }}
|
||||
// Shamelessly copied from https://github.com/grafana/intro-to-mlt/blob/main/agent/config.river
|
||||
otelcol.receiver.otlp "otlp_receiver" {
|
||||
|
@@ -0,0 +1,19 @@
|
||||
{{- if .Values.dashboards.traces.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: agent-dashboards-1
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
"agent-logs-pipeline.json": |
|
||||
{{ $.Files.Get "src/dashboards/agent-logs-pipeline.json" | fromJson | toJson }}
|
||||
"agent-operational.json": |
|
||||
{{ $.Files.Get "src/dashboards/agent-operational.json" | fromJson | toJson }}
|
||||
"agent-remote-write.json": |
|
||||
{{ $.Files.Get "src/dashboards/agent-remote-write.json" | fromJson | toJson }}
|
||||
"agent-tracing-pipeline.json": |
|
||||
{{ $.Files.Get "src/dashboards/agent-tracing-pipeline.json" | fromJson | toJson }}
|
||||
"agent.json": |
|
||||
{{ $.Files.Get "src/dashboards/agent.json" | fromJson | toJson }}
|
||||
{{- end }}
|
@@ -1,15 +1,16 @@
|
||||
{{- if .Values.local.logs.enabled }}
|
||||
{{- if or (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled) .Values.dashboards.traces.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: loki-dashboards-provisioning
|
||||
name: dashboards-provisioning
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
dashboards.yaml: |
|
||||
---
|
||||
apiVersion: 1
|
||||
providers:
|
||||
{{- if .Values.dashboards.logs.enabled }}
|
||||
- disableDeletion: true
|
||||
editable: false
|
||||
folder: Loki
|
||||
@@ -27,3 +28,64 @@ data:
|
||||
orgId: 1
|
||||
type: file
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
- disableDeletion: true
|
||||
editable: false
|
||||
folder: Mimir
|
||||
name: mimir-1
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/mimir-1
|
||||
orgId: 1
|
||||
type: file
|
||||
- disableDeletion: true
|
||||
editable: false
|
||||
folder: Mimir
|
||||
name: mimir-2
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/mimir-2
|
||||
orgId: 1
|
||||
type: file
|
||||
- disableDeletion: true
|
||||
editable: false
|
||||
folder: Mimir
|
||||
name: mimir-3
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/mimir-3
|
||||
orgId: 1
|
||||
type: file
|
||||
- disableDeletion: true
|
||||
editable: false
|
||||
folder: Mimir
|
||||
name: mimir-4
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/mimir-4
|
||||
orgId: 1
|
||||
type: file
|
||||
- disableDeletion: true
|
||||
editable: false
|
||||
folder: Mimir
|
||||
name: mimir-5
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/mimir-5
|
||||
orgId: 1
|
||||
type: file
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.traces.enabled }}
|
||||
- disableDeletion: true
|
||||
editable: false
|
||||
folder: Tempo
|
||||
name: tempo-1
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/tempo-1
|
||||
orgId: 1
|
||||
type: file
|
||||
{{- end }}
|
||||
- disableDeletion: true
|
||||
editable: false
|
||||
folder: Agent
|
||||
name: agent-1
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/agent-1
|
||||
orgId: 1
|
||||
type: file
|
||||
{{- end }}
|
@@ -3,7 +3,7 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: loki-datasources-provisioning
|
||||
name: datasources-provisioning
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
datasources.yaml: |
|
||||
|
@@ -64,26 +64,46 @@ spec:
|
||||
- mountPath: /var/lib/grafana
|
||||
name: grafana-pv
|
||||
- mountPath: /etc/grafana/provisioning/datasources
|
||||
name: loki-datasources-provisioning
|
||||
{{- if .Values.local.logs.enabled }}
|
||||
name: datasources-provisioning
|
||||
{{- if or (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled) .Values.dashboards.traces.enabled }}
|
||||
- mountPath: /etc/grafana/provisioning/dashboards
|
||||
name: loki-dashboards-provisioning
|
||||
name: dashboards-provisioning
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.logs.enabled }}
|
||||
- mountPath: /var/lib/grafana/dashboards/loki-1
|
||||
name: loki-dashboards-1
|
||||
- mountPath: /var/lib/grafana/dashboards/loki-2
|
||||
name: loki-dashboards-2
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
- mountPath: /var/lib/grafana/dashboards/mimir-1
|
||||
name: mimir-dashboards-1
|
||||
- mountPath: /var/lib/grafana/dashboards/mimir-2
|
||||
name: mimir-dashboards-2
|
||||
- mountPath: /var/lib/grafana/dashboards/mimir-3
|
||||
name: mimir-dashboards-3
|
||||
- mountPath: /var/lib/grafana/dashboards/mimir-4
|
||||
name: mimir-dashboards-4
|
||||
- mountPath: /var/lib/grafana/dashboards/mimir-5
|
||||
name: mimir-dashboards-5
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.traces.enabled }}
|
||||
- mountPath: /var/lib/grafana/dashboards/tempo-1
|
||||
name: tempo-dashboards-1
|
||||
{{- end }}
|
||||
- mountPath: /var/lib/grafana/dashboards/agent-1
|
||||
name: agent-dashboards-1
|
||||
volumes:
|
||||
- name: grafana-pv
|
||||
persistentVolumeClaim:
|
||||
claimName: grafana-pvc
|
||||
- name: loki-datasources-provisioning
|
||||
- name: datasources-provisioning
|
||||
configMap:
|
||||
name: loki-datasources-provisioning
|
||||
{{- if .Values.local.logs.enabled }}
|
||||
- name: loki-dashboards-provisioning
|
||||
name: datasources-provisioning
|
||||
- name: dashboards-provisioning
|
||||
configMap:
|
||||
name: loki-dashboards-provisioning
|
||||
name: dashboards-provisioning
|
||||
{{- if .Values.dashboards.logs.enabled }}
|
||||
- name: loki-dashboards-1
|
||||
configMap:
|
||||
name: loki-dashboards-1
|
||||
@@ -91,6 +111,31 @@ spec:
|
||||
configMap:
|
||||
name: loki-dashboards-2
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
- name: mimir-dashboards-1
|
||||
configMap:
|
||||
name: mimir-dashboards-1
|
||||
- name: mimir-dashboards-2
|
||||
configMap:
|
||||
name: mimir-dashboards-2
|
||||
- name: mimir-dashboards-3
|
||||
configMap:
|
||||
name: mimir-dashboards-3
|
||||
- name: mimir-dashboards-4
|
||||
configMap:
|
||||
name: mimir-dashboards-4
|
||||
- name: mimir-dashboards-5
|
||||
configMap:
|
||||
name: mimir-dashboards-5
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.traces.enabled }}
|
||||
- name: tempo-dashboards-1
|
||||
configMap:
|
||||
name: tempo-dashboards-1
|
||||
{{- end }}
|
||||
- name: agent-dashboards-1
|
||||
configMap:
|
||||
name: agent-dashboards-1
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
|
@@ -1,4 +1,4 @@
|
||||
{{- if .Values.local.logs.enabled }}
|
||||
{{- if .Values.dashboards.logs.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
@@ -1,4 +1,4 @@
|
||||
{{- if .Values.local.logs.enabled }}
|
||||
{{- if .Values.dashboards.logs.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
@@ -0,0 +1,19 @@
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: mimir-dashboards-5
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
"mimir-tenants.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-tenants.json" | fromJson | toJson }}
|
||||
"mimir-top-tenants.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-top-tenants.json" | fromJson | toJson }}
|
||||
"mimir-writes-networking.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-writes-networking.json" | fromJson | toJson }}
|
||||
"mimir-writes-resources.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-writes-resources.json" | fromJson | toJson }}
|
||||
"mimir-writes.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-writes.json" | fromJson | toJson }}
|
||||
{{- end }}
|
@@ -0,0 +1,19 @@
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: mimir-dashboards-1
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
"mimir-alertmanager-resources.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-alertmanager-resources.json" | fromJson | toJson }}
|
||||
"mimir-alertmanager.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-alertmanager.json" | fromJson | toJson }}
|
||||
"mimir-compactor-resources.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-compactor-resources.json" | fromJson | toJson }}
|
||||
"mimir-compactor.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-compactor.json" | fromJson | toJson }}
|
||||
"mimir-config.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-config.json" | fromJson | toJson }}
|
||||
{{- end }}
|
@@ -0,0 +1,19 @@
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: mimir-dashboards-2
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
"mimir-object-store.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-object-store.json" | fromJson | toJson }}
|
||||
"mimir-overrides.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-overrides.json" | fromJson | toJson }}
|
||||
"mimir-overview-networking.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-overview-networking.json" | fromJson | toJson }}
|
||||
"mimir-overview-resources.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-overview-resources.json" | fromJson | toJson }}
|
||||
"mimir-overview.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-overview.json" | fromJson | toJson }}
|
||||
{{- end }}
|
@@ -0,0 +1,19 @@
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: mimir-dashboards-3
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
"mimir-queries.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-queries.json" | fromJson | toJson }}
|
||||
"mimir-reads-networking.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-reads-networking.json" | fromJson | toJson }}
|
||||
"mimir-reads-resources.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-reads-resources.json" | fromJson | toJson }}
|
||||
"mimir-reads.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-reads.json" | fromJson | toJson }}
|
||||
"mimir-remote-ruler-reads-resources.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-remote-ruler-reads-resources.json" | fromJson | toJson }}
|
||||
{{- end }}
|
@@ -0,0 +1,19 @@
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: mimir-dashboards-4
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
"mimir-remote-ruler-reads.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-remote-ruler-reads.json" | fromJson | toJson }}
|
||||
"mimir-rollout-progress.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-rollout-progress.json" | fromJson | toJson }}
|
||||
"mimir-ruler.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-ruler.json" | fromJson | toJson }}
|
||||
"mimir-scaling.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-scaling.json" | fromJson | toJson }}
|
||||
"mimir-slow-queries.json": |
|
||||
{{ $.Files.Get "src/dashboards/mimir-slow-queries.json" | fromJson | toJson }}
|
||||
{{- end }}
|
@@ -0,0 +1,21 @@
|
||||
{{- if .Values.dashboards.traces.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: tempo-dashboards-1
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
"tempo-operational.json": |
|
||||
{{ $.Files.Get "src/dashboards/tempo-operational.json" | fromJson | toJson }}
|
||||
"tempo-reads.json": |
|
||||
{{ $.Files.Get "src/dashboards/tempo-reads.json" | fromJson | toJson }}
|
||||
"tempo-resources.json": |
|
||||
{{ $.Files.Get "src/dashboards/tempo-resources.json" | fromJson | toJson }}
|
||||
"tempo-rollout-progress.json": |
|
||||
{{ $.Files.Get "src/dashboards/tempo-rollout-progress.json" | fromJson | toJson }}
|
||||
"tempo-tenants.json": |
|
||||
{{ $.Files.Get "src/dashboards/tempo-tenants.json" | fromJson | toJson }}
|
||||
"tempo-writes.json": |
|
||||
{{ $.Files.Get "src/dashboards/tempo-writes.json" | fromJson | toJson }}
|
||||
{{- end }}
|
126
charts/meta-monitoring/templates/ruler/ruler.yaml
Normal file
126
charts/meta-monitoring/templates/ruler/ruler.yaml
Normal file
@@ -0,0 +1,126 @@
|
||||
{{- if or (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled) .Values.dashboards.traces.enabled }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: meta-mimir-ruler-for-dashboards
|
||||
namespace: meta
|
||||
spec:
|
||||
progressDeadlineSeconds: 600
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 10
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: ruler-for-dashboards
|
||||
app.kubernetes.io/instance: meta
|
||||
app.kubernetes.io/name: mimir
|
||||
strategy:
|
||||
rollingUpdate:
|
||||
maxSurge: 50%
|
||||
maxUnavailable: 0
|
||||
type: RollingUpdate
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: ruler-for-dashboards
|
||||
app.kubernetes.io/instance: meta
|
||||
app.kubernetes.io/name: mimir
|
||||
namespace: meta
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- -target=ruler
|
||||
- -log.level=debug
|
||||
- -ruler-storage.backend=local
|
||||
- -ruler-storage.local.directory=/etc/rules
|
||||
- -ruler.ring.prefix=dashboards/
|
||||
- -config.expand-env=true
|
||||
- -config.file=/etc/mimir/mimir.yaml
|
||||
image: grafana/mimir:2.8.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: ruler
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http-metrics
|
||||
protocol: TCP
|
||||
- containerPort: 9095
|
||||
name: grpc
|
||||
protocol: TCP
|
||||
- containerPort: 7946
|
||||
name: memberlist
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: http-metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 45
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /etc/mimir
|
||||
name: config
|
||||
- mountPath: /var/mimir
|
||||
name: runtime-config
|
||||
- mountPath: /data
|
||||
name: storage
|
||||
- mountPath: /active-query-tracker
|
||||
name: active-queries
|
||||
- mountPath: /etc/rules/anonymous
|
||||
name: rules
|
||||
dnsPolicy: ClusterFirst
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
securityContext:
|
||||
fsGroup: 10001
|
||||
runAsGroup: 10001
|
||||
runAsNonRoot: true
|
||||
runAsUser: 10001
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
serviceAccount: meta-mimir
|
||||
serviceAccountName: meta-mimir
|
||||
terminationGracePeriodSeconds: 180
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: ruler
|
||||
app.kubernetes.io/instance: meta
|
||||
app.kubernetes.io/name: mimir
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
items:
|
||||
- key: mimir.yaml
|
||||
path: mimir.yaml
|
||||
name: meta-mimir-config
|
||||
name: config
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: meta-mimir-runtime
|
||||
name: runtime-config
|
||||
- emptyDir: {}
|
||||
name: storage
|
||||
- emptyDir: {}
|
||||
name: active-queries
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: rules
|
||||
name: rules
|
||||
{{- end }}
|
18
charts/meta-monitoring/templates/ruler/rules-configmap.yaml
Normal file
18
charts/meta-monitoring/templates/ruler/rules-configmap.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
{{- if or (or .Values.dashboards.logs.enabled .Values.dashboards.metrics.enabled) .Values.dashboards.traces.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: rules
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
data:
|
||||
{{- if .Values.dashboards.logs.enabled }}
|
||||
{{ ($.Files.Glob "src/rules/loki-rules.yaml").AsConfig | indent 2 }}
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.metrics.enabled }}
|
||||
{{ ($.Files.Glob "src/rules/mimir-rules.yaml").AsConfig | indent 2 }}
|
||||
{{- end }}
|
||||
{{- if .Values.dashboards.traces.enabled }}
|
||||
{{ ($.Files.Glob "src/rules/tempo-rules.yaml").AsConfig | indent 2 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
@@ -14,7 +14,6 @@ local:
|
||||
minio:
|
||||
enabled: false # This should be set to true if any of the previous is enabled
|
||||
|
||||
|
||||
cloud:
|
||||
logs:
|
||||
enabled: true
|
||||
@@ -32,11 +31,37 @@ cloud:
|
||||
username:
|
||||
password:
|
||||
|
||||
# Adding regexes here will add a stage.replace block. For more information see
|
||||
# https://grafana.com/docs/agent/latest/flow/reference/components/loki.process/#stagereplace-block
|
||||
logs:
|
||||
piiRegexes:
|
||||
# This example replaces the word after password with *****
|
||||
# - expression: "password (\\\\S+)"
|
||||
# source: "" # Empty uses the log message
|
||||
# replace: "*****"
|
||||
|
||||
# Set enabled = true to add the default logs/metrics/traces dashboards to the local Grafana
|
||||
dashboards:
|
||||
logs:
|
||||
enabled: true
|
||||
metrics:
|
||||
enabled: true
|
||||
traces:
|
||||
enabled: true
|
||||
|
||||
global:
|
||||
minio:
|
||||
rootUser: "rootuser"
|
||||
rootPassword: "rootpassword"
|
||||
|
||||
kubeStateMetrics:
|
||||
# Scrape https://github.com/kubernetes/kube-state-metrics by default
|
||||
enabled: true
|
||||
# This endpoint is created when the helm chart from
|
||||
# https://artifacthub.io/packages/helm/prometheus-community/kube-state-metrics/
|
||||
# is used. Change this if kube-state-metrics is installed somewhere else.
|
||||
endpoint: kube-state-metrics.kube-state-metrics.svc.cluster.local:8080
|
||||
|
||||
# The following is the configuration for the dependencies.
|
||||
# These should not be changed.
|
||||
|
||||
|
Reference in New Issue
Block a user