From c42718649fda242b06aecf850a889e772ecadf77 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 09:03:02 +0100 Subject: [PATCH 01/10] Fix distributor memory panel Signed-off-by: Michel Hollands --- charts/meta-monitoring/src/dashboards/loki-operational.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/meta-monitoring/src/dashboards/loki-operational.json b/charts/meta-monitoring/src/dashboards/loki-operational.json index fbb66eb..50b304b 100644 --- a/charts/meta-monitoring/src/dashboards/loki-operational.json +++ b/charts/meta-monitoring/src/dashboards/loki-operational.json @@ -1921,7 +1921,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/distributor|(loki|enterprise-logs)-write|.*/loki)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|(loki|enterprise-logs)-write|.*/loki)\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", From 0216163885447dfe01db632a1342635fa945ccbc Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 09:11:20 +0100 Subject: [PATCH 02/10] Add chunk reason Signed-off-by: Michel Hollands --- charts/meta-monitoring/src/dashboards/loki-operational.json | 2 +- charts/meta-monitoring/values.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/charts/meta-monitoring/src/dashboards/loki-operational.json b/charts/meta-monitoring/src/dashboards/loki-operational.json index 50b304b..f0da709 100644 --- a/charts/meta-monitoring/src/dashboards/loki-operational.json +++ b/charts/meta-monitoring/src/dashboards/loki-operational.json @@ -3388,7 +3388,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(.*ingester|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", "format": "heatmap", "instant": false, "interval": "", diff --git a/charts/meta-monitoring/values.yaml b/charts/meta-monitoring/values.yaml index f7af36d..08499f7 100644 --- a/charts/meta-monitoring/values.yaml +++ b/charts/meta-monitoring/values.yaml @@ -191,6 +191,7 @@ metrics: - loki_ingester_chunk_entries_sum - loki_ingester_chunk_size_bytes_bucket - loki_ingester_chunk_utilization_bucket + - loki_ingester_chunk_utilization_count - loki_ingester_chunk_utilization_sum - loki_ingester_chunks_flushed_total - loki_ingester_flush_queue_length From bd0ef0e2ccbc2f9772cd0d0c456880646fd099f0 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 09:21:05 +0100 Subject: [PATCH 03/10] Add missing values Signed-off-by: Michel Hollands --- charts/meta-monitoring/values.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/charts/meta-monitoring/values.yaml b/charts/meta-monitoring/values.yaml index 08499f7..1b23831 100644 --- a/charts/meta-monitoring/values.yaml +++ b/charts/meta-monitoring/values.yaml @@ -149,6 +149,7 @@ metrics: - kube_pod_container_resource_requests - kube_pod_container_status_last_terminated_reason - kube_pod_container_status_restarts_total + - loki_azure_blob_request_duration_seconds_bucket - loki_boltdb_shipper_compact_tables_operation_duration_seconds - loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds - loki_boltdb_shipper_retention_marker_count_total @@ -174,11 +175,14 @@ metrics: - loki_compactor_deleted_lines - loki_compactor_oldest_pending_delete_request_age_seconds - loki_compactor_pending_delete_requests_count + - loki_consul_request_duration_seconds_bucket - loki_discarded_samples_total - loki_discarded_bytes_total - loki_distributor_bytes_received_total - loki_distributor_lines_received_total - loki_distributor_structured_metadata_bytes_received_total + - loki_gcs_request_duration_seconds_bucket + - loki_gcs_request_duration_seconds_count - loki_index_request_duration_seconds_count - loki_ingester_chunk_age_seconds_bucket - loki_ingester_chunk_age_seconds_count @@ -209,6 +213,8 @@ metrics: - loki_ruler_wal_prometheus_remote_storage_samples_total - loki_ruler_wal_samples_appended_total - loki_ruler_wal_storage_created_series_total + - loki_s3_request_duration_seconds_bucket + - loki_s3_request_duration_seconds_count - loki_write_batch_retries_total - loki_write_dropped_bytes_total - loki_write_dropped_entries_total From c1ff364c290006ab8988d2d952fe6de581bec12d Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 09:45:37 +0100 Subject: [PATCH 04/10] Add missing metric in reads dashboard Signed-off-by: Michel Hollands --- charts/meta-monitoring/values.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/meta-monitoring/values.yaml b/charts/meta-monitoring/values.yaml index 1b23831..1f410e1 100644 --- a/charts/meta-monitoring/values.yaml +++ b/charts/meta-monitoring/values.yaml @@ -183,6 +183,7 @@ metrics: - loki_distributor_structured_metadata_bytes_received_total - loki_gcs_request_duration_seconds_bucket - loki_gcs_request_duration_seconds_count + - loki_index_request_duration_seconds_bucket - loki_index_request_duration_seconds_count - loki_ingester_chunk_age_seconds_bucket - loki_ingester_chunk_age_seconds_count From 1a4a1ad8855418006edb91cbe245aac1403e828b Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 10:17:55 +0100 Subject: [PATCH 05/10] Fix ruler panel Signed-off-by: Michel Hollands --- charts/meta-monitoring/src/dashboards/loki-reads-resources.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json index 036ca7f..ac6a996 100644 --- a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json +++ b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json @@ -2354,7 +2354,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*ruler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null From 2d85e7e120f884488ff7015d9ec6d7e217a3598a Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 10:56:35 +0100 Subject: [PATCH 06/10] Update dashboards so they work with single binary Signed-off-by: Michel Hollands --- .../src/dashboards/loki-operational.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/charts/meta-monitoring/src/dashboards/loki-operational.json b/charts/meta-monitoring/src/dashboards/loki-operational.json index f0da709..f3a6655 100644 --- a/charts/meta-monitoring/src/dashboards/loki-operational.json +++ b/charts/meta-monitoring/src/dashboards/loki-operational.json @@ -1824,7 +1824,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write|$namespace-[0-9]+)\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" @@ -1921,7 +1921,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|(loki|enterprise-logs)-write|.*/loki)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|(loki|enterprise-logs)-write|.*/loki|$namespace/loki-single-binary)\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", @@ -2525,7 +2525,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" @@ -2622,7 +2622,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary|$namespace-[0-9]+)\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", @@ -3308,7 +3308,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/.*ingester.*\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/.*ingester.*\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary)\", namespace=~\"$namespace\"}[$__rate_interval]))", "interval": "", "legendFormat": "{{ reason }}" } @@ -3481,7 +3481,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" @@ -3578,7 +3578,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|.*loki-single-binary)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|.*loki-single-binary|$namespace-[0-9]+)\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", From d5e8df856dd2e81d686b968cc4af6c05eefcdd04 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 14:06:41 +0100 Subject: [PATCH 07/10] Update writes dashboard work with all types Signed-off-by: Michel Hollands --- .../src/dashboards/loki-writes-resources.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/charts/meta-monitoring/src/dashboards/loki-writes-resources.json b/charts/meta-monitoring/src/dashboards/loki-writes-resources.json index f70fefb..11c15cb 100644 --- a/charts/meta-monitoring/src/dashboards/loki-writes-resources.json +++ b/charts/meta-monitoring/src/dashboards/loki-writes-resources.json @@ -104,7 +104,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -116,7 +116,7 @@ "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -206,7 +206,7 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -218,7 +218,7 @@ "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor|loki\", pod=~\"distributor|loki-write-.*|$namespace-[0-9]*\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -269,7 +269,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*distributor\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*distributor|loki-write|loki-single-binary)\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null From f5c9fa05935064863eb9f4ae721cda33a93dd76c Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 14:07:59 +0100 Subject: [PATCH 08/10] Update operation so it works with all types of deployment Signed-off-by: Michel Hollands --- .../src/dashboards/loki-operational.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/charts/meta-monitoring/src/dashboards/loki-operational.json b/charts/meta-monitoring/src/dashboards/loki-operational.json index f3a6655..af09fe1 100644 --- a/charts/meta-monitoring/src/dashboards/loki-operational.json +++ b/charts/meta-monitoring/src/dashboards/loki-operational.json @@ -1824,7 +1824,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write|$namespace-[0-9]+)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*distributor.*|(loki|enterprise-logs)-write.*|$namespace-[0-9]+)\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" @@ -1921,7 +1921,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|(loki|enterprise-logs)-write|.*/loki|$namespace/loki-single-binary)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"(.*/.*distributor|$namespace/(loki|enterprise-logs)-write|.*/loki|$namespace/loki-single-binary)\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", @@ -2525,7 +2525,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" @@ -2622,7 +2622,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write|loki-single-binary|$namespace-[0-9]+)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*ingester.*|(loki|enterprise-logs)-write.*|loki-single-binary|$namespace-[0-9]+)\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", @@ -3578,7 +3578,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read|.*loki-single-binary|$namespace-[0-9]+)\"}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(.*querier.*|(loki|enterprise-logs)-read.*|.*loki-single-binary|$namespace-[0-9]+)\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", From 346dd4968ed3d3c330f3a9c3af8662b63c53daa8 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 14:36:53 +0100 Subject: [PATCH 09/10] Make reads-resources work for all 3 deployment modes Signed-off-by: Michel Hollands --- .../src/dashboards/loki-reads-resources.json | 78 +++++++++---------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json index ac6a996..1cc9a4f 100644 --- a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json +++ b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json @@ -104,19 +104,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -206,19 +206,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -269,7 +269,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*query-frontend\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-frontend|loki-read|loki-single-binary)\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -371,19 +371,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -473,19 +473,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler|loki\", pod=~\"query-scheduler|loki-read-.*|$namespace-[0-9]*\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -536,7 +536,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*query-scheduler\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-scheduler|loki-write|loki-single-binary)\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -638,19 +638,19 @@ }, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -740,19 +740,19 @@ }, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -803,7 +803,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*querier\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|loki-write|loki-single-binary)\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -854,7 +854,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -902,7 +902,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1462,19 +1462,19 @@ }, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1564,19 +1564,19 @@ }, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1627,7 +1627,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*bloom-gateway\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*bloom-gateway|loki-read|loki-single-binary)\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1678,7 +1678,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1726,7 +1726,7 @@ }, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway|loki\", pod=~\"query-frontend|loki-read-.*|$namespace-[0-9]*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -2189,19 +2189,19 @@ }, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"cpu\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -2291,19 +2291,19 @@ }, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\", resource=\"memory\"} > 0)", "format": "time_series", "legendFormat": "request", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler|loki\", pod=~\"ruler|loki-backend-.*|$namespace-[0-9]*\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -2354,7 +2354,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/.*ruler\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*ruler|loki-backend|loki-single-binary)\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null From d309a5bc504556bad6f74a22517c9dabdd9af10e Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 13 May 2024 14:44:08 +0100 Subject: [PATCH 10/10] Fix mistakes Signed-off-by: Michel Hollands --- .../meta-monitoring/src/dashboards/loki-reads-resources.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json index 1cc9a4f..b6c186b 100644 --- a/charts/meta-monitoring/src/dashboards/loki-reads-resources.json +++ b/charts/meta-monitoring/src/dashboards/loki-reads-resources.json @@ -536,7 +536,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-scheduler|loki-write|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*query-scheduler|loki-read|loki-single-binary)\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -803,7 +803,7 @@ }, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|loki-write|loki-single-binary)\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(.*querier|loki-read|loki-single-binary)\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null